| /****************************************************************************** |
| * |
| * Copyright (C) 2018 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at: |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| * |
| ***************************************************************************** |
| * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
| */ |
| /** |
| ******************************************************************************* |
| * @file |
| * ihevce_sao.c |
| * |
| * @brief |
| * Contains definition for the ctb level sao function |
| * |
| * @author |
| * Ittiam |
| * |
| * @par List of Functions: |
| * ihevce_sao_set_avilability() |
| * ihevce_sao_ctb() |
| * ihevce_sao_analyse() |
| * |
| * @remarks |
| * None |
| * |
| ******************************************************************************* |
| */ |
| |
| /*****************************************************************************/ |
| /* File Includes */ |
| /*****************************************************************************/ |
| /* System include files */ |
| #include <stdio.h> |
| #include <string.h> |
| #include <stdlib.h> |
| #include <assert.h> |
| #include <stdarg.h> |
| #include <math.h> |
| |
| /* User include files */ |
| #include "ihevc_typedefs.h" |
| #include "itt_video_api.h" |
| #include "ihevce_api.h" |
| |
| #include "rc_cntrl_param.h" |
| #include "rc_frame_info_collector.h" |
| #include "rc_look_ahead_params.h" |
| |
| #include "ihevc_defs.h" |
| #include "ihevc_structs.h" |
| #include "ihevc_platform_macros.h" |
| #include "ihevc_deblk.h" |
| #include "ihevc_itrans_recon.h" |
| #include "ihevc_chroma_itrans_recon.h" |
| #include "ihevc_chroma_intra_pred.h" |
| #include "ihevc_intra_pred.h" |
| #include "ihevc_inter_pred.h" |
| #include "ihevc_mem_fns.h" |
| #include "ihevc_padding.h" |
| #include "ihevc_weighted_pred.h" |
| #include "ihevc_sao.h" |
| #include "ihevc_resi_trans.h" |
| #include "ihevc_quant_iquant_ssd.h" |
| #include "ihevc_cabac_tables.h" |
| |
| #include "ihevce_defs.h" |
| #include "ihevce_lap_enc_structs.h" |
| #include "ihevce_multi_thrd_structs.h" |
| #include "ihevce_me_common_defs.h" |
| #include "ihevce_had_satd.h" |
| #include "ihevce_error_codes.h" |
| #include "ihevce_bitstream.h" |
| #include "ihevce_cabac.h" |
| #include "ihevce_rdoq_macros.h" |
| #include "ihevce_function_selector.h" |
| #include "ihevce_enc_structs.h" |
| #include "ihevce_entropy_structs.h" |
| #include "ihevce_cmn_utils_instr_set_router.h" |
| #include "ihevce_enc_loop_structs.h" |
| #include "ihevce_cabac_rdo.h" |
| #include "ihevce_sao.h" |
| |
| /*****************************************************************************/ |
| /* Function Definitions */ |
| /*****************************************************************************/ |
| |
| /** |
| ******************************************************************************* |
| * |
| * @brief |
| * ihevce_sao_set_avilability |
| * |
| * @par Description: |
| * Sets the availability flag for SAO. |
| * |
| * @param[in] |
| * ps_sao_ctxt: Pointer to SAO context |
| * @returns |
| * |
| * @remarks |
| * None |
| * |
| ******************************************************************************* |
| */ |
| void ihevce_sao_set_avilability( |
| UWORD8 *pu1_avail, sao_ctxt_t *ps_sao_ctxt, ihevce_tile_params_t *ps_tile_params) |
| { |
| WORD32 i; |
| |
| WORD32 ctb_x_pos = ps_sao_ctxt->i4_ctb_x; |
| WORD32 ctb_y_pos = ps_sao_ctxt->i4_ctb_y; |
| |
| for(i = 0; i < 8; i++) |
| { |
| pu1_avail[i] = 255; |
| } |
| |
| /* SAO_note_01: If the CTB lies on a tile or a slice boundary and |
| in-loop filtering is enabled at tile and slice boundary, then SAO must |
| be performed at tile/slice boundaries also. |
| Hence the boundary checks should be based on frame position of CTB |
| rather than s_ctb_nbr_avail_flags.u1_left_avail flags. |
| Search for <SAO_note_01> in workspace to know more */ |
| /* Availaibility flags for first col*/ |
| if(ctb_x_pos == ps_tile_params->i4_first_ctb_x) |
| { |
| pu1_avail[0] = 0; |
| pu1_avail[4] = 0; |
| pu1_avail[6] = 0; |
| } |
| |
| /* Availaibility flags for last col*/ |
| if((ctb_x_pos + 1) == |
| (ps_tile_params->i4_first_ctb_x + ps_tile_params->i4_curr_tile_wd_in_ctb_unit)) |
| { |
| pu1_avail[1] = 0; |
| pu1_avail[5] = 0; |
| pu1_avail[7] = 0; |
| } |
| |
| /* Availaibility flags for first row*/ |
| if(ctb_y_pos == ps_tile_params->i4_first_ctb_y) |
| { |
| pu1_avail[2] = 0; |
| pu1_avail[4] = 0; |
| pu1_avail[5] = 0; |
| } |
| |
| /* Availaibility flags for last row*/ |
| if((ctb_y_pos + 1) == |
| (ps_tile_params->i4_first_ctb_y + ps_tile_params->i4_curr_tile_ht_in_ctb_unit)) |
| { |
| pu1_avail[3] = 0; |
| pu1_avail[6] = 0; |
| pu1_avail[7] = 0; |
| } |
| } |
| |
| /** |
| ******************************************************************************* |
| * |
| * @brief |
| * Sao CTB level function. |
| * |
| * @par Description: |
| * For a given CTB, sao is done. Both the luma and chroma |
| * blocks are processed |
| * |
| * @param[in] |
| * ps_sao_ctxt: Pointer to SAO context |
| * |
| * @returns |
| * |
| * @remarks |
| * None |
| * |
| ******************************************************************************* |
| */ |
| void ihevce_sao_ctb(sao_ctxt_t *ps_sao_ctxt, ihevce_tile_params_t *ps_tile_params) |
| { |
| sao_enc_t *ps_sao; |
| UWORD8 u1_src_top_left_luma, u1_src_top_left_chroma[2]; |
| UWORD8 *pu1_src_left_luma_buf, *pu1_src_top_luma_buf; |
| UWORD8 *pu1_src_left_chroma_buf, *pu1_src_top_chroma_buf; |
| UWORD8 *pu1_src_luma, *pu1_src_chroma; |
| WORD32 luma_src_stride, ctb_size; |
| WORD32 chroma_src_stride; |
| UWORD8 au1_avail_luma[8], au1_avail_chroma[8]; |
| WORD32 sao_blk_wd, sao_blk_ht, sao_wd_chroma, sao_ht_chroma; |
| UWORD8 *pu1_top_left_luma, *pu1_top_left_chroma; |
| UWORD8 *pu1_src_bot_left_luma, *pu1_src_top_right_luma; |
| UWORD8 *pu1_src_bot_left_chroma, *pu1_src_top_right_chroma; |
| UWORD8 u1_is_422 = (ps_sao_ctxt->ps_sps->i1_chroma_format_idc == 2); |
| |
| ps_sao = ps_sao_ctxt->ps_sao; |
| |
| ASSERT( |
| (abs(ps_sao->u1_y_offset[1]) <= 7) && (abs(ps_sao->u1_y_offset[2]) <= 7) && |
| (abs(ps_sao->u1_y_offset[3]) <= 7) && (abs(ps_sao->u1_y_offset[4]) <= 7)); |
| ASSERT( |
| (abs(ps_sao->u1_cb_offset[1]) <= 7) && (abs(ps_sao->u1_cb_offset[2]) <= 7) && |
| (abs(ps_sao->u1_cb_offset[3]) <= 7) && (abs(ps_sao->u1_cb_offset[4]) <= 7)); |
| ASSERT( |
| (abs(ps_sao->u1_cr_offset[1]) <= 7) && (abs(ps_sao->u1_cr_offset[2]) <= 7) && |
| (abs(ps_sao->u1_cr_offset[3]) <= 7) && (abs(ps_sao->u1_cr_offset[4]) <= 7)); |
| ASSERT( |
| (ps_sao->b5_y_band_pos <= 28) && (ps_sao->b5_cb_band_pos <= 28) && |
| (ps_sao->b5_cr_band_pos <= 28)); |
| |
| if(ps_sao_ctxt->i1_slice_sao_luma_flag) |
| { |
| /*initialize the src pointer to current row*/ |
| luma_src_stride = ps_sao_ctxt->i4_cur_luma_recon_stride; |
| |
| ctb_size = ps_sao_ctxt->i4_ctb_size; |
| |
| /* 1 extra byte in top buf stride for top left of 1st ctb of every row*/ |
| ps_sao->u1_y_offset[0] = 0; /* 0th element is not being used */ |
| sao_blk_wd = ps_sao_ctxt->i4_sao_blk_wd; |
| sao_blk_ht = ps_sao_ctxt->i4_sao_blk_ht; |
| |
| pu1_src_luma = ps_sao_ctxt->pu1_cur_luma_recon_buf; |
| /* Pointer to the top luma buffer corresponding to the current ctb row*/ |
| pu1_src_top_luma_buf = ps_sao_ctxt->pu1_curr_sao_src_top_luma; |
| |
| /* Pointer to left luma buffer corresponding to the current ctb row*/ |
| pu1_src_left_luma_buf = ps_sao_ctxt->au1_left_luma_scratch; |
| |
| /* Pointer to the top right luma buffer corresponding to the current ctb row*/ |
| pu1_src_top_right_luma = pu1_src_top_luma_buf /*- top_buf_stide*/ + sao_blk_wd; |
| |
| /* Pointer to the bottom left luma buffer corresponding to the current ctb row*/ |
| pu1_src_bot_left_luma = |
| ps_sao_ctxt->pu1_frm_luma_recon_buf + ctb_size * ps_sao_ctxt->i4_frm_luma_recon_stride - |
| 1 + (ps_sao_ctxt->i4_frm_luma_recon_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) + |
| (ps_sao_ctxt->i4_ctb_x * ctb_size); /* Bottom left*/ |
| |
| /* Back up the top left pixel for (x+1, y+1)th ctb*/ |
| u1_src_top_left_luma = *(pu1_src_top_luma_buf + sao_blk_wd - 1); |
| pu1_top_left_luma = pu1_src_top_luma_buf - 1; |
| |
| if(SAO_BAND == ps_sao->b3_y_type_idx) |
| { |
| ihevc_sao_band_offset_luma( |
| pu1_src_luma, |
| luma_src_stride, |
| pu1_src_left_luma_buf, /* Pass the pointer to the left luma buffer backed up in the (x-1,y)th ctb */ |
| pu1_src_top_luma_buf, /* Pass the ptr to the top luma buf backed up in the (x,y-1)th ctb */ |
| pu1_src_top_luma_buf - 1, /* Top left*/ |
| ps_sao->b5_y_band_pos, |
| ps_sao->u1_y_offset, |
| sao_blk_wd, |
| sao_blk_ht); |
| |
| if((ps_sao_ctxt->i4_ctb_y > 0)) |
| { |
| *(pu1_src_top_luma_buf + sao_blk_wd - 1) = u1_src_top_left_luma; |
| } |
| } |
| else if(ps_sao->b3_y_type_idx >= SAO_EDGE_0_DEG) |
| { |
| /*In case of edge offset, 1st and 2nd offsets are always inferred as offsets |
| * corresponding to EO category 1 and 2 which should be always positive |
| * And 3rd and 4th offsets are always inferred as offsets corresponding to |
| * EO category 3 and 4 which should be negative for all the EO classes(or EO typeidx) |
| */ |
| // clang-format off |
| ASSERT((ps_sao->u1_y_offset[1] >= 0) && (ps_sao->u1_y_offset[2] >= 0)); |
| ASSERT((ps_sao->u1_y_offset[3] <= 0) && (ps_sao->u1_y_offset[4] <= 0)); |
| // clang-format on |
| |
| ihevce_sao_set_avilability(au1_avail_luma, ps_sao_ctxt, ps_tile_params); |
| |
| ps_sao_ctxt->apf_sao_luma[ps_sao->b3_y_type_idx - 2]( |
| pu1_src_luma, |
| luma_src_stride, |
| pu1_src_left_luma_buf, /* Pass the pointer to the left luma buffer backed up in the (x-1,y)th ctb */ |
| pu1_src_top_luma_buf, /* Pass the ptr to the top luma buf backed up in the (x,y-1)th ctb */ |
| pu1_top_left_luma, /* Top left*/ |
| pu1_src_top_right_luma, /* Top right*/ |
| pu1_src_bot_left_luma, /* Bottom left*/ |
| au1_avail_luma, |
| ps_sao->u1_y_offset, |
| sao_blk_wd, |
| sao_blk_ht); |
| |
| if((ps_sao_ctxt->i4_ctb_y > 0)) |
| { |
| *(pu1_src_top_luma_buf + sao_blk_wd - 1) = u1_src_top_left_luma; |
| } |
| } |
| } |
| |
| if(ps_sao_ctxt->i1_slice_sao_chroma_flag) |
| { |
| /*initialize the src pointer to current row*/ |
| chroma_src_stride = ps_sao_ctxt->i4_cur_chroma_recon_stride; |
| ctb_size = ps_sao_ctxt->i4_ctb_size; |
| |
| /* 1 extra byte in top buf stride for top left of 1st ctb of every row*/ |
| //top_buf_stide = ps_sao_ctxt->u4_ctb_aligned_wd + 2; |
| ps_sao->u1_cb_offset[0] = 0; /* 0th element is not used */ |
| ps_sao->u1_cr_offset[0] = 0; |
| sao_wd_chroma = ps_sao_ctxt->i4_sao_blk_wd; |
| sao_ht_chroma = ps_sao_ctxt->i4_sao_blk_ht / (!u1_is_422 + 1); |
| |
| pu1_src_chroma = ps_sao_ctxt->pu1_cur_chroma_recon_buf; |
| /* Pointer to the top luma buffer corresponding to the current ctb row*/ |
| pu1_src_top_chroma_buf = ps_sao_ctxt->pu1_curr_sao_src_top_chroma; |
| // clang-format off |
| /* Pointer to left luma buffer corresponding to the current ctb row*/ |
| pu1_src_left_chroma_buf = ps_sao_ctxt->au1_left_chroma_scratch; //ps_sao_ctxt->au1_sao_src_left_chroma; |
| // clang-format on |
| /* Pointer to the top right chroma buffer corresponding to the current ctb row*/ |
| pu1_src_top_right_chroma = pu1_src_top_chroma_buf /*- top_buf_stide*/ + sao_wd_chroma; |
| |
| /* Pointer to the bottom left luma buffer corresponding to the current ctb row*/ |
| pu1_src_bot_left_chroma = |
| ps_sao_ctxt->pu1_frm_chroma_recon_buf + |
| (ctb_size >> !u1_is_422) * ps_sao_ctxt->i4_frm_chroma_recon_stride - 2 + |
| (ps_sao_ctxt->i4_frm_chroma_recon_stride * ps_sao_ctxt->i4_ctb_y * |
| (ctb_size >> !u1_is_422)) + |
| (ps_sao_ctxt->i4_ctb_x * ctb_size); /* Bottom left*/ |
| |
| /* Back up the top left pixel for (x+1, y+1)th ctb*/ |
| u1_src_top_left_chroma[0] = *(pu1_src_top_chroma_buf + sao_wd_chroma - 2); |
| u1_src_top_left_chroma[1] = *(pu1_src_top_chroma_buf + sao_wd_chroma - 1); |
| pu1_top_left_chroma = pu1_src_top_chroma_buf - 2; |
| |
| if(SAO_BAND == ps_sao->b3_cb_type_idx) |
| { |
| ihevc_sao_band_offset_chroma( |
| pu1_src_chroma, |
| chroma_src_stride, |
| pu1_src_left_chroma_buf, /* Pass the pointer to the left luma buffer backed up in the (x-1,y)th ctb */ |
| pu1_src_top_chroma_buf, /* Pass the ptr to the top luma buf backed up in the (x,y-1)th ctb */ |
| pu1_top_left_chroma, /* Top left*/ |
| ps_sao->b5_cb_band_pos, |
| ps_sao->b5_cr_band_pos, |
| ps_sao->u1_cb_offset, |
| ps_sao->u1_cr_offset, |
| sao_wd_chroma, |
| sao_ht_chroma); |
| |
| if((ps_sao_ctxt->i4_ctb_y > 0)) |
| { |
| *(pu1_src_top_chroma_buf + sao_wd_chroma - 2) = u1_src_top_left_chroma[0]; |
| *(pu1_src_top_chroma_buf + sao_wd_chroma - 1) = u1_src_top_left_chroma[1]; |
| } |
| } |
| else if(ps_sao->b3_cb_type_idx >= SAO_EDGE_0_DEG) |
| { |
| /*In case of edge offset, 1st and 2nd offsets are always inferred as offsets |
| * corresponding to EO category 1 and 2 which should be always positive |
| * And 3rd and 4th offsets are always inferred as offsets corresponding to |
| * EO category 3 and 4 which should be negative for all the EO classes(or EO typeidx) |
| */ |
| ASSERT((ps_sao->u1_cb_offset[1] >= 0) && (ps_sao->u1_cb_offset[2] >= 0)); |
| ASSERT((ps_sao->u1_cb_offset[3] <= 0) && (ps_sao->u1_cb_offset[4] <= 0)); |
| |
| ASSERT((ps_sao->u1_cr_offset[1] >= 0) && (ps_sao->u1_cr_offset[2] >= 0)); |
| ASSERT((ps_sao->u1_cr_offset[3] <= 0) && (ps_sao->u1_cr_offset[4] <= 0)); |
| |
| ihevce_sao_set_avilability(au1_avail_chroma, ps_sao_ctxt, ps_tile_params); |
| |
| ps_sao_ctxt->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2]( |
| pu1_src_chroma, |
| chroma_src_stride, |
| pu1_src_left_chroma_buf, /* Pass the pointer to the left luma buffer backed up in the (x-1,y)th ctb */ |
| pu1_src_top_chroma_buf, /* Pass the ptr to the top luma buf backed up in the (x,y-1)th ctb */ |
| pu1_top_left_chroma, /* Top left*/ |
| pu1_src_top_right_chroma, /* Top right*/ |
| pu1_src_bot_left_chroma, /* Bottom left*/ |
| au1_avail_chroma, |
| ps_sao->u1_cb_offset, |
| ps_sao->u1_cr_offset, |
| sao_wd_chroma, |
| sao_ht_chroma); |
| |
| if((ps_sao_ctxt->i4_ctb_y > 0)) |
| { |
| *(pu1_src_top_chroma_buf + sao_wd_chroma - 2) = u1_src_top_left_chroma[0]; |
| *(pu1_src_top_chroma_buf + sao_wd_chroma - 1) = u1_src_top_left_chroma[1]; |
| } |
| } |
| } |
| } |
| |
| /** |
| ******************************************************************************* |
| * |
| * @brief |
| * CTB level function to do SAO analysis. |
| * |
| * @par Description: |
| * For a given CTB, sao analysis is done for both luma and chroma. |
| * |
| * |
| * @param[in] |
| * ps_sao_ctxt: Pointer to SAO context |
| * ps_ctb_enc_loop_out : pointer to ctb level output structure from enc loop |
| * |
| * @returns |
| * |
| * @remarks |
| * None |
| * |
| * @Assumptions: |
| * 1) The initial CABAC state for the current ctb to be SAO'ed (i.e. the (x-1,y-1)th ctb) is |
| * assumed to be almost the same as the CABAC state of the (x,y)th ctb. |
| * 2) Distortion is calculated in the spatial domain, but the lambda used to calculate the |
| * cost is in the frequency domain. |
| ******************************************************************************* |
| */ |
| void ihevce_sao_analyse( |
| sao_ctxt_t *ps_sao_ctxt, |
| ctb_enc_loop_out_t *ps_ctb_enc_loop_out, |
| UWORD32 *pu4_frame_rdopt_header_bits, |
| ihevce_tile_params_t *ps_tile_params) |
| { |
| UWORD8 *pu1_luma_scratch_buf; |
| UWORD8 *pu1_chroma_scratch_buf; |
| UWORD8 *pu1_src_luma, *pu1_recon_luma; |
| UWORD8 *pu1_src_chroma, *pu1_recon_chroma; |
| WORD32 luma_src_stride, luma_recon_stride, ctb_size, ctb_wd, ctb_ht; |
| WORD32 chroma_src_stride, chroma_recon_stride; |
| WORD32 i4_luma_scratch_buf_stride; |
| WORD32 i4_chroma_scratch_buf_stride; |
| sao_ctxt_t s_sao_ctxt; |
| UWORD32 ctb_bits = 0, distortion = 0, curr_cost = 0, best_cost = 0; |
| LWORD64 i8_cl_ssd_lambda_qf, i8_cl_ssd_lambda_chroma_qf; |
| WORD32 rdo_cand, num_luma_rdo_cand = 0, num_rdo_cand = 0; |
| WORD32 curr_buf_idx, best_buf_idx, best_cand_idx; |
| WORD32 row; |
| WORD32 edgeidx; |
| WORD32 acc_error_category[5] = { 0, 0, 0, 0, 0 }, category_count[5] = { 0, 0, 0, 0, 0 }; |
| sao_enc_t s_best_luma_chroma_cand; |
| WORD32 best_ctb_sao_bits = 0; |
| #if DISABLE_SAO_WHEN_NOISY && !defined(ENC_VER_v2) |
| UWORD8 u1_force_no_offset = |
| ps_sao_ctxt |
| ->ps_ctb_data |
| [ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_data_stride * ps_sao_ctxt->i4_ctb_y] |
| .s_ctb_noise_params.i4_noise_present; |
| #endif |
| UWORD8 u1_is_422 = (ps_sao_ctxt->ps_sps->i1_chroma_format_idc == 2); |
| |
| *pu4_frame_rdopt_header_bits = 0; |
| |
| ctb_size = ps_sao_ctxt->i4_ctb_size; |
| ctb_wd = ps_sao_ctxt->i4_sao_blk_wd; |
| ctb_ht = ps_sao_ctxt->i4_sao_blk_ht; |
| |
| s_sao_ctxt = ps_sao_ctxt[0]; |
| |
| /* Memset the best luma_chroma_cand structure to avoid asserts in debug mode*/ |
| memset(&s_best_luma_chroma_cand, 0, sizeof(sao_enc_t)); |
| |
| /* Initialize the pointer and strides for luma buffers*/ |
| pu1_recon_luma = ps_sao_ctxt->pu1_cur_luma_recon_buf; |
| luma_recon_stride = ps_sao_ctxt->i4_cur_luma_recon_stride; |
| |
| pu1_src_luma = ps_sao_ctxt->pu1_cur_luma_src_buf; |
| luma_src_stride = ps_sao_ctxt->i4_cur_luma_src_stride; |
| i4_luma_scratch_buf_stride = SCRATCH_BUF_STRIDE; |
| |
| /* Initialize the pointer and strides for luma buffers*/ |
| pu1_recon_chroma = ps_sao_ctxt->pu1_cur_chroma_recon_buf; |
| chroma_recon_stride = ps_sao_ctxt->i4_cur_chroma_recon_stride; |
| |
| pu1_src_chroma = ps_sao_ctxt->pu1_cur_chroma_src_buf; |
| chroma_src_stride = ps_sao_ctxt->i4_cur_chroma_src_stride; |
| i4_chroma_scratch_buf_stride = SCRATCH_BUF_STRIDE; |
| |
| i8_cl_ssd_lambda_qf = ps_sao_ctxt->i8_cl_ssd_lambda_qf; |
| i8_cl_ssd_lambda_chroma_qf = ps_sao_ctxt->i8_cl_ssd_lambda_chroma_qf; |
| |
| /*****************************************************/ |
| /********************RDO FOR LUMA CAND****************/ |
| /*****************************************************/ |
| |
| #if !DISABLE_SAO_WHEN_NOISY |
| if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag) |
| #else |
| if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag && !u1_force_no_offset) |
| #endif |
| { |
| /* Candidate for Edge offset SAO*/ |
| /* Following is the convention for curr pixel and |
| * two neighbouring pixels for 0 deg, 90 deg, 135 deg and 45 deg */ |
| /* |
| * 0 deg : a c b 90 deg: a 135 deg: a 45 deg: a |
| * c c c |
| * b b b |
| */ |
| |
| /* 0 deg SAO CAND*/ |
| /* Reset the error and edge count*/ |
| for(edgeidx = 0; edgeidx < 5; edgeidx++) |
| { |
| acc_error_category[edgeidx] = 0; |
| category_count[edgeidx] = 0; |
| } |
| |
| /* Call the funciton to populate the EO parameter for this ctb for 0 deg EO class*/ |
| // clang-format off |
| ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_get_luma_eo_sao_params(ps_sao_ctxt, SAO_EDGE_0_DEG, |
| acc_error_category, category_count); |
| // clang-format on |
| // clang-format off |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_y_type_idx = SAO_EDGE_0_DEG; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[1] = category_count[0] |
| ? (CLIP3(acc_error_category[0] / category_count[0], 0, 7)) |
| : 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[2] = category_count[1] |
| ? (CLIP3(acc_error_category[1] / category_count[1], 0, 7)) |
| : 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[3] = category_count[3] |
| ? (CLIP3(acc_error_category[3] / category_count[3], -7, 0)) |
| : 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[4] =category_count[4] |
| ? (CLIP3(acc_error_category[4] / category_count[4], -7, 0)) |
| : 0; |
| // clang-format on |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_y_band_pos = 0; |
| // clang-format off |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cb_type_idx = SAO_NONE; |
| // clang-format on |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[1] = 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[2] = 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[3] = 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[4] = 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cb_band_pos = 0; |
| |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cr_type_idx = SAO_NONE; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[1] = 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[2] = 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[3] = 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[4] = 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cr_band_pos = 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_left_flag = 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_up_flag = 0; |
| |
| num_luma_rdo_cand++; |
| |
| /* 90 degree SAO CAND*/ |
| for(edgeidx = 0; edgeidx < 5; edgeidx++) |
| { |
| acc_error_category[edgeidx] = 0; |
| category_count[edgeidx] = 0; |
| } |
| |
| /* Call the funciton to populate the EO parameter for this ctb for 90 deg EO class*/ |
| // clang-format off |
| ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_get_luma_eo_sao_params(ps_sao_ctxt, SAO_EDGE_90_DEG, |
| acc_error_category, category_count); |
| |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_y_type_idx = SAO_EDGE_90_DEG; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[1] = category_count[0] |
| ? (CLIP3(acc_error_category[0] / category_count[0], 0, 7)) |
| : 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[2] = category_count[1] |
| ? (CLIP3(acc_error_category[1] / category_count[1], 0, 7)) |
| : 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[3] = category_count[3] |
| ? (CLIP3(acc_error_category[3] / category_count[3], -7, 0)) |
| : 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[4] = category_count[4] |
| ? (CLIP3(acc_error_category[4] / category_count[4], -7, 0)) |
| : 0; |
| // clang-format on |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_y_band_pos = 0; |
| |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cb_type_idx = SAO_NONE; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[1] = 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[2] = 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[3] = 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[4] = 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cb_band_pos = 0; |
| |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cr_type_idx = SAO_NONE; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[1] = 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[2] = 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[3] = 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[4] = 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cr_band_pos = 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_left_flag = 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_up_flag = 0; |
| |
| num_luma_rdo_cand++; |
| |
| /* 135 degree SAO CAND*/ |
| for(edgeidx = 0; edgeidx < 5; edgeidx++) |
| { |
| acc_error_category[edgeidx] = 0; |
| category_count[edgeidx] = 0; |
| } |
| |
| /* Call the funciton to populate the EO parameter for this ctb for 135 deg EO class*/ |
| // clang-format off |
| ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_get_luma_eo_sao_params(ps_sao_ctxt, SAO_EDGE_135_DEG, |
| acc_error_category, category_count); |
| |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_y_type_idx = SAO_EDGE_135_DEG; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[1] = category_count[0] |
| ? (CLIP3(acc_error_category[0] / category_count[0], 0, 7)) |
| : 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[2] = category_count[1] |
| ? (CLIP3(acc_error_category[1] / category_count[1], 0, 7)) |
| : 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[3] = category_count[3] |
| ? (CLIP3(acc_error_category[3] / category_count[3], -7, 0)) |
| : 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[4] = category_count[4] |
| ? (CLIP3(acc_error_category[4] / category_count[4], -7, 0)) |
| : 0; |
| // clang-format on |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_y_band_pos = 0; |
| |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cb_type_idx = SAO_NONE; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[1] = 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[2] = 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[3] = 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[4] = 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cb_band_pos = 0; |
| |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cr_type_idx = SAO_NONE; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[1] = 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[2] = 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[3] = 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[4] = 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cr_band_pos = 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_left_flag = 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_up_flag = 0; |
| |
| num_luma_rdo_cand++; |
| |
| /* 45 degree SAO CAND*/ |
| for(edgeidx = 0; edgeidx < 5; edgeidx++) |
| { |
| acc_error_category[edgeidx] = 0; |
| category_count[edgeidx] = 0; |
| } |
| |
| /* Call the funciton to populate the EO parameter for this ctb for 45 deg EO class*/ |
| // clang-format off |
| ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_get_luma_eo_sao_params(ps_sao_ctxt, SAO_EDGE_45_DEG, |
| acc_error_category, category_count); |
| |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_y_type_idx = SAO_EDGE_45_DEG; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[1] = category_count[0] |
| ? (CLIP3(acc_error_category[0] / category_count[0], 0, 7)) |
| : 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[2] = category_count[1] |
| ? (CLIP3(acc_error_category[1] / category_count[1], 0, 7)) |
| : 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[3] = category_count[3] |
| ? (CLIP3(acc_error_category[3] / category_count[3], -7, 0)) |
| : 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[4] = category_count[4] |
| ? (CLIP3(acc_error_category[4] / category_count[4], -7, 0)) |
| : 0; |
| // clang-format on |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_y_band_pos = 0; |
| |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cb_type_idx = SAO_NONE; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[1] = 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[2] = 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[3] = 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[4] = 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cb_band_pos = 0; |
| |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cr_type_idx = SAO_NONE; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[1] = 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[2] = 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[3] = 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[4] = 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cr_band_pos = 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_left_flag = 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_up_flag = 0; |
| |
| num_luma_rdo_cand++; |
| |
| /* First cand will be best cand after 1st iteration*/ |
| curr_buf_idx = 0; |
| best_buf_idx = 1; |
| best_cost = 0xFFFFFFFF; |
| best_cand_idx = 0; |
| |
| /*Back up the top pixels for (x,y+1)th ctb*/ |
| if(!ps_sao_ctxt->i4_is_last_ctb_row) |
| { |
| memcpy( |
| ps_sao_ctxt->pu1_curr_sao_src_top_luma + ps_sao_ctxt->i4_frm_top_luma_buf_stride, |
| pu1_recon_luma + luma_recon_stride * (ctb_size - 1), |
| ps_sao_ctxt->i4_sao_blk_wd); |
| } |
| |
| for(rdo_cand = 0; rdo_cand < num_luma_rdo_cand; rdo_cand++) |
| { |
| s_sao_ctxt.ps_sao = &ps_sao_ctxt->as_sao_rd_cand[rdo_cand]; |
| |
| /* This memcpy is required because cabac uses parameters from this structure |
| * to evaluate bits and this structure ptr is sent to cabac through |
| * "ihevce_cabac_rdo_encode_sao" function |
| */ |
| memcpy(&ps_ctb_enc_loop_out->s_sao, s_sao_ctxt.ps_sao, sizeof(sao_enc_t)); |
| |
| /* Copy the left pixels to the scratch buffer for evry rdo cand because its |
| overwritten by the sao leaf level function for next ctb*/ |
| memcpy( |
| s_sao_ctxt.au1_left_luma_scratch, |
| ps_sao_ctxt->au1_sao_src_left_luma, |
| ps_sao_ctxt->i4_sao_blk_ht); |
| |
| /* Copy the top and top left pixels to the scratch buffer for evry rdo cand because its |
| overwritten by the sao leaf level function for next ctb*/ |
| memcpy( |
| s_sao_ctxt.au1_top_luma_scratch, |
| ps_sao_ctxt->pu1_curr_sao_src_top_luma - 1, |
| ps_sao_ctxt->i4_sao_blk_wd + 2); |
| s_sao_ctxt.pu1_curr_sao_src_top_luma = s_sao_ctxt.au1_top_luma_scratch + 1; |
| |
| pu1_luma_scratch_buf = ps_sao_ctxt->au1_sao_luma_scratch[curr_buf_idx]; |
| |
| ASSERT( |
| (abs(s_sao_ctxt.ps_sao->u1_y_offset[1]) <= 7) && |
| (abs(s_sao_ctxt.ps_sao->u1_y_offset[2]) <= 7) && |
| (abs(s_sao_ctxt.ps_sao->u1_y_offset[3]) <= 7) && |
| (abs(s_sao_ctxt.ps_sao->u1_y_offset[4]) <= 7)); |
| ASSERT( |
| (abs(s_sao_ctxt.ps_sao->u1_cb_offset[1]) <= 7) && |
| (abs(s_sao_ctxt.ps_sao->u1_cb_offset[2]) <= 7) && |
| (abs(s_sao_ctxt.ps_sao->u1_cb_offset[3]) <= 7) && |
| (abs(s_sao_ctxt.ps_sao->u1_cb_offset[4]) <= 7)); |
| ASSERT( |
| (abs(s_sao_ctxt.ps_sao->u1_cr_offset[1]) <= 7) && |
| (abs(s_sao_ctxt.ps_sao->u1_cr_offset[2]) <= 7) && |
| (abs(s_sao_ctxt.ps_sao->u1_cr_offset[3]) <= 7) && |
| (abs(s_sao_ctxt.ps_sao->u1_cr_offset[4]) <= 7)); |
| ASSERT( |
| (s_sao_ctxt.ps_sao->b5_y_band_pos <= 28) && |
| (s_sao_ctxt.ps_sao->b5_cb_band_pos <= 28) && |
| (s_sao_ctxt.ps_sao->b5_cr_band_pos <= 28)); |
| |
| /* Copy the deblocked recon data to scratch buffer to do sao*/ |
| |
| ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_copy_2d( |
| pu1_luma_scratch_buf, |
| i4_luma_scratch_buf_stride, |
| pu1_recon_luma, |
| luma_recon_stride, |
| SCRATCH_BUF_STRIDE, |
| ctb_ht + 1); |
| |
| s_sao_ctxt.pu1_cur_luma_recon_buf = pu1_luma_scratch_buf; |
| s_sao_ctxt.i4_cur_luma_recon_stride = i4_luma_scratch_buf_stride; |
| |
| s_sao_ctxt.i1_slice_sao_luma_flag = s_sao_ctxt.ps_slice_hdr->i1_slice_sao_luma_flag; |
| s_sao_ctxt.i1_slice_sao_chroma_flag = 0; |
| |
| ihevce_sao_ctb(&s_sao_ctxt, ps_tile_params); |
| |
| /* Calculate the distortion between sao'ed ctb and original src ctb*/ |
| // clang-format off |
| distortion = |
| ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_ssd_calculator(pu1_src_luma, |
| s_sao_ctxt.pu1_cur_luma_recon_buf, luma_src_stride, |
| s_sao_ctxt.i4_cur_luma_recon_stride, ctb_wd, ctb_ht); |
| // clang-format on |
| |
| ps_sao_ctxt->ps_rdopt_entropy_ctxt->i4_curr_buf_idx = curr_buf_idx; |
| ctb_bits = ihevce_cabac_rdo_encode_sao( |
| ps_sao_ctxt->ps_rdopt_entropy_ctxt, ps_ctb_enc_loop_out); |
| |
| /* Calculate the cost as D+(lamda)*R */ |
| curr_cost = distortion + |
| COMPUTE_RATE_COST_CLIP30(ctb_bits, i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT); |
| |
| if(curr_cost < best_cost) |
| { |
| best_cost = curr_cost; |
| best_buf_idx = ps_sao_ctxt->ps_rdopt_entropy_ctxt->i4_curr_buf_idx; |
| best_cand_idx = rdo_cand; |
| curr_buf_idx = !curr_buf_idx; |
| } |
| } |
| |
| /* Copy the sao parameters of the best luma cand into the luma_chroma cand structure for the next stage of RDO |
| * between the luma_chroma combined cand, the NO SAO cand, and the LEFT and TOP merge cands |
| */ |
| s_best_luma_chroma_cand.b3_y_type_idx = |
| ps_sao_ctxt->as_sao_rd_cand[best_cand_idx].b3_y_type_idx; |
| s_best_luma_chroma_cand.u1_y_offset[1] = |
| ps_sao_ctxt->as_sao_rd_cand[best_cand_idx].u1_y_offset[1]; |
| s_best_luma_chroma_cand.u1_y_offset[2] = |
| ps_sao_ctxt->as_sao_rd_cand[best_cand_idx].u1_y_offset[2]; |
| s_best_luma_chroma_cand.u1_y_offset[3] = |
| ps_sao_ctxt->as_sao_rd_cand[best_cand_idx].u1_y_offset[3]; |
| s_best_luma_chroma_cand.u1_y_offset[4] = |
| ps_sao_ctxt->as_sao_rd_cand[best_cand_idx].u1_y_offset[4]; |
| s_best_luma_chroma_cand.b5_y_band_pos = |
| ps_sao_ctxt->as_sao_rd_cand[best_cand_idx].b5_y_band_pos; |
| } |
| else |
| { |
| /*Back up the top pixels for (x,y+1)th ctb*/ |
| if(!ps_sao_ctxt->i4_is_last_ctb_row) |
| { |
| memcpy( |
| ps_sao_ctxt->pu1_curr_sao_src_top_luma + ps_sao_ctxt->i4_frm_top_luma_buf_stride, |
| pu1_recon_luma + luma_recon_stride * (ctb_size - 1), |
| ps_sao_ctxt->i4_sao_blk_wd); |
| } |
| |
| s_best_luma_chroma_cand.b3_y_type_idx = SAO_NONE; |
| s_best_luma_chroma_cand.u1_y_offset[1] = 0; |
| s_best_luma_chroma_cand.u1_y_offset[2] = 0; |
| s_best_luma_chroma_cand.u1_y_offset[3] = 0; |
| s_best_luma_chroma_cand.u1_y_offset[4] = 0; |
| s_best_luma_chroma_cand.b5_y_band_pos = 0; |
| s_best_luma_chroma_cand.b1_sao_merge_left_flag = 0; |
| s_best_luma_chroma_cand.b1_sao_merge_up_flag = 0; |
| |
| s_best_luma_chroma_cand.b3_cb_type_idx = SAO_NONE; |
| s_best_luma_chroma_cand.u1_cb_offset[1] = 0; |
| s_best_luma_chroma_cand.u1_cb_offset[2] = 0; |
| s_best_luma_chroma_cand.u1_cb_offset[3] = 0; |
| s_best_luma_chroma_cand.u1_cb_offset[4] = 0; |
| s_best_luma_chroma_cand.b5_cb_band_pos = 0; |
| |
| s_best_luma_chroma_cand.b3_cr_type_idx = SAO_NONE; |
| s_best_luma_chroma_cand.u1_cr_offset[1] = 0; |
| s_best_luma_chroma_cand.u1_cr_offset[2] = 0; |
| s_best_luma_chroma_cand.u1_cr_offset[3] = 0; |
| s_best_luma_chroma_cand.u1_cr_offset[4] = 0; |
| s_best_luma_chroma_cand.b5_cr_band_pos = 0; |
| } |
| /*****************************************************/ |
| /********************RDO FOR CHROMA CAND**************/ |
| /*****************************************************/ |
| #if !DISABLE_SAO_WHEN_NOISY |
| if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag) |
| #else |
| if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag && !u1_force_no_offset) |
| #endif |
| { |
| /*Back up the top pixels for (x,y+1)th ctb*/ |
| if(!ps_sao_ctxt->i4_is_last_ctb_row) |
| { |
| memcpy( |
| ps_sao_ctxt->pu1_curr_sao_src_top_chroma + |
| ps_sao_ctxt->i4_frm_top_chroma_buf_stride, |
| pu1_recon_chroma + chroma_recon_stride * ((ctb_size >> !u1_is_422) - 1), |
| ps_sao_ctxt->i4_sao_blk_wd); |
| } |
| |
| /* Reset the error and edge count*/ |
| for(edgeidx = 0; edgeidx < 5; edgeidx++) |
| { |
| acc_error_category[edgeidx] = 0; |
| category_count[edgeidx] = 0; |
| } |
| // clang-format off |
| ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_get_chroma_eo_sao_params(ps_sao_ctxt, |
| s_best_luma_chroma_cand.b3_y_type_idx, acc_error_category, |
| category_count); |
| // clang-format on |
| |
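| /* Fill the chroma (cb and cr) sao parameters of the luma_chroma cand structure for the next stage of RDO |
| * between the luma_chroma combined cand, the NO SAO cand, and the LEFT and TOP merge cands. The cb/cr type idx is taken from the best luma type idx and each offset is acc_error_category / category_count, clipped |
| */ |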
| // clang-format off |
| s_best_luma_chroma_cand.b3_cb_type_idx = s_best_luma_chroma_cand.b3_y_type_idx; |
| s_best_luma_chroma_cand.u1_cb_offset[1] = category_count[0] |
| ? (CLIP3(acc_error_category[0] / category_count[0], 0, 7)) |
| : 0; |
| s_best_luma_chroma_cand.u1_cb_offset[2] = category_count[1] |
| ? (CLIP3(acc_error_category[1] / category_count[1], 0, 7)) |
| : 0; |
| s_best_luma_chroma_cand.u1_cb_offset[3] = category_count[3] |
| ? (CLIP3(acc_error_category[3] / category_count[3], -7, 0)) |
| : 0; |
| s_best_luma_chroma_cand.u1_cb_offset[4] = category_count[4] |
| ? (CLIP3(acc_error_category[4] / category_count[4], -7, 0)) |
| : 0; |
| s_best_luma_chroma_cand.b5_cb_band_pos = 0; |
| |
| s_best_luma_chroma_cand.b3_cr_type_idx = s_best_luma_chroma_cand.b3_y_type_idx; |
| s_best_luma_chroma_cand.u1_cr_offset[1] = category_count[0] |
| ? (CLIP3(acc_error_category[0] / category_count[0], 0, 7)) |
| : 0; |
| s_best_luma_chroma_cand.u1_cr_offset[2] = category_count[1] |
| ? (CLIP3(acc_error_category[1] / category_count[1], 0, 7)) |
| : 0; |
| s_best_luma_chroma_cand.u1_cr_offset[3] = category_count[3] |
| ? (CLIP3(acc_error_category[3] / category_count[3], -7, 0)) |
| : 0; |
| s_best_luma_chroma_cand.u1_cr_offset[4] = category_count[4] |
| ? (CLIP3(acc_error_category[4] / category_count[4], -7, 0)) |
| : 0; |
| // clang-format on |
| s_best_luma_chroma_cand.b5_cr_band_pos = 0; |
| } |
| else |
| { |
| /*Back up the top pixels for (x,y+1)th ctb*/ |
| if(!ps_sao_ctxt->i4_is_last_ctb_row) |
| { |
| memcpy( |
| ps_sao_ctxt->pu1_curr_sao_src_top_chroma + |
| ps_sao_ctxt->i4_frm_top_chroma_buf_stride, |
| pu1_recon_chroma + chroma_recon_stride * ((ctb_size >> !u1_is_422) - 1), |
| ps_sao_ctxt->i4_sao_blk_wd); |
| } |
| |
| s_best_luma_chroma_cand.b3_cb_type_idx = SAO_NONE; |
| s_best_luma_chroma_cand.u1_cb_offset[1] = 0; |
| s_best_luma_chroma_cand.u1_cb_offset[2] = 0; |
| s_best_luma_chroma_cand.u1_cb_offset[3] = 0; |
| s_best_luma_chroma_cand.u1_cb_offset[4] = 0; |
| s_best_luma_chroma_cand.b5_cb_band_pos = 0; |
| |
| s_best_luma_chroma_cand.b3_cr_type_idx = SAO_NONE; |
| s_best_luma_chroma_cand.u1_cr_offset[1] = 0; |
| s_best_luma_chroma_cand.u1_cr_offset[2] = 0; |
| s_best_luma_chroma_cand.u1_cr_offset[3] = 0; |
| s_best_luma_chroma_cand.u1_cr_offset[4] = 0; |
| s_best_luma_chroma_cand.b5_cr_band_pos = 0; |
| |
| s_best_luma_chroma_cand.b1_sao_merge_left_flag = 0; |
| s_best_luma_chroma_cand.b1_sao_merge_up_flag = 0; |
| } |
| |
| s_best_luma_chroma_cand.b1_sao_merge_left_flag = 0; |
| s_best_luma_chroma_cand.b1_sao_merge_up_flag = 0; |
| |
| /*****************************************************/ |
| /**RDO for Best Luma - Chroma combined, No SAO,*******/ |
| /*************Left merge and Top merge****************/ |
| /*****************************************************/ |
| |
| /* No SAO cand*/ |
| ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_left_flag = 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_up_flag = 0; |
| |
| ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b3_y_type_idx = SAO_NONE; |
| ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_y_offset[1] = 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_y_offset[2] = 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_y_offset[3] = 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_y_offset[4] = 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b5_y_band_pos = 0; |
| |
| ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b3_cb_type_idx = SAO_NONE; |
| ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cb_offset[1] = 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cb_offset[2] = 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cb_offset[3] = 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cb_offset[4] = 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b5_cb_band_pos = 0; |
| |
| ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b3_cr_type_idx = SAO_NONE; |
| ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cr_offset[1] = 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cr_offset[2] = 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cr_offset[3] = 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cr_offset[4] = 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b5_cr_band_pos = 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_left_flag = 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_up_flag = 0; |
| |
| num_rdo_cand++; |
| |
| /* SAO_note_01: If the CTB lies on a tile or a slice boundary, then |
| the standard mandates that the merge candidates must be set to unavailable. |
| Hence, check for tile boundary condition by reading |
| s_ctb_nbr_avail_flags.u1_left_avail rather than frame position of CTB. |
| A special case: Merge-candidates should be available at dependent-slices boundaries. |
| Search for <SAO_note_01> in workspace to know more */ |
| |
| #if !DISABLE_SAO_WHEN_NOISY |
| if(1) |
| #else |
| if(!u1_force_no_offset) |
| #endif |
| { |
| /* Merge left cand*/ |
| if(ps_ctb_enc_loop_out->s_ctb_nbr_avail_flags.u1_left_avail) |
| { |
| memcpy( |
| &ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand], |
| &ps_sao_ctxt->s_left_ctb_sao, |
| sizeof(sao_enc_t)); |
| ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_left_flag = 1; |
| ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_up_flag = 0; |
| num_rdo_cand++; |
| } |
| |
| /* Merge top cand*/ |
| if(ps_ctb_enc_loop_out->s_ctb_nbr_avail_flags.u1_top_avail) |
| { |
| memcpy( |
| &ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand], |
| (ps_sao_ctxt->ps_top_ctb_sao - ps_sao_ctxt->u4_num_ctbs_horz), |
| sizeof(sao_enc_t)); |
| ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_left_flag = 0; |
| ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_up_flag = 1; |
| num_rdo_cand++; |
| } |
| |
| /* Best luma-chroma candidate*/ |
| memcpy( |
| &ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand], |
| &s_best_luma_chroma_cand, |
| sizeof(sao_enc_t)); |
| num_rdo_cand++; |
| } |
| |
| { |
| UWORD32 luma_distortion = 0, chroma_distortion = 0; |
| /* First cand will be best cand after 1st iteration*/ |
| curr_buf_idx = 0; |
| best_buf_idx = 1; |
| best_cost = 0xFFFFFFFF; |
| best_cand_idx = 0; |
| |
| for(rdo_cand = 0; rdo_cand < num_rdo_cand; rdo_cand++) |
| { |
| s_sao_ctxt.ps_sao = &ps_sao_ctxt->as_sao_rd_cand[rdo_cand]; |
| |
| distortion = 0; |
| |
| /* This memcpy is required because cabac uses parameters from this structure |
| * to evaluate bits and this structure ptr is sent to cabac through |
| * "ihevce_cabac_rdo_encode_sao" function |
| */ |
| memcpy(&ps_ctb_enc_loop_out->s_sao, s_sao_ctxt.ps_sao, sizeof(sao_enc_t)); |
| |
| if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag) |
| { |
| /* Copy the left pixels to the scratch buffer for every rdo cand because it is |
| overwritten by the sao leaf level function for the next ctb*/ |
| memcpy( |
| s_sao_ctxt.au1_left_luma_scratch, |
| ps_sao_ctxt->au1_sao_src_left_luma, |
| ps_sao_ctxt->i4_sao_blk_ht); |
| |
| /* Copy the top and top left pixels to the scratch buffer for every rdo cand because it is |
| overwritten by the sao leaf level function for the next ctb*/ |
| memcpy( |
| s_sao_ctxt.au1_top_luma_scratch, |
| ps_sao_ctxt->pu1_curr_sao_src_top_luma - 1, |
| ps_sao_ctxt->i4_sao_blk_wd + 2); |
| s_sao_ctxt.pu1_curr_sao_src_top_luma = s_sao_ctxt.au1_top_luma_scratch + 1; |
| |
| pu1_luma_scratch_buf = ps_sao_ctxt->au1_sao_luma_scratch[curr_buf_idx]; |
| |
| /* Copy the deblocked recon data to scratch buffer to do sao*/ |
| |
| ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_copy_2d( |
| pu1_luma_scratch_buf, |
| i4_luma_scratch_buf_stride, |
| pu1_recon_luma, |
| luma_recon_stride, |
| SCRATCH_BUF_STRIDE, |
| ctb_ht + 1); |
| s_sao_ctxt.pu1_cur_luma_recon_buf = pu1_luma_scratch_buf; |
| s_sao_ctxt.i4_cur_luma_recon_stride = i4_luma_scratch_buf_stride; |
| |
| ASSERT( |
| (abs(s_sao_ctxt.ps_sao->u1_y_offset[1]) <= 7) && |
| (abs(s_sao_ctxt.ps_sao->u1_y_offset[2]) <= 7) && |
| (abs(s_sao_ctxt.ps_sao->u1_y_offset[3]) <= 7) && |
| (abs(s_sao_ctxt.ps_sao->u1_y_offset[4]) <= 7)); |
| } |
| if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag) |
| { |
| /* Copy the left pixels to the scratch buffer for every rdo cand because it is |
| overwritten by the sao leaf level function for the next ctb*/ |
| memcpy( |
| s_sao_ctxt.au1_left_chroma_scratch, |
| ps_sao_ctxt->au1_sao_src_left_chroma, |
| (ps_sao_ctxt->i4_sao_blk_ht >> !u1_is_422) * 2); |
| |
| /* Copy the top and top left pixels to the scratch buffer for every rdo cand because it is |
| overwritten by the sao leaf level function for the next ctb*/ |
| memcpy( |
| s_sao_ctxt.au1_top_chroma_scratch, |
| ps_sao_ctxt->pu1_curr_sao_src_top_chroma - 2, |
| ps_sao_ctxt->i4_sao_blk_wd + 4); |
| |
| s_sao_ctxt.pu1_curr_sao_src_top_chroma = s_sao_ctxt.au1_top_chroma_scratch + 2; |
| |
| pu1_chroma_scratch_buf = ps_sao_ctxt->au1_sao_chroma_scratch[curr_buf_idx]; |
| |
| /* Copy the deblocked recon data to scratch buffer to do sao*/ |
| |
| ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_copy_2d( |
| pu1_chroma_scratch_buf, |
| i4_chroma_scratch_buf_stride, |
| pu1_recon_chroma, |
| chroma_recon_stride, |
| SCRATCH_BUF_STRIDE, |
| (ctb_ht >> !u1_is_422) + 1); |
| |
| s_sao_ctxt.pu1_cur_chroma_recon_buf = pu1_chroma_scratch_buf; |
| s_sao_ctxt.i4_cur_chroma_recon_stride = i4_chroma_scratch_buf_stride; |
| |
| ASSERT( |
| (abs(s_sao_ctxt.ps_sao->u1_cb_offset[1]) <= 7) && |
| (abs(s_sao_ctxt.ps_sao->u1_cb_offset[2]) <= 7) && |
| (abs(s_sao_ctxt.ps_sao->u1_cb_offset[3]) <= 7) && |
| (abs(s_sao_ctxt.ps_sao->u1_cb_offset[4]) <= 7)); |
| ASSERT( |
| (abs(s_sao_ctxt.ps_sao->u1_cr_offset[1]) <= 7) && |
| (abs(s_sao_ctxt.ps_sao->u1_cr_offset[2]) <= 7) && |
| (abs(s_sao_ctxt.ps_sao->u1_cr_offset[3]) <= 7) && |
| (abs(s_sao_ctxt.ps_sao->u1_cr_offset[4]) <= 7)); |
| } |
| |
| ASSERT( |
| (s_sao_ctxt.ps_sao->b5_y_band_pos <= 28) && |
| (s_sao_ctxt.ps_sao->b5_cb_band_pos <= 28) && |
| (s_sao_ctxt.ps_sao->b5_cr_band_pos <= 28)); |
| |
| s_sao_ctxt.i1_slice_sao_luma_flag = s_sao_ctxt.ps_slice_hdr->i1_slice_sao_luma_flag; |
| s_sao_ctxt.i1_slice_sao_chroma_flag = s_sao_ctxt.ps_slice_hdr->i1_slice_sao_chroma_flag; |
| |
| ihevce_sao_ctb(&s_sao_ctxt, ps_tile_params); |
| |
| if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag) |
| { // clang-format off |
| luma_distortion = |
| ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_ssd_calculator(pu1_src_luma, |
| s_sao_ctxt.pu1_cur_luma_recon_buf, luma_src_stride, |
| s_sao_ctxt.i4_cur_luma_recon_stride, ctb_wd, |
| ctb_ht); |
| } // clang-format on |
| |
| if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag) |
| { // clang-format off |
| chroma_distortion = |
| ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_ssd_calculator(pu1_src_chroma, |
| s_sao_ctxt.pu1_cur_chroma_recon_buf, |
| chroma_src_stride, |
| s_sao_ctxt.i4_cur_chroma_recon_stride, ctb_wd, |
| (ctb_ht >> !u1_is_422)); |
| } // clang-format on |
| |
| /*chroma distortion is added after correction because of lambda difference*/ |
| distortion = |
| luma_distortion + |
| (UWORD32)(chroma_distortion * (i8_cl_ssd_lambda_qf / i8_cl_ssd_lambda_chroma_qf)); |
| |
| ps_sao_ctxt->ps_rdopt_entropy_ctxt->i4_curr_buf_idx = curr_buf_idx; |
| ctb_bits = ihevce_cabac_rdo_encode_sao( |
| ps_sao_ctxt->ps_rdopt_entropy_ctxt, ps_ctb_enc_loop_out); |
| |
| /* Calculate the cost as D + (lambda) * R */ |
| curr_cost = distortion + |
| COMPUTE_RATE_COST_CLIP30(ctb_bits, i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT); |
| |
| if(curr_cost < best_cost) |
| { |
| best_ctb_sao_bits = ctb_bits; |
| best_cost = curr_cost; |
| best_buf_idx = ps_sao_ctxt->ps_rdopt_entropy_ctxt->i4_curr_buf_idx; |
| best_cand_idx = rdo_cand; |
| curr_buf_idx = !curr_buf_idx; |
| } |
| } |
| /* Store the sao bits of the best candidate in the header bits output */ |
| *pu4_frame_rdopt_header_bits = best_ctb_sao_bits; |
| |
| ihevce_update_best_sao_cabac_state(ps_sao_ctxt->ps_rdopt_entropy_ctxt, best_buf_idx); |
| |
| /* store the sao parameters of curr ctb for top merge and left merge*/ |
| memcpy( |
| ps_sao_ctxt->ps_top_ctb_sao, |
| &ps_sao_ctxt->as_sao_rd_cand[best_cand_idx], |
| sizeof(sao_enc_t)); |
| memcpy( |
| &ps_sao_ctxt->s_left_ctb_sao, |
| &ps_sao_ctxt->as_sao_rd_cand[best_cand_idx], |
| sizeof(sao_enc_t)); |
| |
| /* Copy the sao parameters of winning candidate into the structure which will be sent to entropy thrd*/ |
| memcpy( |
| &ps_ctb_enc_loop_out->s_sao, |
| &ps_sao_ctxt->as_sao_rd_cand[best_cand_idx], |
| sizeof(sao_enc_t)); |
| |
| if(!ps_sao_ctxt->i4_is_last_ctb_col) |
| { |
| /* Update left luma buffer for next ctb */ |
| for(row = 0; row < ps_sao_ctxt->i4_sao_blk_ht; row++) |
| { |
| ps_sao_ctxt->au1_sao_src_left_luma[row] = |
| ps_sao_ctxt->pu1_cur_luma_recon_buf |
| [row * ps_sao_ctxt->i4_cur_luma_recon_stride + |
| (ps_sao_ctxt->i4_sao_blk_wd - 1)]; |
| } |
| } |
| |
| if(!ps_sao_ctxt->i4_is_last_ctb_col) |
| { |
| /* Update left chroma buffer for next ctb */ |
| for(row = 0; row < (ps_sao_ctxt->i4_sao_blk_ht >> 1); row++) |
| { |
| *(UWORD16 *)(ps_sao_ctxt->au1_sao_src_left_chroma + row * 2) = |
| *(UWORD16 *)(ps_sao_ctxt->pu1_cur_chroma_recon_buf + |
| row * ps_sao_ctxt->i4_cur_chroma_recon_stride + |
| (ps_sao_ctxt->i4_sao_blk_wd - 2)); |
| } |
| } |
| |
| if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag) |
| { |
| /* Copy the sao'ed output of the best candidate to the recon buffer*/ |
| |
| ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_copy_2d( |
| ps_sao_ctxt->pu1_cur_luma_recon_buf, |
| ps_sao_ctxt->i4_cur_luma_recon_stride, |
| ps_sao_ctxt->au1_sao_luma_scratch[best_buf_idx], |
| i4_luma_scratch_buf_stride, |
| ctb_wd, |
| ctb_ht); |
| } |
| if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag) |
| { |
| /* Copy the sao'ed output of the best candidate to the chroma recon buffer*/ |
| |
| ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_copy_2d( |
| ps_sao_ctxt->pu1_cur_chroma_recon_buf, |
| ps_sao_ctxt->i4_cur_chroma_recon_stride, |
| ps_sao_ctxt->au1_sao_chroma_scratch[best_buf_idx], |
| i4_chroma_scratch_buf_stride, |
| ctb_wd, |
| ctb_ht >> !u1_is_422); |
| } |
| } |
| } |