| /****************************************************************************** |
| * |
| * Copyright (C) 2018 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at: |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| * |
| ***************************************************************************** |
| * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
| */ |
| /** |
| ******************************************************************************* |
| * @file |
| * ihevc_quant_iquant_ssd.c |
| * |
| * @brief |
| * Contains function definitions for quantization, followed by Inverse |
| * quantization to find transform domain SSD |
| * |
| * @author |
| * 100453, 100578 |
| * |
| * @par List of Functions: |
| * - ihevc_quant_iquant_ssd() |
| * - ihevc_quant_iquant_ssd_flat_scale_mat() |
| * |
| * @remarks |
| * None |
| * |
| ******************************************************************************* |
| */ |
| |
| #include <stdio.h> |
| #include <string.h> |
| #include <stdlib.h> |
| #include "ihevc_typedefs.h" |
| #include "ihevc_macros.h" |
| #include "ihevc_platform_macros.h" |
| #include "ihevc_defs.h" |
| #include "ihevc_debug.h" |
| #include "ihevc_trans_tables.h" |
| #include "ihevc_quant_iquant_ssd.h" |
| #include "ihevc_func_selector.h" |
| #include "ihevc_trans_macros.h" |
| #include <assert.h> |
| |
| /*****************************************************************************/ |
| /* Globals */ |
| /*****************************************************************************/ |
| |
| |
| /** |
| ******************************************************************************* |
| * |
| * @brief |
| * This function performs quantization, followed by Inverse |
| * quantization to find transform domain SSD |
| * |
| * @par Description: |
| * Performs quantization on coeffs |
| * |
| * @param[in] pi2_coeffs |
| * 4x4 Coeffs |
| * |
| * @param[in] pi2_quant_coeff |
| * Scaling Matrix |
| * |
| * @param[out] pi2_dst |
| * Output 4x4 coefficients |
| * |
| * @param[in] qp_div |
| * Quantization parameter / 6 |
| * |
| * @param[in] qp_rem |
| * Quantization parameter % 6 |
| * |
| * @param[in] src_strd |
| * Input stride |
| * |
| * @param[in] dst_strd |
| * Output Stride |
| * |
| * @param[out] csbf |
| * coded sub block flag |
| * |
| * @param[in] csbf_strd |
| * coded sub block flag |
| * |
| * @param[out] zero_col |
| * zero column flag |
| * |
| * @param[out] zero_row |
 * zero row flag
| * |
| * @returns cbf |
| * coded block flag |
| * |
| * @remarks |
| * None |
| * |
| ******************************************************************************* |
| */ |
| |
| WORD32 ihevc_quant_iquant_ssd |
| ( |
| WORD16 *pi2_coeffs, |
| WORD16 *pi2_quant_coeff, |
| WORD16 *pi2_q_dst, |
| WORD16 *pi2_iq_dst, |
| WORD32 trans_size, |
| WORD32 qp_div,/* qpscaled / 6 */ |
| WORD32 qp_rem,/* qpscaled % 6 */ |
| WORD32 q_add, |
| WORD32 *pi4_quant_round_factor_0_1, |
| WORD32 *pi4_quant_round_factor_1_2, |
| WORD32 src_strd, |
| WORD32 dst_q_strd, |
| WORD32 dst_iq_strd, |
| UWORD8 *csbf, |
| WORD32 csbf_strd, |
| WORD32 *zero_col, |
| WORD32 *zero_row, |
| WORD16 *pi2_dequant_coeff, |
| LWORD64 *pi8_cost |
| ) |
| { |
| WORD32 i, j; |
| WORD32 log2_size; |
| WORD16 *pi2_q_dst_orig; |
| WORD32 cbf = 0; |
| WORD32 bit_depth,shift_iq; |
| WORD32 val; |
| WORD16 i2_temp; |
| WORD32 ssd_cost = 0; |
| |
| (void)pi4_quant_round_factor_0_1; |
| (void)pi4_quant_round_factor_1_2; |
| pi2_q_dst_orig = pi2_q_dst; |
| |
| /* Quant initialization */ |
| GETRANGE(log2_size, trans_size); |
| log2_size -= 1; |
| |
| bit_depth = 8 + 0; |
| shift_iq = bit_depth + log2_size - 5; |
| |
| for(i = 0; i < trans_size; i++) |
| { |
| for(j = 0; j < trans_size; j++) |
| { |
| /* Back up the coefficients before Quantization */ |
| i2_temp = pi2_coeffs[j]; |
| |
| /* Quantization */ |
| QUANT(pi2_q_dst[j], pi2_coeffs[j], |
| pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div, |
| log2_size, q_add); |
| |
| /* Inverse Quantization */ |
| IQUANT(pi2_iq_dst[j], |
| pi2_q_dst[j], /*pi2_src[index*src_strd]*/ |
| pi2_dequant_coeff[j]*g_ihevc_iquant_scales[qp_rem], |
| /*pi2_dequant_coeff[index*trans_size] * g_ihevc_iquant_scales[qp_rem] */ |
| shift_iq, |
| qp_div); |
| |
| /* SSD Computation & Accumulation */ |
| val = i2_temp - pi2_iq_dst[j]; |
| ssd_cost += val*val; |
| |
| } |
| |
| pi2_q_dst += dst_q_strd; |
| pi2_iq_dst += dst_iq_strd; |
| pi2_quant_coeff += trans_size; |
| pi2_coeffs += src_strd; |
| pi2_dequant_coeff += trans_size; |
| } |
| |
| /* Store the cost */ |
| *pi8_cost = ssd_cost; |
| |
| /* CSBF update */ |
| { |
| WORD32 block_row, block_col; |
| WORD32 row, col; |
| WORD16 *pi2_block; |
| UWORD32 temp_zero_col = 0; |
| UWORD32 temp_zero_row = 0; |
| |
| pi2_q_dst = pi2_q_dst_orig; |
| |
| for(block_row = 0; block_row < trans_size; block_row += 4) |
| { |
| //block_col is incrementing by 1 for easy update of csbf pointer |
| for(block_col = 0; block_col < trans_size / 4; block_col++) |
| { |
| pi2_block = pi2_q_dst + block_row * dst_q_strd + block_col * 4; |
| *(csbf + block_col) = 0; |
| |
| for(row = 0; row < 4; row++) |
| { |
| for(col = 0; col < 4; col++) |
| { |
| if(pi2_block[row * dst_q_strd + col] != 0) |
| { |
| *(csbf + block_col) = 1; |
| break; |
| } |
| } |
| if(*(csbf + block_col) == 1) |
| { |
| /* zero_col update *//* temp_zero_col = ~zero_col */ |
| temp_zero_col = (temp_zero_col) | (0xFU << block_col * 4); |
| // zero col can be optimized further. Now clearing the |
| // entire 4 bits corresponding to 4 colums of 4x4 block |
| // even if any 4x4 csbf is set |
| |
| /* zero row update */ /* temp_zero_row = ~zero_row */ |
| temp_zero_row = (temp_zero_row) | (0xFU << block_row); |
| // zero row can be optimized further. Now clearing the |
| // entire 4 bits corresponding to 4 rows of 4x4 block |
| // even if any 4x4 csbf is set |
| |
| break; |
| } |
| } |
| |
| cbf = cbf || (*(csbf + block_col)); // cbf update |
| } |
| csbf += csbf_strd; |
| } |
| |
| *zero_col = ~temp_zero_col; //final zero_col storing |
| *zero_row = ~temp_zero_row; //final zero_row storing |
| } |
| |
| return cbf; |
| } |
| |
| /** |
| ******************************************************************************* |
| * |
| * @brief |
| * This function performs quantization, followed by Inverse |
| * quantization |
| * |
| * @par Description: |
| * Performs quantization on coeffs |
| * |
| * @param[in] pi2_coeffs |
| * 4x4 Coeffs |
| * |
| * @param[in] pi2_quant_coeff |
| * Scaling Matrix |
| * |
| * @param[out] pi2_dst |
| * Output 4x4 coefficients |
| * |
| * @param[in] qp_div |
| * Quantization parameter / 6 |
| * |
| * @param[in] qp_rem |
| * Quantization parameter % 6 |
| * |
| * @param[in] src_strd |
| * Input stride |
| * |
| * @param[in] dst_strd |
| * Output Stride |
| * |
| * @param[out] csbf |
| * coded sub block flag |
| * |
| * @param[in] csbf_strd |
| * coded sub block flag |
| * |
| * @param[out] zero_col |
| * zero column flag |
| * |
| * @param[out] zero_row |
 * zero row flag
| * |
| * @returns cbf |
| * coded block flag |
| * |
| * @remarks |
| * None |
| * |
| ******************************************************************************* |
| */ |
| |
| WORD32 ihevc_quant_iquant |
| ( |
| WORD16 *pi2_coeffs, |
| WORD16 *pi2_quant_coeff, |
| WORD16 *pi2_q_dst, |
| WORD16 *pi2_iq_dst, |
| WORD32 trans_size, |
| WORD32 qp_div,/* qpscaled / 6 */ |
| WORD32 qp_rem,/* qpscaled % 6 */ |
| WORD32 q_add, |
| WORD32 *pi4_quant_round_factor_0_1, |
| WORD32 *pi4_quant_round_factor_1_2, |
| WORD32 src_strd, |
| WORD32 dst_q_strd, |
| WORD32 dst_iq_strd, |
| UWORD8 *csbf, |
| WORD32 csbf_strd, |
| WORD32 *zero_col, |
| WORD32 *zero_row, |
| WORD16 *pi2_dequant_coeff, |
| LWORD64 *pi8_cost |
| ) |
| { |
| WORD32 i, j; |
| WORD32 log2_size; |
| WORD16 *pi2_q_dst_orig; |
| WORD32 cbf = 0; |
| WORD32 bit_depth,shift_iq; |
| WORD16 i2_temp; |
| |
| (void)pi8_cost; |
| (void)pi4_quant_round_factor_0_1; |
| (void)pi4_quant_round_factor_1_2; |
| pi2_q_dst_orig = pi2_q_dst; |
| |
| /* Quant initialization */ |
| GETRANGE(log2_size, trans_size); |
| log2_size -= 1; |
| |
| bit_depth = 8; |
| shift_iq = bit_depth + log2_size - 5; |
| |
| for(i = 0; i < trans_size; i++) |
| { |
| for(j = 0; j < trans_size; j++) |
| { |
| /* Back up the coefficients before Quantization */ |
| i2_temp = pi2_coeffs[j]; |
| |
| /* Quantization */ |
| QUANT(pi2_q_dst[j], pi2_coeffs[j], |
| pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div, |
| log2_size, q_add); |
| |
| /* Inverse Quantization */ |
| IQUANT(pi2_iq_dst[j], |
| pi2_q_dst[j], /*pi2_src[index*src_strd]*/ |
| pi2_dequant_coeff[j]*g_ihevc_iquant_scales[qp_rem], |
| shift_iq, |
| qp_div); |
| } |
| |
| pi2_q_dst += dst_q_strd; |
| pi2_iq_dst += dst_iq_strd; |
| pi2_quant_coeff += trans_size; |
| pi2_coeffs += src_strd; |
| pi2_dequant_coeff += trans_size; |
| } |
| |
| /* CSBF update */ |
| { |
| WORD32 block_row, block_col; |
| WORD32 row, col; |
| WORD16 *pi2_block; |
| UWORD32 temp_zero_col = 0; |
| UWORD32 temp_zero_row = 0; |
| |
| pi2_q_dst = pi2_q_dst_orig; |
| |
| for(block_row = 0; block_row < trans_size; block_row += 4) |
| { |
| //block_col is incrementing by 1 for easy update of csbf pointer |
| for(block_col = 0; block_col < trans_size / 4; block_col++) |
| { |
| pi2_block = pi2_q_dst + block_row * dst_q_strd + block_col * 4; |
| *(csbf + block_col) = 0; |
| |
| for(row = 0; row < 4; row++) |
| { |
| for(col = 0; col < 4; col++) |
| { |
| if(pi2_block[row * dst_q_strd + col] != 0) |
| { |
| *(csbf + block_col) = 1; |
| break; |
| } |
| } |
| if(*(csbf + block_col) == 1) |
| { |
| /* zero_col update *//* temp_zero_col = ~zero_col */ |
| temp_zero_col = (temp_zero_col) | (0xFU << block_col * 4); |
| // zero col can be optimized further. Now clearing the |
| // entire 4 bits corresponding to 4 colums of 4x4 block |
| // even if any 4x4 csbf is set |
| |
| /* zero row update */ /* temp_zero_row = ~zero_row */ |
| temp_zero_row = (temp_zero_row) | (0xFU << block_row); |
| // zero row can be optimized further. Now clearing the |
| // entire 4 bits corresponding to 4 rows of 4x4 block |
| // even if any 4x4 csbf is set |
| |
| break; |
| } |
| } |
| |
| cbf = cbf || (*(csbf + block_col)); // cbf update |
| } |
| |
| csbf += csbf_strd; |
| } |
| |
| *zero_col = ~temp_zero_col; //final zero_col storing |
| *zero_row = ~temp_zero_row; //final zero_row storing |
| } |
| |
| return cbf; |
| } |
| |
| /** |
| ******************************************************************************* |
| * |
| * @brief |
| * This function performs quantization, followed by Inverse |
| * quantization to find transform domain SSD |
| * |
| * @par Description: |
| * Performs quantization on coeffs |
| * |
| * @param[in] pi2_coeffs |
| * 4x4 Coeffs |
| * |
| * @param[in] pi2_quant_coeff |
| * Scaling Matrix |
| * |
| * @param[out] pi2_dst |
| * Output 4x4 coefficients |
| * |
| * @param[in] qp_div |
| * Quantization parameter / 6 |
| * |
| * @param[in] qp_rem |
| * Quantization parameter % 6 |
| * |
| * @param[in] src_strd |
| * Input stride |
| * |
| * @param[in] dst_strd |
| * Output Stride |
| * |
| * @param[out] csbf |
| * coded sub block flag |
| * |
| * @param[in] csbf_strd |
| * coded sub block flag |
| * |
| * @param[out] zero_col |
| * zero column flag |
| * |
| * @param[out] zero_row |
 * zero row flag
| * |
| * @returns cbf |
| * coded block flag |
| * |
| * @remarks |
| * None |
| * |
| ******************************************************************************* |
| */ |
| |
| WORD32 ihevc_quant_iquant_ssd_rdoq |
| ( |
| WORD16 *pi2_coeffs, |
| WORD16 *pi2_quant_coeff, |
| WORD16 *pi2_q_dst, |
| WORD16 *pi2_iq_dst, |
| WORD32 trans_size, |
| WORD32 qp_div,/* qpscaled / 6 */ |
| WORD32 qp_rem,/* qpscaled % 6 */ |
| WORD32 q_add, |
| WORD32 *pi4_quant_round_factor_0_1, |
| WORD32 *pi4_quant_round_factor_1_2, |
| WORD32 src_strd, |
| WORD32 dst_q_strd, |
| WORD32 dst_iq_strd, |
| UWORD8 *csbf, |
| WORD32 csbf_strd, |
| WORD32 *zero_col, |
| WORD32 *zero_row, |
| WORD16 *pi2_dequant_coeff, |
| LWORD64 *pi8_cost |
| ) |
| { |
| WORD32 i, j; |
| WORD32 log2_size; |
| WORD16 *pi2_q_dst_orig; |
| WORD32 cbf = 0; |
| WORD32 bit_depth,shift_iq; |
| WORD32 val; |
| WORD16 i2_temp; |
| WORD32 ssd_cost = 0; |
| |
| (void)pi4_quant_round_factor_0_1; |
| (void)pi4_quant_round_factor_1_2; |
| pi2_q_dst_orig = pi2_q_dst; |
| |
| GETRANGE(log2_size, trans_size); |
| log2_size -= 1; |
| |
| bit_depth = 8 + 0; |
| shift_iq = bit_depth + log2_size - 5; |
| |
| for(i = 0; i < trans_size; i++) |
| { |
| for(j = 0; j < trans_size; j++) |
| { |
| /* Back up the coefficients before Quantization */ |
| i2_temp = pi2_coeffs[j]; |
| |
| /* Quantization */ |
| QUANT(pi2_q_dst[j], pi2_coeffs[j], |
| pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div, |
| log2_size, q_add); |
| |
| |
| if (abs(pi2_q_dst[j]) > 1) |
| { |
| QUANT(pi2_q_dst[j],i2_temp, |
| pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div, |
| log2_size, ((1 << QUANT_ROUND_FACTOR_Q)/2)); |
| |
| } |
| |
| |
| /* Inverse Quantization */ |
| IQUANT(pi2_iq_dst[j], |
| pi2_q_dst[j], /*pi2_src[index*src_strd]*/ |
| pi2_dequant_coeff[j]*g_ihevc_iquant_scales[qp_rem], |
| /*pi2_dequant_coeff[index*trans_size] * g_ihevc_iquant_scales[qp_rem] */ |
| shift_iq, |
| qp_div); |
| |
| /* SSD Computation & Accumulation */ |
| val = i2_temp - pi2_iq_dst[j]; |
| ssd_cost += val*val; |
| |
| } |
| |
| pi2_q_dst += dst_q_strd; |
| pi2_iq_dst += dst_iq_strd; |
| pi2_quant_coeff += trans_size; |
| pi2_coeffs += src_strd; |
| pi2_dequant_coeff += trans_size; |
| } |
| /* Store the cost */ |
| *pi8_cost = ssd_cost; |
| |
| /* CSBF update */ |
| { |
| WORD32 block_row, block_col; |
| WORD32 row, col; |
| WORD16 *pi2_block; |
| UWORD32 temp_zero_col = 0; |
| UWORD32 temp_zero_row = 0; |
| |
| pi2_q_dst = pi2_q_dst_orig; |
| |
| for(block_row = 0; block_row < trans_size; block_row += 4) |
| { |
| //block_col is incrementing by 1 for easy update of csbf pointer |
| for(block_col = 0; block_col < trans_size / 4; block_col++) |
| { |
| pi2_block = pi2_q_dst + block_row * dst_q_strd + block_col * 4; |
| *(csbf + block_col) = 0; |
| |
| for(row = 0; row < 4; row++) |
| { |
| for(col = 0; col < 4; col++) |
| { |
| if(pi2_block[row * dst_q_strd + col] != 0) |
| { |
| *(csbf + block_col) = 1; |
| break; |
| } |
| } |
| if(*(csbf + block_col) == 1) |
| { |
| /* zero_col update *//* temp_zero_col = ~zero_col */ |
| temp_zero_col = (temp_zero_col) | (0xFU << block_col * 4); |
| // zero col can be optimized further. Now clearing the |
| // entire 4 bits corresponding to 4 colums of 4x4 block |
| // even if any 4x4 csbf is set |
| |
| /* zero row update */ /* temp_zero_row = ~zero_row */ |
| temp_zero_row = (temp_zero_row) | (0xFU << block_row); |
| // zero row can be optimized further. Now clearing the |
| // entire 4 bits corresponding to 4 rows of 4x4 block |
| // even if any 4x4 csbf is set |
| |
| break; |
| } |
| } |
| |
| cbf = cbf || (*(csbf + block_col)); // cbf update |
| } |
| csbf += csbf_strd; |
| } |
| |
| *zero_col = ~temp_zero_col; //final zero_col storing |
| *zero_row = ~temp_zero_row; //final zero_row storing |
| } |
| |
| return cbf; |
| } |
| |
| WORD32 ihevc_quant_iquant_rdoq |
| ( |
| WORD16 *pi2_coeffs, |
| WORD16 *pi2_quant_coeff, |
| WORD16 *pi2_q_dst, |
| WORD16 *pi2_iq_dst, |
| WORD32 trans_size, |
| WORD32 qp_div,/* qpscaled / 6 */ |
| WORD32 qp_rem,/* qpscaled % 6 */ |
| WORD32 q_add, |
| WORD32 *pi4_quant_round_factor_0_1, |
| WORD32 *pi4_quant_round_factor_1_2, |
| WORD32 src_strd, |
| WORD32 dst_q_strd, |
| WORD32 dst_iq_strd, |
| UWORD8 *csbf, |
| WORD32 csbf_strd, |
| WORD32 *zero_col, |
| WORD32 *zero_row, |
| WORD16 *pi2_dequant_coeff, |
| LWORD64 *pi8_cost |
| ) |
| { |
| WORD32 i, j; |
| WORD32 log2_size; |
| WORD16 *pi2_q_dst_orig; |
| WORD32 cbf = 0; |
| WORD32 bit_depth,shift_iq; |
| WORD16 i2_temp; |
| |
| (void)pi8_cost; |
| (void)pi4_quant_round_factor_0_1; |
| (void)pi4_quant_round_factor_1_2; |
| pi2_q_dst_orig = pi2_q_dst; |
| |
| GETRANGE(log2_size, trans_size); |
| log2_size -= 1; |
| |
| bit_depth = 8 + 0; |
| shift_iq = bit_depth + log2_size - 5; |
| |
| for(i = 0; i < trans_size; i++) |
| { |
| for(j = 0; j < trans_size; j++) |
| { |
| /* Back up the coefficients before Quantization */ |
| i2_temp = pi2_coeffs[j]; |
| |
| /* Quantization */ |
| QUANT(pi2_q_dst[j], pi2_coeffs[j], |
| pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div, |
| log2_size, q_add); |
| |
| if (abs(pi2_q_dst[j]) > 1) |
| { |
| QUANT(pi2_q_dst[j],i2_temp, |
| pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div, |
| log2_size, ((1 << QUANT_ROUND_FACTOR_Q)/2)); |
| } |
| |
| /* Inverse Quantization */ |
| IQUANT(pi2_iq_dst[j], |
| pi2_q_dst[j], /*pi2_src[index*src_strd]*/ |
| pi2_dequant_coeff[j]*g_ihevc_iquant_scales[qp_rem], |
| shift_iq, |
| qp_div); |
| } |
| |
| pi2_q_dst += dst_q_strd; |
| pi2_iq_dst += dst_iq_strd; |
| pi2_quant_coeff += trans_size; |
| pi2_coeffs += src_strd; |
| pi2_dequant_coeff += trans_size; |
| } |
| |
| /* CSBF update */ |
| { |
| WORD32 block_row, block_col; |
| WORD32 row, col; |
| WORD16 *pi2_block; |
| UWORD32 temp_zero_col = 0; |
| UWORD32 temp_zero_row = 0; |
| |
| pi2_q_dst = pi2_q_dst_orig; |
| |
| for(block_row = 0; block_row < trans_size; block_row += 4) |
| { |
| //block_col is incrementing by 1 for easy update of csbf pointer |
| for(block_col = 0; block_col < trans_size / 4; block_col++) |
| { |
| pi2_block = pi2_q_dst + block_row * dst_q_strd + block_col * 4; |
| *(csbf + block_col) = 0; |
| |
| for(row = 0; row < 4; row++) |
| { |
| for(col = 0; col < 4; col++) |
| { |
| if(pi2_block[row * dst_q_strd + col] != 0) |
| { |
| *(csbf + block_col) = 1; |
| break; |
| } |
| } |
| if(*(csbf + block_col) == 1) |
| { |
| /* zero_col update *//* temp_zero_col = ~zero_col */ |
| temp_zero_col = (temp_zero_col) | (0xFU << block_col * 4); |
| // zero col can be optimized further. Now clearing the |
| // entire 4 bits corresponding to 4 colums of 4x4 block |
| // even if any 4x4 csbf is set |
| |
| /* zero row update */ /* temp_zero_row = ~zero_row */ |
| temp_zero_row = (temp_zero_row) | (0xFU << block_row); |
| // zero row can be optimized further. Now clearing the |
| // entire 4 bits corresponding to 4 rows of 4x4 block |
| // even if any 4x4 csbf is set |
| |
| break; |
| } |
| } |
| |
| cbf = cbf || (*(csbf + block_col)); // cbf update |
| } |
| csbf += csbf_strd; |
| } |
| |
| *zero_col = ~temp_zero_col; //final zero_col storing |
| *zero_row = ~temp_zero_row; //final zero_row storing |
| } |
| |
| return cbf; |
| } |
| |
| /** |
| ******************************************************************************* |
| * |
| * @brief |
| * This function performs quantization(using flat scale matrix), followed by |
| * inverse quantization to find transform domain SSD |
| * |
| * @par Description: |
| * Performs quantization on coeffs |
| * |
| * @param[in] pi2_coeffs |
| * 4x4 Coeffs |
| * |
| * @param[in] pi2_quant_coeff |
| * Scaling Matrix |
| * |
| * @param[out] pi2_dst |
| * Output 4x4 coefficients |
| * |
| * @param[in] qp_div |
| * Quantization parameter / 6 |
| * |
| * @param[in] qp_rem |
| * Quantization parameter % 6 |
| * |
| * @param[in] src_strd |
| * Input stride |
| * |
| * @param[in] dst_strd |
| * Output Stride |
| * |
| * @param[out] csbf |
| * coded sub block flag |
| * |
| * @param[in] csbf_strd |
| * coded sub block flag |
| * |
| * @param[out] zero_col |
| * zero column flag |
| * |
| * @param[out] zero_row |
 * zero row flag
| * |
| * @returns cbf |
| * coded block flag |
| * |
| * @remarks |
| * None |
| * |
| ******************************************************************************* |
| */ |
| |
| WORD32 ihevc_quant_iquant_ssd_flat_scale_mat |
| ( |
| WORD16 *pi2_coeffs, |
| WORD16 *pi2_quant_coeff, |
| WORD16 *pi2_q_dst, |
| WORD16 *pi2_iq_dst, |
| WORD32 trans_size, |
| WORD32 qp_div,/* qpscaled / 6 */ |
| WORD32 qp_rem,/* qpscaled % 6 */ |
| WORD32 q_add, |
| WORD32 *pi4_quant_round_factor_0_1, |
| WORD32 *pi4_quant_round_factor_1_2, |
| WORD32 src_strd, |
| WORD32 dst_q_strd, |
| WORD32 dst_iq_strd, |
| UWORD8 *csbf, |
| WORD32 csbf_strd, |
| WORD32 *zero_col, |
| WORD32 *zero_row, |
| WORD16 *pi2_dequant_coeff, |
| LWORD64 *pi8_cost |
| ) |
| { |
| WORD32 i, j; |
| WORD32 log2_size; |
| WORD16 *pi2_q_dst_orig; |
| WORD32 cbf = 0; |
| WORD32 bit_depth,shift_iq; |
| WORD32 val; |
| WORD16 i2_temp; |
| /* Initialize cost to zero */ |
| WORD32 ssd_cost = 0; |
| |
| (void)pi4_quant_round_factor_0_1; |
| (void)pi4_quant_round_factor_1_2; |
| pi2_q_dst_orig = pi2_q_dst; |
| |
| /* Quant initialization */ |
| GETRANGE(log2_size, trans_size); |
| log2_size -= 1; |
| |
| bit_depth = 8 + 0; |
| shift_iq = bit_depth + log2_size - 5; |
| |
| for(i = 0; i < trans_size; i++) |
| { |
| for(j = 0; j < trans_size; j++) |
| { |
| /* Back up the coefficients before Quantization */ |
| i2_temp = pi2_coeffs[j]; |
| |
| /*QUANT(pi2_dst[j], pi2_coeffs[j], |
| pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div, |
| log2_size, q_add);*/ |
| |
| /* modified by 1028 */ |
| /* Quantization */ |
| QUANT_NO_WEIGHTMAT(pi2_q_dst[j], pi2_coeffs[j], |
| g_ihevc_quant_scales[qp_rem], qp_div, |
| log2_size, q_add); |
| |
| if(pi2_q_dst[j] == 0) |
| { |
| pi2_iq_dst[j] = 0; |
| } |
| else |
| { |
| /* Inverse Quantization */ |
| IQUANT(pi2_iq_dst[j], |
| pi2_q_dst[j], /*pi2_src[index*src_strd]*/ |
| pi2_dequant_coeff[j]*g_ihevc_iquant_scales[qp_rem], /*pi2_dequant_coeff[index*trans_size] * g_ihevc_iquant_scales[qp_rem] */ |
| shift_iq, |
| qp_div); |
| } |
| |
| /* SSD Computation & Accumulation */ |
| val = i2_temp - pi2_iq_dst[j]; |
| ssd_cost += val*val; |
| |
| } |
| |
| pi2_q_dst += dst_q_strd; |
| pi2_iq_dst += dst_iq_strd; |
| pi2_quant_coeff += trans_size; |
| pi2_coeffs += src_strd; |
| pi2_dequant_coeff += trans_size; |
| } |
| /* Store the cost */ |
| *pi8_cost = ssd_cost; |
| |
| /* CSBF update */ |
| { |
| WORD32 block_row, block_col; |
| WORD32 row, col; |
| WORD16 *pi2_block; |
| UWORD32 temp_zero_col = 0; |
| UWORD32 temp_zero_row = 0; |
| |
| pi2_q_dst = pi2_q_dst_orig; |
| |
| for(block_row = 0; block_row < trans_size; block_row += 4) |
| { |
| //block_col is incrementing by 1 for easy update of csbf pointer |
| for(block_col = 0; block_col < trans_size / 4; block_col++) |
| { |
| pi2_block = pi2_q_dst + block_row * dst_q_strd + block_col * 4; |
| *(csbf + block_col) = 0; |
| |
| for(row = 0; row < 4; row++) |
| { |
| for(col = 0; col < 4; col++) |
| { |
| if(pi2_block[row * dst_q_strd + col] != 0) |
| { |
| *(csbf + block_col) = 1; |
| break; |
| } |
| } |
| if(*(csbf + block_col) == 1) |
| { |
| /* zero_col update *//* temp_zero_col = ~zero_col */ |
| temp_zero_col = (temp_zero_col) | (0xFU << block_col * 4); |
| // zero col can be optimized further. Now clearing the |
| // entire 4 bits corresponding to 4 colums of 4x4 block |
| // even if any 4x4 csbf is set |
| |
| /* zero row update */ /* temp_zero_row = ~zero_row */ |
| temp_zero_row = (temp_zero_row) | (0xFU << block_row); |
| // zero row can be optimized further. Now clearing the |
| // entire 4 bits corresponding to 4 rows of 4x4 block |
| // even if any 4x4 csbf is set |
| |
| break; |
| } |
| } |
| |
| cbf = cbf || (*(csbf + block_col)); // cbf update |
| } |
| csbf += csbf_strd; |
| } |
| |
| *zero_col = ~temp_zero_col; //final zero_col storing |
| *zero_row = ~temp_zero_row; //final zero_row storing |
| } |
| |
| return cbf; |
| } |
| |
| WORD32 ihevc_quant_iquant_flat_scale_mat |
| ( |
| WORD16 *pi2_coeffs, |
| WORD16 *pi2_quant_coeff, |
| WORD16 *pi2_q_dst, |
| WORD16 *pi2_iq_dst, |
| WORD32 trans_size, |
| WORD32 qp_div,/* qpscaled / 6 */ |
| WORD32 qp_rem,/* qpscaled % 6 */ |
| WORD32 q_add, |
| WORD32 *pi4_quant_round_factor_0_1, |
| WORD32 *pi4_quant_round_factor_1_2, |
| WORD32 src_strd, |
| WORD32 dst_q_strd, |
| WORD32 dst_iq_strd, |
| UWORD8 *csbf, |
| WORD32 csbf_strd, |
| WORD32 *zero_col, |
| WORD32 *zero_row, |
| WORD16 *pi2_dequant_coeff, |
| LWORD64 *pi8_cost |
| ) |
| { |
| WORD32 i, j; |
| WORD32 log2_size; |
| WORD16 *pi2_q_dst_orig; |
| WORD32 cbf = 0; |
| WORD32 bit_depth,shift_iq; |
| WORD16 i2_temp; |
| |
| (void)pi8_cost; |
| (void)pi4_quant_round_factor_0_1; |
| (void)pi4_quant_round_factor_1_2; |
| pi2_q_dst_orig = pi2_q_dst; |
| |
| /* Quant initialization */ |
| GETRANGE(log2_size, trans_size); |
| log2_size -= 1; |
| |
| bit_depth = 8 + 0; |
| shift_iq = bit_depth + log2_size - 5; |
| |
| for(i = 0; i < trans_size; i++) |
| { |
| for(j = 0; j < trans_size; j++) |
| { |
| /* Back up the coefficients before Quantization */ |
| i2_temp = pi2_coeffs[j]; |
| |
| /* Quantization */ |
| QUANT_NO_WEIGHTMAT(pi2_q_dst[j], pi2_coeffs[j], |
| g_ihevc_quant_scales[qp_rem], qp_div, |
| log2_size, q_add); |
| |
| if(pi2_q_dst[j] == 0) |
| { |
| pi2_iq_dst[j] = 0; |
| } |
| else |
| { |
| /* Inverse Quantization */ |
| IQUANT(pi2_iq_dst[j], |
| pi2_q_dst[j], /*pi2_src[index*src_strd]*/ |
| pi2_dequant_coeff[j]*g_ihevc_iquant_scales[qp_rem], /*pi2_dequant_coeff[index*trans_size] * g_ihevc_iquant_scales[qp_rem] */ |
| shift_iq, |
| qp_div); |
| } |
| } |
| |
| pi2_q_dst += dst_q_strd; |
| pi2_iq_dst += dst_iq_strd; |
| pi2_quant_coeff += trans_size; |
| pi2_coeffs += src_strd; |
| pi2_dequant_coeff += trans_size; |
| } |
| |
| /* CSBF update */ |
| { |
| WORD32 block_row, block_col; |
| WORD32 row, col; |
| WORD16 *pi2_block; |
| UWORD32 temp_zero_col = 0; |
| UWORD32 temp_zero_row = 0; |
| |
| pi2_q_dst = pi2_q_dst_orig; |
| |
| for(block_row = 0; block_row < trans_size; block_row += 4) |
| { |
| //block_col is incrementing by 1 for easy update of csbf pointer |
| for(block_col = 0; block_col < trans_size / 4; block_col++) |
| { |
| pi2_block = pi2_q_dst + block_row * dst_q_strd + block_col * 4; |
| *(csbf + block_col) = 0; |
| |
| for(row = 0; row < 4; row++) |
| { |
| for(col = 0; col < 4; col++) |
| { |
| if(pi2_block[row * dst_q_strd + col] != 0) |
| { |
| *(csbf + block_col) = 1; |
| break; |
| } |
| } |
| if(*(csbf + block_col) == 1) |
| { |
| /* zero_col update *//* temp_zero_col = ~zero_col */ |
| temp_zero_col = (temp_zero_col) | (0xFU << block_col * 4); |
| // zero col can be optimized further. Now clearing the |
| // entire 4 bits corresponding to 4 colums of 4x4 block |
| // even if any 4x4 csbf is set |
| |
| /* zero row update */ /* temp_zero_row = ~zero_row */ |
| temp_zero_row = (temp_zero_row) | (0xFU << block_row); |
| // zero row can be optimized further. Now clearing the |
| // entire 4 bits corresponding to 4 rows of 4x4 block |
| // even if any 4x4 csbf is set |
| |
| break; |
| } |
| } |
| |
| cbf = cbf || (*(csbf + block_col)); // cbf update |
| } |
| csbf += csbf_strd; |
| } |
| |
| *zero_col = ~temp_zero_col; //final zero_col storing |
| *zero_row = ~temp_zero_row; //final zero_row storing |
| } |
| |
| return cbf; |
| } |
| |
| /** |
| ******************************************************************************* |
| * |
| * @brief |
| * This function performs quantization(using flat scale matrix), followed by |
| * inverse quantization to find transform domain SSD; when we perform RDOQ. |
 * In case the quantized value turns out to be greater than 1, we then
 * re-quantize using half rounding.
| * |
| * @par Description: |
| * Performs quantization on coeffs |
| * |
| * @param[in] pi2_coeffs |
| * 4x4 Coeffs |
| * |
| * @param[in] pi2_quant_coeff |
| * Scaling Matrix |
| * |
| * @param[out] pi2_dst |
| * Output 4x4 coefficients |
| * |
| * @param[in] qp_div |
| * Quantization parameter / 6 |
| * |
| * @param[in] qp_rem |
| * Quantization parameter % 6 |
| * |
| * @param[in] src_strd |
| * Input stride |
| * |
| * @param[in] dst_strd |
| * Output Stride |
| * |
| * @param[out] csbf |
| * coded sub block flag |
| * |
| * @param[in] csbf_strd |
| * coded sub block flag |
| * |
| * @param[out] zero_col |
| * zero column flag |
| * |
| * @param[out] zero_row |
 * zero row flag
| * |
| * @returns cbf |
| * coded block flag |
| * |
| * @remarks |
| * None |
| * |
| ******************************************************************************* |
| */ |
| |
| WORD32 ihevc_quant_iquant_ssd_flat_scale_mat_rdoq |
| ( |
| WORD16 *pi2_coeffs, |
| WORD16 *pi2_quant_coeff, |
| WORD16 *pi2_q_dst, |
| WORD16 *pi2_iq_dst, |
| WORD32 trans_size, |
| WORD32 qp_div,/* qpscaled / 6 */ |
| WORD32 qp_rem,/* qpscaled % 6 */ |
| WORD32 q_add, |
| WORD32 *pi4_quant_round_factor_0_1, |
| WORD32 *pi4_quant_round_factor_1_2, |
| WORD32 src_strd, |
| WORD32 dst_q_strd, |
| WORD32 dst_iq_strd, |
| UWORD8 *csbf, |
| WORD32 csbf_strd, |
| WORD32 *zero_col, |
| WORD32 *zero_row, |
| WORD16 *pi2_dequant_coeff, |
| LWORD64 *pi8_cost |
| ) |
| { |
| WORD32 i, j; |
| WORD32 log2_size; |
| WORD16 *pi2_q_dst_orig; |
| WORD32 cbf = 0; |
| WORD32 bit_depth,shift_iq; |
| WORD32 val; |
| WORD16 i2_temp; |
| /* Initialize cost to zero */ |
| WORD32 ssd_cost = 0; |
| |
| (void)pi4_quant_round_factor_0_1; |
| (void)pi4_quant_round_factor_1_2; |
| pi2_q_dst_orig = pi2_q_dst; |
| |
| /* Quant initialization */ |
| GETRANGE(log2_size, trans_size); |
| log2_size -= 1; |
| |
| bit_depth = 8 + 0; |
| shift_iq = bit_depth + log2_size - 5; |
| |
| for(i = 0; i < trans_size; i++) |
| { |
| for(j = 0; j < trans_size; j++) |
| { |
| WORD16 i2_temp1; |
| /* Back up the coefficients before Quantization */ |
| i2_temp = pi2_coeffs[j]; |
| |
| /*QUANT(pi2_dst[j], pi2_coeffs[j], |
| pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div, |
| log2_size, q_add);*/ |
| |
| /* modified by 1028 */ |
| /* Quantization */ |
| |
| if (1) |
| { |
| QUANT_NO_WEIGHTMAT(pi2_q_dst[j], pi2_coeffs[j], |
| g_ihevc_quant_scales[qp_rem], qp_div, |
| log2_size, q_add); |
| } |
| else |
| { \ |
| WORD16 inp = pi2_coeffs[j],out = pi2_q_dst[j]; |
| WORD32 quant_coeff = g_ihevc_quant_scales[qp_rem]; |
| WORD32 log2_trans_size = log2_size; |
| WORD32 tmp; \ |
| WORD32 sign; \ |
| WORD32 bit_depth,transform_shift; \ |
| WORD32 q_bits, quant_multiplier; \ |
| \ |
| /* q_bits and q_add calculation*/ \ |
| /* To be moved outside in neon. To be computer once per transform call */ \ |
| bit_depth = 8; \ |
| transform_shift = MAX_TR_DYNAMIC_RANGE - bit_depth - log2_trans_size; \ |
| quant_multiplier = 4 ; /* because quant_coeff are multiplied by 16. Instead of multiplying, we can reduce the division factor q_bits by 4 */ \ |
| q_bits = QUANT_SHIFT + qp_div + transform_shift + SCALING_Q_SHIFT - quant_multiplier - FLAT_RESCALE_MAT_Q_SHIFT /* 2048 */; \ |
| \ |
| sign = (inp)<0 ? -1:1; \ |
| \ |
| tmp = (WORD32)(abs(inp)); \ |
| tmp = tmp * (quant_coeff); \ |
| tmp = tmp + (((WORD32)q_add) << (q_bits - QUANT_ROUND_FACTOR_Q)); \ |
| tmp = tmp >> q_bits; \ |
| \ |
| tmp = tmp * sign; \ |
| out = (WORD16) CLIP_S16(tmp); \ |
| } |
| i2_temp1 = pi2_q_dst[j]; |
| if (abs(pi2_q_dst[j]) > 1) |
| { |
| QUANT_NO_WEIGHTMAT(pi2_q_dst[j], i2_temp, |
| g_ihevc_quant_scales[qp_rem], qp_div, |
| log2_size, ((1 << QUANT_ROUND_FACTOR_Q)/2)); |
| } |
| |
| |
| ASSERT(abs(i2_temp1-pi2_q_dst[j]) <= 1); |
| ASSERT(abs(i2_temp1) <= abs(pi2_q_dst[j])); |
| |
| |
| /* Inverse Quantization */ |
| IQUANT(pi2_iq_dst[j], |
| pi2_q_dst[j], /*pi2_src[index*src_strd]*/ |
| pi2_dequant_coeff[j]*g_ihevc_iquant_scales[qp_rem], /*pi2_dequant_coeff[index*trans_size] * g_ihevc_iquant_scales[qp_rem] */ |
| shift_iq, |
| qp_div); |
| |
| /* SSD Computation & Accumulation */ |
| val = i2_temp - pi2_iq_dst[j]; |
| ssd_cost += val*val; |
| |
| } |
| |
| pi2_q_dst += dst_q_strd; |
| pi2_iq_dst += dst_iq_strd; |
| pi2_quant_coeff += trans_size; |
| pi2_coeffs += src_strd; |
| pi2_dequant_coeff += trans_size; |
| |
| } |
| /* Store the cost */ |
| *pi8_cost = ssd_cost; |
| |
| /* CSBF update */ |
| { |
| WORD32 block_row, block_col; |
| WORD32 row, col; |
| WORD16 *pi2_block; |
| UWORD32 temp_zero_col = 0; |
| UWORD32 temp_zero_row = 0; |
| |
| pi2_q_dst = pi2_q_dst_orig; |
| |
| for(block_row = 0; block_row < trans_size; block_row += 4) |
| { |
| //block_col is incrementing by 1 for easy update of csbf pointer |
| for(block_col = 0; block_col < trans_size / 4; block_col++) |
| { |
| pi2_block = pi2_q_dst + block_row * dst_q_strd + block_col * 4; |
| *(csbf + block_col) = 0; |
| |
| for(row = 0; row < 4; row++) |
| { |
| for(col = 0; col < 4; col++) |
| { |
| if(pi2_block[row * dst_q_strd + col] != 0) |
| { |
| *(csbf + block_col) = 1; |
| break; |
| } |
| } |
| if(*(csbf + block_col) == 1) |
| { |
| /* zero_col update *//* temp_zero_col = ~zero_col */ |
| temp_zero_col = (temp_zero_col) | (0xFU << block_col * 4); |
| // zero col can be optimized further. Now clearing the |
| // entire 4 bits corresponding to 4 colums of 4x4 block |
| // even if any 4x4 csbf is set |
| |
| /* zero row update */ /* temp_zero_row = ~zero_row */ |
| temp_zero_row = (temp_zero_row) | (0xFU << block_row); |
| // zero row can be optimized further. Now clearing the |
| // entire 4 bits corresponding to 4 rows of 4x4 block |
| // even if any 4x4 csbf is set |
| |
| break; |
| } |
| } |
| |
| cbf = cbf || (*(csbf + block_col)); // cbf update |
| } |
| csbf += csbf_strd; |
| } |
| |
| *zero_col = ~temp_zero_col; //final zero_col storing |
| *zero_row = ~temp_zero_row; //final zero_row storing |
| } |
| return cbf; |
| } |
| |
| WORD32 ihevc_quant_iquant_flat_scale_mat_rdoq |
| ( |
| WORD16 *pi2_coeffs, |
| WORD16 *pi2_quant_coeff, |
| WORD16 *pi2_q_dst, |
| WORD16 *pi2_iq_dst, |
| WORD32 trans_size, |
| WORD32 qp_div,/* qpscaled / 6 */ |
| WORD32 qp_rem,/* qpscaled % 6 */ |
| WORD32 q_add, |
| WORD32 *pi4_quant_round_factor_0_1, |
| WORD32 *pi4_quant_round_factor_1_2, |
| WORD32 src_strd, |
| WORD32 dst_q_strd, |
| WORD32 dst_iq_strd, |
| UWORD8 *csbf, |
| WORD32 csbf_strd, |
| WORD32 *zero_col, |
| WORD32 *zero_row, |
| WORD16 *pi2_dequant_coeff, |
| LWORD64 *pi8_cost |
| ) |
| { |
| WORD32 i, j; |
| WORD32 log2_size; |
| WORD16 *pi2_q_dst_orig; |
| WORD32 cbf = 0; |
| WORD32 bit_depth,shift_iq; |
| WORD16 i2_temp; |
| |
| (void)pi8_cost; |
| (void)pi4_quant_round_factor_0_1; |
| (void)pi4_quant_round_factor_1_2; |
| pi2_q_dst_orig = pi2_q_dst; |
| |
| /* Quant initialization */ |
| GETRANGE(log2_size, trans_size); |
| log2_size -= 1; |
| |
| bit_depth = 8 + 0; |
| shift_iq = bit_depth + log2_size - 5; |
| |
| for(i = 0; i < trans_size; i++) |
| { |
| for(j = 0; j < trans_size; j++) |
| { |
| WORD16 i2_temp1; |
| /* Back up the coefficients before Quantization */ |
| i2_temp = pi2_coeffs[j]; |
| |
| QUANT_NO_WEIGHTMAT(pi2_q_dst[j], pi2_coeffs[j], |
| g_ihevc_quant_scales[qp_rem], qp_div, |
| log2_size, q_add); |
| |
| i2_temp1 = pi2_q_dst[j]; |
| |
| if (abs(pi2_q_dst[j]) > 1) |
| { |
| QUANT_NO_WEIGHTMAT(pi2_q_dst[j], i2_temp, |
| g_ihevc_quant_scales[qp_rem], qp_div, |
| log2_size, ((1 << QUANT_ROUND_FACTOR_Q)/2)); |
| } |
| |
| ASSERT(abs(i2_temp1-pi2_q_dst[j]) <= 1); |
| ASSERT(abs(i2_temp1) <= abs(pi2_q_dst[j])); |
| |
| IQUANT(pi2_iq_dst[j], |
| pi2_q_dst[j], /*pi2_src[index*src_strd]*/ |
| pi2_dequant_coeff[j]*g_ihevc_iquant_scales[qp_rem], /*pi2_dequant_coeff[index*trans_size] * g_ihevc_iquant_scales[qp_rem] */ |
| shift_iq, |
| qp_div); |
| } |
| |
| pi2_q_dst += dst_q_strd; |
| pi2_iq_dst += dst_iq_strd; |
| pi2_quant_coeff += trans_size; |
| pi2_coeffs += src_strd; |
| pi2_dequant_coeff += trans_size; |
| } |
| |
| /* CSBF update */ |
| { |
| WORD32 block_row, block_col; |
| WORD32 row, col; |
| WORD16 *pi2_block; |
| UWORD32 temp_zero_col = 0; |
| UWORD32 temp_zero_row = 0; |
| |
| pi2_q_dst = pi2_q_dst_orig; |
| |
| for(block_row = 0; block_row < trans_size; block_row += 4) |
| { |
| //block_col is incrementing by 1 for easy update of csbf pointer |
| for(block_col = 0; block_col < trans_size / 4; block_col++) |
| { |
| pi2_block = pi2_q_dst + block_row * dst_q_strd + block_col * 4; |
| *(csbf + block_col) = 0; |
| |
| for(row = 0; row < 4; row++) |
| { |
| for(col = 0; col < 4; col++) |
| { |
| if(pi2_block[row * dst_q_strd + col] != 0) |
| { |
| *(csbf + block_col) = 1; |
| break; |
| } |
| } |
| if(*(csbf + block_col) == 1) |
| { |
| /* zero_col update *//* temp_zero_col = ~zero_col */ |
| temp_zero_col = (temp_zero_col) | (0xFU << block_col * 4); |
| // zero col can be optimized further. Now clearing the |
| // entire 4 bits corresponding to 4 colums of 4x4 block |
| // even if any 4x4 csbf is set |
| |
| /* zero row update */ /* temp_zero_row = ~zero_row */ |
| temp_zero_row = (temp_zero_row) | (0xFU << block_row); |
| // zero row can be optimized further. Now clearing the |
| // entire 4 bits corresponding to 4 rows of 4x4 block |
| // even if any 4x4 csbf is set |
| |
| break; |
| } |
| } |
| |
| cbf = cbf || (*(csbf + block_col)); // cbf update |
| } |
| csbf += csbf_strd; |
| } |
| |
| *zero_col = ~temp_zero_col; //final zero_col storing |
| *zero_row = ~temp_zero_row; //final zero_row storing |
| } |
| |
| return cbf; |
| } |
| |
| |
| /** |
| ******************************************************************************* |
| * |
| * @brief |
| * This function performs quantization, followed by Inverse |
| * quantization to find transform domain SSD |
| * |
| * @par Description: |
| * Performs quantization on coeffs |
| * |
| * @param[in] pi2_coeffs |
| * 4x4 Coeffs |
| * |
| * @param[in] pi2_quant_coeff |
| * Scaling Matrix |
| * |
| * @param[out] pi2_dst |
| * Output 4x4 coefficients |
| * |
| * @param[in] qp_div |
| * Quantization parameter / 6 |
| * |
| * @param[in] qp_rem |
| * Quantization parameter % 6 |
| * |
| * @param[in] src_strd |
| * Input stride |
| * |
| * @param[in] dst_strd |
| * Output Stride |
| * |
| * @param[out] csbf |
| * coded sub block flag |
| * |
 * @param[in] csbf_strd
 *  coded sub block flag buffer stride
 *
 * @param[out] zero_col
 *  zero column flag
 *
 * @param[out] zero_row
 *  zero row flag
| * |
| * @returns cbf |
| * coded block flag |
| * |
| * @remarks |
| * None |
| * |
| ******************************************************************************* |
| */ |
| |
| WORD32 ihevc_q_iq_ssd_var_rnd_fact |
| ( |
| WORD16 *pi2_coeffs, |
| WORD16 *pi2_quant_coeff, |
| WORD16 *pi2_q_dst, |
| WORD16 *pi2_iq_dst, |
| WORD32 trans_size, |
| WORD32 qp_div,/* qpscaled / 6 */ |
| WORD32 qp_rem,/* qpscaled % 6 */ |
| WORD32 q_add, |
| WORD32 *pi4_quant_round_factor_0_1, |
| WORD32 *pi4_quant_round_factor_1_2, |
| WORD32 src_strd, |
| WORD32 dst_q_strd, |
| WORD32 dst_iq_strd, |
| UWORD8 *csbf, |
| WORD32 csbf_strd, |
| WORD32 *zero_col, |
| WORD32 *zero_row, |
| WORD16 *pi2_dequant_coeff, |
| LWORD64 *pi8_cost |
| ) |
| { |
| WORD32 i, j; |
| WORD32 log2_size; |
| WORD16 *pi2_q_dst_orig; |
| WORD32 cbf = 0; |
| WORD32 bit_depth,shift_iq; |
| WORD32 val; |
| WORD16 i2_temp; |
| //WORD16 i2_temp_1; |
| /* Initialize cost to zero */ |
| WORD32 ssd_cost = 0; |
| |
| (void)q_add; |
| pi2_q_dst_orig = pi2_q_dst; |
| |
| |
| /* Quant initialization */ |
| GETRANGE(log2_size, trans_size); |
| log2_size -= 1; |
| |
| bit_depth = 8 + 0; |
| shift_iq = bit_depth + log2_size - 5; |
| |
| for(i = 0; i < trans_size; i++) |
| { |
| for(j = 0; j < trans_size; j++) |
| { |
| /* Back up the coefficients before Quantization */ |
| i2_temp = pi2_coeffs[j]; |
| |
| |
| { |
| /* Quantization */ |
| QUANT(pi2_q_dst[j],i2_temp, |
| pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div, |
| log2_size, 0); |
| if (abs(pi2_q_dst[j]) >= 2) |
| { |
| QUANT(pi2_q_dst[j],i2_temp, |
| pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div, |
| log2_size, ((1 << QUANT_ROUND_FACTOR_Q)/2)); |
| |
| } |
| else if (abs(pi2_q_dst[j]) >= 1) |
| { |
| QUANT(pi2_q_dst[j],i2_temp, |
| pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div, |
| log2_size, *pi4_quant_round_factor_1_2); |
| } |
| |
| else |
| { |
| /* Quantization */ |
| QUANT(pi2_q_dst[j],i2_temp, |
| pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div, |
| log2_size, *pi4_quant_round_factor_0_1); |
| } |
| |
| } |
| |
| |
| |
| /* Inverse Quantization */ |
| IQUANT(pi2_iq_dst[j], |
| pi2_q_dst[j], /*pi2_src[index*src_strd]*/ |
| pi2_dequant_coeff[j]*g_ihevc_iquant_scales[qp_rem], |
| /*pi2_dequant_coeff[index*trans_size] * g_ihevc_iquant_scales[qp_rem] */ |
| shift_iq, |
| qp_div); |
| |
| /* SSD Computation & Accumulation */ |
| val = i2_temp - pi2_iq_dst[j]; |
| ssd_cost += val*val; |
| |
| pi4_quant_round_factor_0_1++; |
| pi4_quant_round_factor_1_2++; |
| } |
| |
| pi2_q_dst += dst_q_strd; |
| pi2_iq_dst += dst_iq_strd; |
| pi2_quant_coeff += trans_size; |
| pi2_coeffs += src_strd; |
| pi2_dequant_coeff += trans_size; |
| } |
| /* Store the cost */ |
| *pi8_cost = ssd_cost; |
| |
| /* CSBF update */ |
| { |
| WORD32 block_row, block_col; |
| WORD32 row, col; |
| WORD16 *pi2_block; |
| UWORD32 temp_zero_col = 0; |
| UWORD32 temp_zero_row = 0; |
| |
| pi2_q_dst = pi2_q_dst_orig; |
| |
| for(block_row = 0; block_row < trans_size; block_row += 4) |
| { |
| //block_col is incrementing by 1 for easy update of csbf pointer |
| for(block_col = 0; block_col < trans_size / 4; block_col++) |
| { |
| pi2_block = pi2_q_dst + block_row * dst_q_strd + block_col * 4; |
| *(csbf + block_col) = 0; |
| |
| for(row = 0; row < 4; row++) |
| { |
| for(col = 0; col < 4; col++) |
| { |
| if(pi2_block[row * dst_q_strd + col] != 0) |
| { |
| *(csbf + block_col) = 1; |
| break; |
| } |
| } |
| if(*(csbf + block_col) == 1) |
| { |
| /* zero_col update *//* temp_zero_col = ~zero_col */ |
| temp_zero_col = (temp_zero_col) | (0xFU << block_col * 4); |
| // zero col can be optimized further. Now clearing the |
| // entire 4 bits corresponding to 4 colums of 4x4 block |
| // even if any 4x4 csbf is set |
| |
| /* zero row update */ /* temp_zero_row = ~zero_row */ |
| temp_zero_row = (temp_zero_row) | (0xFU << block_row); |
| // zero row can be optimized further. Now clearing the |
| // entire 4 bits corresponding to 4 rows of 4x4 block |
| // even if any 4x4 csbf is set |
| |
| break; |
| } |
| } |
| |
| cbf = cbf || (*(csbf + block_col)); // cbf update |
| } |
| csbf += csbf_strd; |
| } |
| |
| *zero_col = ~temp_zero_col; //final zero_col storing |
| *zero_row = ~temp_zero_row; //final zero_row storing |
| } |
| |
| return cbf; |
| } |
| |
/**
 *******************************************************************************
 *
 * @brief
 *  Quantizes the transform coefficients (weighted scaling matrix) with a
 *  per-coefficient, magnitude-dependent rounding factor, then inverse
 *  quantizes the result. Unlike ihevc_q_iq_ssd_var_rnd_fact, no SSD cost
 *  is computed (pi8_cost is ignored). Also derives the coded sub block
 *  flags, the coded block flag and the zero row/column masks.
 *
 * @remarks
 *  Rounding factor selection per coefficient:
 *   - first quantize with rounding factor 0 to classify the level
 *   - |level| >= 2 : requantize with the fixed half rounding factor
 *   - |level| == 1 : requantize with *pi4_quant_round_factor_1_2
 *   - |level| == 0 : requantize with *pi4_quant_round_factor_0_1
 *
 * @returns cbf - nonzero if any quantized level in the block is nonzero
 *
 *******************************************************************************
 */
WORD32 ihevc_q_iq_var_rnd_fact
(
    WORD16 *pi2_coeffs,                 /* input transform coefficients */
    WORD16 *pi2_quant_coeff,            /* quantization scaling matrix */
    WORD16 *pi2_q_dst,                  /* output quantized levels */
    WORD16 *pi2_iq_dst,                 /* output inverse quantized coeffs */
    WORD32 trans_size,                  /* transform block size */
    WORD32 qp_div,/* qpscaled / 6 */
    WORD32 qp_rem,/* qpscaled % 6 */
    WORD32 q_add,                       /* unused in this variant */
    WORD32 *pi4_quant_round_factor_0_1, /* per-coeff rounding, level 0->1 */
    WORD32 *pi4_quant_round_factor_1_2, /* per-coeff rounding, level 1->2 */
    WORD32 src_strd,                    /* stride of pi2_coeffs */
    WORD32 dst_q_strd,                  /* stride of pi2_q_dst */
    WORD32 dst_iq_strd,                 /* stride of pi2_iq_dst */
    UWORD8 *csbf,                       /* output coded sub block flags */
    WORD32 csbf_strd,                   /* stride of csbf */
    WORD32 *zero_col,                   /* output zero column bitmask */
    WORD32 *zero_row,                   /* output zero row bitmask */
    WORD16 *pi2_dequant_coeff,          /* dequantization scaling matrix */
    LWORD64 *pi8_cost                   /* unused in this variant */
)
{
    WORD32 i, j;
    WORD32 log2_size;
    WORD16 *pi2_q_dst_orig;
    WORD32 cbf = 0;
    WORD32 bit_depth,shift_iq;
    WORD16 i2_temp;

    /* Unused in this variant */
    (void)q_add;
    (void)pi8_cost;
    pi2_q_dst_orig = pi2_q_dst;

    /* log2 of the transform size */
    GETRANGE(log2_size, trans_size);
    log2_size -= 1;

    bit_depth = 8 + 0;
    shift_iq = bit_depth + log2_size - 5;

    for(i = 0; i < trans_size; i++)
    {
        for(j = 0; j < trans_size; j++)
        {
            /* Back up the coefficient before quantization */
            i2_temp = pi2_coeffs[j];

            {
                /* First pass: rounding factor 0, classifies the level */
                QUANT(pi2_q_dst[j],i2_temp,
                      pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div,
                      log2_size, 0);

                if (abs(pi2_q_dst[j]) >= 2)
                {
                    /* Large levels: requantize with the standard half
                       rounding factor */
                    QUANT(pi2_q_dst[j],i2_temp,
                          pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div,
                          log2_size, ((1 << QUANT_ROUND_FACTOR_Q)/2));
                }
                else if (abs(pi2_q_dst[j]) >= 1)
                {
                    /* Level 1: caller-supplied 1->2 rounding factor */
                    QUANT(pi2_q_dst[j],i2_temp,
                          pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div,
                          log2_size, *pi4_quant_round_factor_1_2);
                }
                else
                {
                    /* Level 0: caller-supplied 0->1 rounding factor */
                    QUANT(pi2_q_dst[j],i2_temp,
                          pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div,
                          log2_size, *pi4_quant_round_factor_0_1);
                }
            }

            /* Inverse quantization of the chosen level */
            IQUANT(pi2_iq_dst[j],
                   pi2_q_dst[j], /*pi2_src[index*src_strd]*/
                   pi2_dequant_coeff[j]*g_ihevc_iquant_scales[qp_rem],
                   shift_iq,
                   qp_div);

            /* Rounding factors are per coefficient */
            pi4_quant_round_factor_0_1++;
            pi4_quant_round_factor_1_2++;
        }

        pi2_q_dst += dst_q_strd;
        pi2_iq_dst += dst_iq_strd;
        pi2_quant_coeff += trans_size;
        pi2_coeffs += src_strd;
        pi2_dequant_coeff += trans_size;
    }

    /* CSBF update */
    {
        WORD32 block_row, block_col;
        WORD32 row, col;
        WORD16 *pi2_block;
        UWORD32 temp_zero_col = 0;
        UWORD32 temp_zero_row = 0;

        pi2_q_dst = pi2_q_dst_orig;

        for(block_row = 0; block_row < trans_size; block_row += 4)
        {
            //block_col is incrementing by 1 for easy update of csbf pointer
            for(block_col = 0; block_col < trans_size / 4; block_col++)
            {
                pi2_block = pi2_q_dst + block_row * dst_q_strd + block_col * 4;
                *(csbf + block_col) = 0;

                for(row = 0; row < 4; row++)
                {
                    for(col = 0; col < 4; col++)
                    {
                        if(pi2_block[row * dst_q_strd + col] != 0)
                        {
                            *(csbf + block_col) = 1;
                            break;
                        }
                    }
                    if(*(csbf + block_col) == 1)
                    {
                        /* zero_col update *//* temp_zero_col = ~zero_col */
                        temp_zero_col = (temp_zero_col) | (0xFU << block_col * 4);
                        // zero col can be optimized further. Now clearing the
                        // entire 4 bits corresponding to 4 colums of 4x4 block
                        // even if any 4x4 csbf is set

                        /* zero row update */ /* temp_zero_row = ~zero_row */
                        temp_zero_row = (temp_zero_row) | (0xFU << block_row);
                        // zero row can be optimized further. Now clearing the
                        // entire 4 bits corresponding to 4 rows of 4x4 block
                        // even if any 4x4 csbf is set

                        break;
                    }
                }

                cbf = cbf || (*(csbf + block_col)); // cbf update
            }
            csbf += csbf_strd;
        }

        *zero_col = ~temp_zero_col; //final zero_col storing
        *zero_row = ~temp_zero_row; //final zero_row storing
    }

    return cbf;
}
| |
| /** |
| ******************************************************************************* |
| * |
| * @brief |
| * This function performs quantization(using flat scale matrix), followed by |
| * inverse quantization to find transform domain SSD; when we perform RDOQ. |
 * In case the quantized value turns out to be greater than 1, we then
 * requantize using half rounding.
| * |
| * @par Description: |
| * Performs quantization on coeffs |
| * |
| * @param[in] pi2_coeffs |
| * 4x4 Coeffs |
| * |
| * @param[in] pi2_quant_coeff |
| * Scaling Matrix |
| * |
| * @param[out] pi2_dst |
| * Output 4x4 coefficients |
| * |
| * @param[in] qp_div |
| * Quantization parameter / 6 |
| * |
| * @param[in] qp_rem |
| * Quantization parameter % 6 |
| * |
| * @param[in] src_strd |
| * Input stride |
| * |
| * @param[in] dst_strd |
| * Output Stride |
| * |
| * @param[out] csbf |
| * coded sub block flag |
| * |
 * @param[in] csbf_strd
 *  coded sub block flag buffer stride
 *
 * @param[out] zero_col
 *  zero column flag
 *
 * @param[out] zero_row
 *  zero row flag
| * |
| * @returns cbf |
| * coded block flag |
| * |
| * @remarks |
| * None |
| * |
| ******************************************************************************* |
| */ |
| |
| WORD32 ihevc_q_iq_ssd_flat_scale_mat_var_rnd_fact |
| ( |
| WORD16 *pi2_coeffs, |
| WORD16 *pi2_quant_coeff, |
| WORD16 *pi2_q_dst, |
| WORD16 *pi2_iq_dst, |
| WORD32 trans_size, |
| WORD32 qp_div,/* qpscaled / 6 */ |
| WORD32 qp_rem,/* qpscaled % 6 */ |
| WORD32 q_add, |
| WORD32 *pi4_quant_round_factor_0_1, |
| WORD32 *pi4_quant_round_factor_1_2, |
| WORD32 src_strd, |
| WORD32 dst_q_strd, |
| WORD32 dst_iq_strd, |
| UWORD8 *csbf, |
| WORD32 csbf_strd, |
| WORD32 *zero_col, |
| WORD32 *zero_row, |
| WORD16 *pi2_dequant_coeff, |
| LWORD64 *pi8_cost |
| ) |
| { |
| WORD32 i, j; |
| WORD32 log2_size; |
| WORD16 *pi2_q_dst_orig; |
| WORD32 cbf = 0; |
| WORD32 bit_depth,shift_iq; |
| WORD32 val; |
| WORD16 i2_temp; |
| /* Initialize cost to zero */ |
| WORD32 ssd_cost = 0; |
| |
| (void)q_add; |
| pi2_q_dst_orig = pi2_q_dst; |
| |
| /* Quant initialization */ |
| GETRANGE(log2_size, trans_size); |
| log2_size -= 1; |
| |
| bit_depth = 8 + 0; |
| shift_iq = bit_depth + log2_size - 5; |
| |
| for(i = 0; i < trans_size; i++) |
| { |
| for(j = 0; j < trans_size; j++) |
| { |
| WORD16 i2_temp1; |
| /* Back up the coefficients before Quantization */ |
| i2_temp = pi2_coeffs[j]; |
| |
| /*QUANT(pi2_dst[j], pi2_coeffs[j], |
| pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div, |
| log2_size, q_add);*/ |
| |
| /* modified by 1028 */ |
| /* Quantization */ |
| |
| |
| { |
| QUANT_NO_WEIGHTMAT(pi2_q_dst[j], pi2_coeffs[j], |
| g_ihevc_quant_scales[qp_rem], qp_div, |
| log2_size, 0); |
| |
| i2_temp1 = pi2_q_dst[j]; |
| |
| if (abs(pi2_q_dst[j]) >= 2) |
| { |
| QUANT_NO_WEIGHTMAT(pi2_q_dst[j], i2_temp, |
| g_ihevc_quant_scales[qp_rem], qp_div, |
| log2_size, ((1 << QUANT_ROUND_FACTOR_Q)/2)); |
| } |
| else if (abs(pi2_q_dst[j]) >= 1) |
| { |
| QUANT_NO_WEIGHTMAT(pi2_q_dst[j], pi2_coeffs[j], |
| g_ihevc_quant_scales[qp_rem], qp_div, |
| log2_size, *pi4_quant_round_factor_1_2); |
| } |
| |
| else |
| { |
| QUANT_NO_WEIGHTMAT(pi2_q_dst[j], pi2_coeffs[j], |
| g_ihevc_quant_scales[qp_rem], qp_div, |
| log2_size, *pi4_quant_round_factor_0_1); |
| } |
| |
| } |
| |
| |
| |
| |
| ASSERT(abs(i2_temp1-pi2_q_dst[j]) <= 1); |
| |
| |
| /* Inverse Quantization */ |
| IQUANT(pi2_iq_dst[j], |
| pi2_q_dst[j], /*pi2_src[index*src_strd]*/ |
| pi2_dequant_coeff[j]*g_ihevc_iquant_scales[qp_rem], /*pi2_dequant_coeff[index*trans_size] * g_ihevc_iquant_scales[qp_rem] */ |
| shift_iq, |
| qp_div); |
| |
| /* SSD Computation & Accumulation */ |
| val = i2_temp - pi2_iq_dst[j]; |
| ssd_cost += val*val; |
| |
| pi4_quant_round_factor_0_1++; |
| pi4_quant_round_factor_1_2++; |
| } |
| |
| pi2_q_dst += dst_q_strd; |
| pi2_iq_dst += dst_iq_strd; |
| pi2_quant_coeff += trans_size; |
| pi2_coeffs += src_strd; |
| pi2_dequant_coeff += trans_size; |
| |
| } |
| /* Store the cost */ |
| *pi8_cost = ssd_cost; |
| |
| /* CSBF update */ |
| { |
| WORD32 block_row, block_col; |
| WORD32 row, col; |
| WORD16 *pi2_block; |
| UWORD32 temp_zero_col = 0; |
| UWORD32 temp_zero_row = 0; |
| |
| pi2_q_dst = pi2_q_dst_orig; |
| |
| for(block_row = 0; block_row < trans_size; block_row += 4) |
| { |
| //block_col is incrementing by 1 for easy update of csbf pointer |
| for(block_col = 0; block_col < trans_size / 4; block_col++) |
| { |
| pi2_block = pi2_q_dst + block_row * dst_q_strd + block_col * 4; |
| *(csbf + block_col) = 0; |
| |
| for(row = 0; row < 4; row++) |
| { |
| for(col = 0; col < 4; col++) |
| { |
| if(pi2_block[row * dst_q_strd + col] != 0) |
| { |
| *(csbf + block_col) = 1; |
| break; |
| } |
| } |
| if(*(csbf + block_col) == 1) |
| { |
| /* zero_col update *//* temp_zero_col = ~zero_col */ |
| temp_zero_col = (temp_zero_col) | (0xFU << block_col * 4); |
| // zero col can be optimized further. Now clearing the |
| // entire 4 bits corresponding to 4 colums of 4x4 block |
| // even if any 4x4 csbf is set |
| |
| /* zero row update */ /* temp_zero_row = ~zero_row */ |
| temp_zero_row = (temp_zero_row) | (0xFU << block_row); |
| // zero row can be optimized further. Now clearing the |
| // entire 4 bits corresponding to 4 rows of 4x4 block |
| // even if any 4x4 csbf is set |
| |
| break; |
| } |
| } |
| |
| cbf = cbf || (*(csbf + block_col)); // cbf update |
| } |
| csbf += csbf_strd; |
| } |
| |
| *zero_col = ~temp_zero_col; //final zero_col storing |
| *zero_row = ~temp_zero_row; //final zero_row storing |
| } |
| return cbf; |
| } |
| |
/**
 *******************************************************************************
 *
 * @brief
 *  Flat-scale-matrix quantization with per-coefficient variable rounding
 *  factors, followed by inverse quantization. No SSD cost is computed
 *  (pi8_cost is ignored). Also derives the coded sub block flags, the
 *  coded block flag and the zero row/column masks.
 *
 * @remarks
 *  Rounding factor selection per coefficient:
 *   - first quantize with rounding factor 0 to classify the level
 *   - |level| >= 2 : requantize with the fixed half rounding factor
 *   - |level| == 1 : requantize with *pi4_quant_round_factor_1_2
 *   - |level| == 0 : requantize with *pi4_quant_round_factor_0_1
 *
 * @returns cbf - nonzero if any quantized level in the block is nonzero
 *
 *******************************************************************************
 */
WORD32 ihevc_q_iq_flat_scale_mat_var_rnd_fact
(
    WORD16 *pi2_coeffs,                 /* input transform coefficients */
    WORD16 *pi2_quant_coeff,            /* scaling matrix (unused by the
                                           flat-scale quant itself) */
    WORD16 *pi2_q_dst,                  /* output quantized levels */
    WORD16 *pi2_iq_dst,                 /* output inverse quantized coeffs */
    WORD32 trans_size,                  /* transform block size */
    WORD32 qp_div,/* qpscaled / 6 */
    WORD32 qp_rem,/* qpscaled % 6 */
    WORD32 q_add,                       /* unused in this variant */
    WORD32 *pi4_quant_round_factor_0_1, /* per-coeff rounding, level 0->1 */
    WORD32 *pi4_quant_round_factor_1_2, /* per-coeff rounding, level 1->2 */
    WORD32 src_strd,                    /* stride of pi2_coeffs */
    WORD32 dst_q_strd,                  /* stride of pi2_q_dst */
    WORD32 dst_iq_strd,                 /* stride of pi2_iq_dst */
    UWORD8 *csbf,                       /* output coded sub block flags */
    WORD32 csbf_strd,                   /* stride of csbf */
    WORD32 *zero_col,                   /* output zero column bitmask */
    WORD32 *zero_row,                   /* output zero row bitmask */
    WORD16 *pi2_dequant_coeff,          /* dequantization scaling matrix */
    LWORD64 *pi8_cost                   /* unused in this variant */
)
{
    WORD32 i, j;
    WORD32 log2_size;
    WORD16 *pi2_q_dst_orig;
    WORD32 cbf = 0;
    WORD32 bit_depth,shift_iq;
    WORD16 i2_temp;

    /* Unused in this variant */
    (void)q_add;
    (void)pi8_cost;
    pi2_q_dst_orig = pi2_q_dst;

    /* log2 of the transform size */
    GETRANGE(log2_size, trans_size);
    log2_size -= 1;

    bit_depth = 8 + 0;
    shift_iq = bit_depth + log2_size - 5;

    for(i = 0; i < trans_size; i++)
    {
        for(j = 0; j < trans_size; j++)
        {
            WORD16 i2_temp1;

            /* Back up the coefficient before quantization */
            i2_temp = pi2_coeffs[j];

            {
                /* First pass: rounding factor 0, classifies the level */
                QUANT_NO_WEIGHTMAT(pi2_q_dst[j], pi2_coeffs[j],
                                   g_ihevc_quant_scales[qp_rem], qp_div,
                                   log2_size, 0);

                i2_temp1 = pi2_q_dst[j];

                if (abs(pi2_q_dst[j]) >= 2)
                {
                    /* Large levels: requantize with the standard half
                       rounding factor */
                    QUANT_NO_WEIGHTMAT(pi2_q_dst[j], i2_temp,
                                       g_ihevc_quant_scales[qp_rem], qp_div,
                                       log2_size, ((1 << QUANT_ROUND_FACTOR_Q)/2));
                }
                else if (abs(pi2_q_dst[j]) >= 1)
                {
                    /* Level 1: caller-supplied 1->2 rounding factor */
                    QUANT_NO_WEIGHTMAT(pi2_q_dst[j], pi2_coeffs[j],
                                       g_ihevc_quant_scales[qp_rem], qp_div,
                                       log2_size, *pi4_quant_round_factor_1_2);
                }
                else
                {
                    /* Level 0: caller-supplied 0->1 rounding factor */
                    QUANT_NO_WEIGHTMAT(pi2_q_dst[j], pi2_coeffs[j],
                                       g_ihevc_quant_scales[qp_rem], qp_div,
                                       log2_size, *pi4_quant_round_factor_0_1);
                }
            }

            /* Requantization may change the level by at most one */
            ASSERT(abs(i2_temp1-pi2_q_dst[j]) <= 1);

            /* Inverse quantization of the chosen level */
            IQUANT(pi2_iq_dst[j],
                   pi2_q_dst[j], /*pi2_src[index*src_strd]*/
                   pi2_dequant_coeff[j]*g_ihevc_iquant_scales[qp_rem],
                   shift_iq,
                   qp_div);

            /* Rounding factors are per coefficient */
            pi4_quant_round_factor_0_1++;
            pi4_quant_round_factor_1_2++;
        }

        pi2_q_dst += dst_q_strd;
        pi2_iq_dst += dst_iq_strd;
        pi2_quant_coeff += trans_size;
        pi2_coeffs += src_strd;
        pi2_dequant_coeff += trans_size;

    }

    /* CSBF update */
    {
        WORD32 block_row, block_col;
        WORD32 row, col;
        WORD16 *pi2_block;
        UWORD32 temp_zero_col = 0;
        UWORD32 temp_zero_row = 0;

        pi2_q_dst = pi2_q_dst_orig;

        for(block_row = 0; block_row < trans_size; block_row += 4)
        {
            //block_col is incrementing by 1 for easy update of csbf pointer
            for(block_col = 0; block_col < trans_size / 4; block_col++)
            {
                pi2_block = pi2_q_dst + block_row * dst_q_strd + block_col * 4;
                *(csbf + block_col) = 0;

                for(row = 0; row < 4; row++)
                {
                    for(col = 0; col < 4; col++)
                    {
                        if(pi2_block[row * dst_q_strd + col] != 0)
                        {
                            *(csbf + block_col) = 1;
                            break;
                        }
                    }
                    if(*(csbf + block_col) == 1)
                    {
                        /* zero_col update *//* temp_zero_col = ~zero_col */
                        temp_zero_col = (temp_zero_col) | (0xFU << block_col * 4);
                        // zero col can be optimized further. Now clearing the
                        // entire 4 bits corresponding to 4 colums of 4x4 block
                        // even if any 4x4 csbf is set

                        /* zero row update */ /* temp_zero_row = ~zero_row */
                        temp_zero_row = (temp_zero_row) | (0xFU << block_row);
                        // zero row can be optimized further. Now clearing the
                        // entire 4 bits corresponding to 4 rows of 4x4 block
                        // even if any 4x4 csbf is set

                        break;
                    }
                }

                cbf = cbf || (*(csbf + block_col)); // cbf update
            }
            csbf += csbf_strd;
        }

        *zero_col = ~temp_zero_col; //final zero_col storing
        *zero_row = ~temp_zero_row; //final zero_row storing
    }
    return cbf;
}