/* common/ihevc_trans_macros.h - platform/external/libhevc - Git at Google */

 /******************************************************************************
 *
 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 ******************************************************************************/
 /**
 *******************************************************************************
 * @file
 *  ihevc_trans_macros.h
 *
 * @brief
 *  Macros used in the forward transform and inverse transform functions
 *
 * @author
 *  Ittiam
 *
 * @remarks
 *  None
 *
 *******************************************************************************
 */
 #ifndef IHEVC_TRANS_MACROS_H_
 #define IHEVC_TRANS_MACROS_H_

 /**
  * QUANT: forward-quantizes a single transform coefficient with the bit
  * depth fixed at 8.
  *
  *   transform_shift = MAX_TR_DYNAMIC_RANGE - 8 - log2_trans_size
  *   q_bits          = QUANT_SHIFT + qp_div + transform_shift
  *                     + SCALING_Q_SHIFT - 4
  *   out = CLIP_S16(sign(inp) * ((abs(inp) * quant_coeff
  *             + (q_add << (q_bits - QUANT_ROUND_FACTOR_Q))) >> q_bits))
  *
  * The intermediate value is held in an LWORD64.
  *
  * @param out             lvalue receiving the result (cast to WORD16)
  * @param inp             coefficient to quantize; it is expanded twice, so
  *                        it must not have side effects
  * @param quant_coeff     multiplier applied to abs(inp)
  * @param qp_div          added to the right-shift amount q_bits
  * @param log2_trans_size subtracted when forming transform_shift
  * @param q_add           rounding offset; shifted left by
  *                        (q_bits - QUANT_ROUND_FACTOR_Q) before being added
  *
  * Every argument is parenthesized on expansion, so expressions such as
  * "a + b" can be passed safely. The macro expands to a plain { } block
  * declaring locals named tmp, sign, bit_depth, transform_shift, q_bits and
  * quant_multiplier; arguments must not use those names, and the macro should
  * not be used as the unbraced body of an if/else.
  */
 #define QUANT(out, inp, quant_coeff, qp_div, log2_trans_size, q_add) \
 { \
     LWORD64 tmp; \
     WORD32 sign; \
     WORD32 bit_depth, transform_shift; \
     WORD32 q_bits, quant_multiplier; \
     \
     /* q_bits calculation */ \
     /* To be moved outside in neon. To be computed once per transform call */ \
     bit_depth = 8; \
     transform_shift = MAX_TR_DYNAMIC_RANGE - bit_depth - (log2_trans_size); \
     /* quant_coeff are multiplied by 16. Instead of multiplying, the division factor q_bits is reduced by 4 */ \
     quant_multiplier = 4; \
     q_bits = QUANT_SHIFT + (qp_div) + transform_shift + SCALING_Q_SHIFT - quant_multiplier; \
     \
     sign = (inp) < 0 ? -1 : 1; \
     \
     tmp = (LWORD64)(abs(inp)); \
     tmp = tmp * (quant_coeff); \
     tmp = tmp + (((LWORD64)(q_add)) << (q_bits - QUANT_ROUND_FACTOR_Q)); \
     tmp = tmp >> q_bits; \
     \
     tmp = tmp * sign; \
     (out) = (WORD16) CLIP_S16(tmp); \
 }

 /**
  * QUANT_HBD: forward-quantizes a single transform coefficient; identical to
  * the 8-bit computation except that the bit depth is supplied by the caller.
  *
  *   transform_shift = MAX_TR_DYNAMIC_RANGE - bit_depth - log2_trans_size
  *   q_bits          = QUANT_SHIFT + qp_div + transform_shift
  *                     + SCALING_Q_SHIFT - 4
  *   out = CLIP_S16(sign(inp) * ((abs(inp) * quant_coeff
  *             + (q_add << (q_bits - QUANT_ROUND_FACTOR_Q))) >> q_bits))
  *
  * The intermediate value is held in an LWORD64.
  *
  * @param out             lvalue receiving the result (cast to WORD16)
  * @param inp             coefficient to quantize; it is expanded twice, so
  *                        it must not have side effects
  * @param quant_coeff     multiplier applied to abs(inp)
  * @param qp_div          added to the right-shift amount q_bits
  * @param log2_trans_size subtracted when forming transform_shift
  * @param q_add           rounding offset; shifted left by
  *                        (q_bits - QUANT_ROUND_FACTOR_Q) before being added
  * @param bit_depth       subtracted when forming transform_shift
  *
  * Every argument is parenthesized on expansion, so expressions such as
  * "a + b" can be passed safely. The macro expands to a plain { } block
  * declaring locals named tmp, sign, transform_shift, q_bits and
  * quant_multiplier; arguments must not use those names, and the macro should
  * not be used as the unbraced body of an if/else.
  */
 #define QUANT_HBD(out, inp, quant_coeff, qp_div, log2_trans_size, q_add, bit_depth) \
 { \
     LWORD64 tmp; \
     WORD32 sign; \
     WORD32 transform_shift; \
     WORD32 q_bits, quant_multiplier; \
     \
     /* q_bits calculation */ \
     /* To be moved outside in neon. To be computed once per transform call */ \
     transform_shift = MAX_TR_DYNAMIC_RANGE - (bit_depth) - (log2_trans_size); \
     /* quant_coeff are multiplied by 16. Instead of multiplying, the division factor q_bits is reduced by 4 */ \
     quant_multiplier = 4; \
     q_bits = QUANT_SHIFT + (qp_div) + transform_shift + SCALING_Q_SHIFT - quant_multiplier; \
     \
     sign = (inp) < 0 ? -1 : 1; \
     \
     tmp = (LWORD64)(abs(inp)); \
     tmp = tmp * (quant_coeff); \
     tmp = tmp + (((LWORD64)(q_add)) << (q_bits - QUANT_ROUND_FACTOR_Q)); \
     tmp = tmp >> q_bits; \
     \
     tmp = tmp * sign; \
     (out) = (WORD16) CLIP_S16(tmp); \
 }
 /* added by 100028 */
 /**
  * QUANT_NO_WEIGHTMAT: forward-quantizes a single transform coefficient with
  * the bit depth fixed at 8, using a right shift that is additionally reduced
  * by FLAT_RESCALE_MAT_Q_SHIFT. All arithmetic is done in WORD32.
  *
  *   transform_shift = MAX_TR_DYNAMIC_RANGE - 8 - log2_trans_size
  *   q_bits          = QUANT_SHIFT + qp_div + transform_shift
  *                     + SCALING_Q_SHIFT - 4 - FLAT_RESCALE_MAT_Q_SHIFT
  *   out = CLIP_S16(sign(inp) * ((abs(inp) * quant_coeff
  *             + (q_add << (q_bits - QUANT_ROUND_FACTOR_Q))) >> q_bits))
  *
  * @param out             lvalue receiving the result (cast to WORD16)
  * @param inp             coefficient to quantize; it is expanded twice, so
  *                        it must not have side effects
  * @param quant_coeff     multiplier applied to abs(inp)
  * @param qp_div          added to the right-shift amount q_bits
  * @param log2_trans_size subtracted when forming transform_shift
  * @param q_add           rounding offset; shifted left by
  *                        (q_bits - QUANT_ROUND_FACTOR_Q) before being added
  *
  * NOTE(review): the product abs(inp) * quant_coeff is formed in 32 bits, so
  * callers are presumably expected to keep it within WORD32 range - confirm.
  *
  * Every argument is parenthesized on expansion, so expressions such as
  * "a + b" can be passed safely. The macro expands to a plain { } block
  * declaring locals named tmp, sign, bit_depth, transform_shift, q_bits and
  * quant_multiplier; arguments must not use those names, and the macro should
  * not be used as the unbraced body of an if/else.
  */
 #define QUANT_NO_WEIGHTMAT(out, inp, quant_coeff, qp_div, log2_trans_size, q_add) \
 { \
     WORD32 tmp; \
     WORD32 sign; \
     WORD32 bit_depth, transform_shift; \
     WORD32 q_bits, quant_multiplier; \
     \
     /* q_bits calculation */ \
     /* To be moved outside in neon. To be computed once per transform call */ \
     bit_depth = 8; \
     transform_shift = MAX_TR_DYNAMIC_RANGE - bit_depth - (log2_trans_size); \
     /* quant_coeff are multiplied by 16. Instead of multiplying, the division factor q_bits is reduced by 4 */ \
     quant_multiplier = 4; \
     q_bits = QUANT_SHIFT + (qp_div) + transform_shift + SCALING_Q_SHIFT - quant_multiplier - FLAT_RESCALE_MAT_Q_SHIFT /* 2048 */; \
     \
     sign = (inp) < 0 ? -1 : 1; \
     \
     tmp = (WORD32)(abs(inp)); \
     tmp = tmp * (quant_coeff); \
     tmp = tmp + (((WORD32)(q_add)) << (q_bits - QUANT_ROUND_FACTOR_Q)); \
     tmp = tmp >> q_bits; \
     \
     tmp = tmp * sign; \
     (out) = (WORD16) CLIP_S16(tmp); \
 }

 /**
  * QUANT_NO_WEIGHTMAT_HBD: forward-quantizes a single transform coefficient
  * with a caller-supplied bit depth, using a right shift that is additionally
  * reduced by FLAT_RESCALE_MAT_Q_SHIFT. All arithmetic is done in WORD32.
  *
  *   transform_shift = MAX_TR_DYNAMIC_RANGE - bit_depth - log2_trans_size
  *   q_bits          = QUANT_SHIFT + qp_div + transform_shift
  *                     + SCALING_Q_SHIFT - 4 - FLAT_RESCALE_MAT_Q_SHIFT
  *   out = CLIP_S16(sign(inp) * ((abs(inp) * quant_coeff
  *             + (q_add << (q_bits - QUANT_ROUND_FACTOR_Q))) >> q_bits))
  *
  * @param out             lvalue receiving the result (cast to WORD16)
  * @param inp             coefficient to quantize; it is expanded twice, so
  *                        it must not have side effects
  * @param quant_coeff     multiplier applied to abs(inp)
  * @param qp_div          added to the right-shift amount q_bits
  * @param log2_trans_size subtracted when forming transform_shift
  * @param q_add           rounding offset; shifted left by
  *                        (q_bits - QUANT_ROUND_FACTOR_Q) before being added
  * @param bit_depth       subtracted when forming transform_shift
  *
  * NOTE(review): the product abs(inp) * quant_coeff is formed in 32 bits, so
  * callers are presumably expected to keep it within WORD32 range - confirm.
  *
  * Every argument is parenthesized on expansion, so expressions such as
  * "a + b" can be passed safely. The macro expands to a plain { } block
  * declaring locals named tmp, sign, transform_shift, q_bits and
  * quant_multiplier; arguments must not use those names, and the macro should
  * not be used as the unbraced body of an if/else.
  */
 #define QUANT_NO_WEIGHTMAT_HBD(out, inp, quant_coeff, qp_div, log2_trans_size, q_add, bit_depth) \
 { \
     WORD32 tmp; \
     WORD32 sign; \
     WORD32 transform_shift; \
     WORD32 q_bits, quant_multiplier; \
     \
     /* q_bits calculation */ \
     /* To be moved outside in neon. To be computed once per transform call */ \
     transform_shift = MAX_TR_DYNAMIC_RANGE - (bit_depth) - (log2_trans_size); \
     /* quant_coeff are multiplied by 16. Instead of multiplying, the division factor q_bits is reduced by 4 */ \
     quant_multiplier = 4; \
     q_bits = QUANT_SHIFT + (qp_div) + transform_shift + SCALING_Q_SHIFT - quant_multiplier - FLAT_RESCALE_MAT_Q_SHIFT /* 2048 */; \
     \
     sign = (inp) < 0 ? -1 : 1; \
     \
     tmp = (WORD32)(abs(inp)); \
     tmp = tmp * (quant_coeff); \
     tmp = tmp + (((WORD32)(q_add)) << (q_bits - QUANT_ROUND_FACTOR_Q)); \
     tmp = tmp >> q_bits; \
     \
     tmp = tmp * sign; \
     (out) = (WORD16) CLIP_S16(tmp); \
 }
 /* Reference Inverse Quantization: "pi2_src"(Coefficients) will be clipped to 15 or 14 bits when (qp_div > shift_iq). Spec doesn't have any clip mentioned  */

 /* Inverse quantization other than 4x4 */
 /* No clipping is needed for "pi2_src"(coefficients) */
 /**
  * IQUANT: inverse-quantizes a single coefficient (used for sizes other than
  * 4x4; the input coefficient is not clipped).
  *
  *   add_iq = SHL_NEG(1, shift_iq - qp_div - 1)
  *   res    = CLIP_S16(SHR_NEG(coeff * dequant_coeff + add_iq,
  *                             shift_iq - qp_div))
  *
  * The arithmetic is carried out in WORD32.
  *
  * @param res           lvalue receiving the result of CLIP_S16
  * @param coeff         quantized coefficient, e.g. pi2_src[index*src_strd]
  * @param dequant_coeff scale factor, e.g. pi2_dequant_coeff[index*trans_size]
  *                      * g_ihevc_iquant_scales[qp_rem]
  * @param shift_iq      base shift amount
  * @param qp_div        subtracted from shift_iq to form the final shift
  *
  * NOTE(review): SHL_NEG / SHR_NEG are defined elsewhere; presumably they
  * shift in the opposite direction when the count is negative - confirm.
  *
  * Every argument is parenthesized on expansion, so expressions such as
  * "a + b" can be passed safely. The macro expands to a plain { } block
  * declaring locals named tmp and add_iq; arguments must not use those names,
  * and the macro should not be used as the unbraced body of an if/else.
  */
 #define IQUANT(res, coeff, dequant_coeff, shift_iq, qp_div) \
 { \
     WORD32 tmp, add_iq; \
     \
     /* To be moved outside in neon. To be computed once per transform call */ \
     add_iq = SHL_NEG(1, ((shift_iq) - (qp_div) - 1)); \
     \
     tmp = (coeff) * (dequant_coeff); \
     tmp = tmp + add_iq; \
     tmp = SHR_NEG(tmp, ((shift_iq) - (qp_div))); \
     \
     (res) = CLIP_S16(tmp); \
 }

 /* 4x4 inverse quantization */
 /* Options : */
 /* 1. Clip "pi2_src"(coefficients) to 10 bits if "(qp_div > shift_iq)" or 16 bits if "(qp_div <= shift_iq)"*/
 /* 2. Increasing precision of "pi2_src"(coefficients) to 64 bits */

 /**
  * IQUANT_4x4: inverse-quantizes a single coefficient for the 4x4 case. The
  * input coefficient is first clamped with CLIP3 to a signed range of
  * coeff_bit_range bits: 10 bits when qp_div > shift_iq, 16 bits otherwise.
  *
  *   add_iq     = SHL_NEG(1, shift_iq - qp_div - 1)
  *   clip_coeff = CLIP3(coeff, -(1 << (bits - 1)), (1 << (bits - 1)) - 1)
  *   res        = CLIP_S16(SHR_NEG(clip_coeff * dequant_coeff + add_iq,
  *                                 shift_iq - qp_div))
  *
  * The arithmetic is carried out in WORD32.
  *
  * @param res           lvalue receiving the result of CLIP_S16
  * @param coeff         quantized coefficient, e.g. pi2_src[index*src_strd]
  * @param dequant_coeff scale factor, e.g. pi2_dequant_coeff[index*trans_size]
  *                      * g_ihevc_iquant_scales[qp_rem]
  * @param shift_iq      base shift amount
  * @param qp_div        subtracted from shift_iq to form the final shift
  *
  * NOTE(review): SHL_NEG / SHR_NEG are defined elsewhere; presumably they
  * shift in the opposite direction when the count is negative - confirm.
  *
  * Every argument is parenthesized on expansion, so expressions such as
  * "a + b" can be passed safely. The macro expands to a plain { } block
  * declaring locals named clip_coeff, tmp, coeff_min, coeff_max,
  * coeff_bit_range and add_iq; arguments must not use those names, and the
  * macro should not be used as the unbraced body of an if/else.
  */
 #define IQUANT_4x4(res, coeff, dequant_coeff, shift_iq, qp_div) \
 { \
     WORD32 clip_coeff, tmp; \
     WORD32 coeff_min, coeff_max; \
     WORD32 coeff_bit_range; \
     WORD32 add_iq; \
     \
     /* To be moved outside in neon. To be computed once per transform call */ \
     add_iq = SHL_NEG(1, ((shift_iq) - (qp_div) - 1)); \
     \
     /* Narrow the accepted input range when the final shift is a left shift */ \
     coeff_bit_range = 16; \
     if((qp_div) > (shift_iq)) \
     { \
         coeff_bit_range = 10; \
     } \
     \
     coeff_min = -(1 << (coeff_bit_range - 1)); \
     coeff_max = (1 << (coeff_bit_range - 1)) - 1; \
     \
     clip_coeff = CLIP3((coeff), coeff_min, coeff_max); \
     \
     tmp = clip_coeff * (dequant_coeff); \
     tmp = tmp + add_iq; \
     tmp = SHR_NEG(tmp, ((shift_iq) - (qp_div))); \
     \
     (res) = CLIP_S16(tmp); \
 }

 #endif /* IHEVC_TRANS_MACROS_H_ */
	/******************************************************************************
	*
	* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at:
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*
	******************************************************************************/
	/**
	*******************************************************************************
	* @file
	* ihevc_trans_macros.h
	*
	* @brief
	* Macros used in the forward transform and inverse transform functions
	*
	* @author
	* Ittiam
	*
	* @remarks
	* None
	*
	*******************************************************************************
	*/
	#ifndef IHEVC_TRANS_MACROS_H_
	#define IHEVC_TRANS_MACROS_H_

	/**
	 * QUANT: forward-quantizes a single transform coefficient with the bit
	 * depth fixed at 8.
	 *
	 *   transform_shift = MAX_TR_DYNAMIC_RANGE - 8 - log2_trans_size
	 *   q_bits          = QUANT_SHIFT + qp_div + transform_shift
	 *                     + SCALING_Q_SHIFT - 4
	 *   out = CLIP_S16(sign(inp) * ((abs(inp) * quant_coeff
	 *             + (q_add << (q_bits - QUANT_ROUND_FACTOR_Q))) >> q_bits))
	 *
	 * The intermediate value is held in an LWORD64.
	 *
	 * @param out             lvalue receiving the result (cast to WORD16)
	 * @param inp             coefficient to quantize; it is expanded twice,
	 *                        so it must not have side effects
	 * @param quant_coeff     multiplier applied to abs(inp)
	 * @param qp_div          added to the right-shift amount q_bits
	 * @param log2_trans_size subtracted when forming transform_shift
	 * @param q_add           rounding offset; shifted left by
	 *                        (q_bits - QUANT_ROUND_FACTOR_Q) before being added
	 *
	 * Every argument is parenthesized on expansion, so expressions such as
	 * "a + b" can be passed safely. The macro expands to a plain { } block
	 * declaring locals named tmp, sign, bit_depth, transform_shift, q_bits
	 * and quant_multiplier; arguments must not use those names, and the macro
	 * should not be used as the unbraced body of an if/else.
	 */
	#define QUANT(out, inp, quant_coeff, qp_div, log2_trans_size, q_add) \
	{ \
	    LWORD64 tmp; \
	    WORD32 sign; \
	    WORD32 bit_depth, transform_shift; \
	    WORD32 q_bits, quant_multiplier; \
	    \
	    /* q_bits calculation */ \
	    /* To be moved outside in neon. To be computed once per transform call */ \
	    bit_depth = 8; \
	    transform_shift = MAX_TR_DYNAMIC_RANGE - bit_depth - (log2_trans_size); \
	    /* quant_coeff are multiplied by 16. Instead of multiplying, the division factor q_bits is reduced by 4 */ \
	    quant_multiplier = 4; \
	    q_bits = QUANT_SHIFT + (qp_div) + transform_shift + SCALING_Q_SHIFT - quant_multiplier; \
	    \
	    sign = (inp) < 0 ? -1 : 1; \
	    \
	    tmp = (LWORD64)(abs(inp)); \
	    tmp = tmp * (quant_coeff); \
	    tmp = tmp + (((LWORD64)(q_add)) << (q_bits - QUANT_ROUND_FACTOR_Q)); \
	    tmp = tmp >> q_bits; \
	    \
	    tmp = tmp * sign; \
	    (out) = (WORD16) CLIP_S16(tmp); \
	}

	/**
	 * QUANT_HBD: forward-quantizes a single transform coefficient; identical
	 * to the 8-bit computation except that the bit depth is supplied by the
	 * caller.
	 *
	 *   transform_shift = MAX_TR_DYNAMIC_RANGE - bit_depth - log2_trans_size
	 *   q_bits          = QUANT_SHIFT + qp_div + transform_shift
	 *                     + SCALING_Q_SHIFT - 4
	 *   out = CLIP_S16(sign(inp) * ((abs(inp) * quant_coeff
	 *             + (q_add << (q_bits - QUANT_ROUND_FACTOR_Q))) >> q_bits))
	 *
	 * The intermediate value is held in an LWORD64.
	 *
	 * @param out             lvalue receiving the result (cast to WORD16)
	 * @param inp             coefficient to quantize; it is expanded twice,
	 *                        so it must not have side effects
	 * @param quant_coeff     multiplier applied to abs(inp)
	 * @param qp_div          added to the right-shift amount q_bits
	 * @param log2_trans_size subtracted when forming transform_shift
	 * @param q_add           rounding offset; shifted left by
	 *                        (q_bits - QUANT_ROUND_FACTOR_Q) before being added
	 * @param bit_depth       subtracted when forming transform_shift
	 *
	 * Every argument is parenthesized on expansion, so expressions such as
	 * "a + b" can be passed safely. The macro expands to a plain { } block
	 * declaring locals named tmp, sign, transform_shift, q_bits and
	 * quant_multiplier; arguments must not use those names, and the macro
	 * should not be used as the unbraced body of an if/else.
	 */
	#define QUANT_HBD(out, inp, quant_coeff, qp_div, log2_trans_size, q_add, bit_depth) \
	{ \
	    LWORD64 tmp; \
	    WORD32 sign; \
	    WORD32 transform_shift; \
	    WORD32 q_bits, quant_multiplier; \
	    \
	    /* q_bits calculation */ \
	    /* To be moved outside in neon. To be computed once per transform call */ \
	    transform_shift = MAX_TR_DYNAMIC_RANGE - (bit_depth) - (log2_trans_size); \
	    /* quant_coeff are multiplied by 16. Instead of multiplying, the division factor q_bits is reduced by 4 */ \
	    quant_multiplier = 4; \
	    q_bits = QUANT_SHIFT + (qp_div) + transform_shift + SCALING_Q_SHIFT - quant_multiplier; \
	    \
	    sign = (inp) < 0 ? -1 : 1; \
	    \
	    tmp = (LWORD64)(abs(inp)); \
	    tmp = tmp * (quant_coeff); \
	    tmp = tmp + (((LWORD64)(q_add)) << (q_bits - QUANT_ROUND_FACTOR_Q)); \
	    tmp = tmp >> q_bits; \
	    \
	    tmp = tmp * sign; \
	    (out) = (WORD16) CLIP_S16(tmp); \
	}
	/* added by 100028 */
	/**
	 * QUANT_NO_WEIGHTMAT: forward-quantizes a single transform coefficient
	 * with the bit depth fixed at 8, using a right shift that is additionally
	 * reduced by FLAT_RESCALE_MAT_Q_SHIFT. All arithmetic is done in WORD32.
	 *
	 *   transform_shift = MAX_TR_DYNAMIC_RANGE - 8 - log2_trans_size
	 *   q_bits          = QUANT_SHIFT + qp_div + transform_shift
	 *                     + SCALING_Q_SHIFT - 4 - FLAT_RESCALE_MAT_Q_SHIFT
	 *   out = CLIP_S16(sign(inp) * ((abs(inp) * quant_coeff
	 *             + (q_add << (q_bits - QUANT_ROUND_FACTOR_Q))) >> q_bits))
	 *
	 * @param out             lvalue receiving the result (cast to WORD16)
	 * @param inp             coefficient to quantize; it is expanded twice,
	 *                        so it must not have side effects
	 * @param quant_coeff     multiplier applied to abs(inp)
	 * @param qp_div          added to the right-shift amount q_bits
	 * @param log2_trans_size subtracted when forming transform_shift
	 * @param q_add           rounding offset; shifted left by
	 *                        (q_bits - QUANT_ROUND_FACTOR_Q) before being added
	 *
	 * NOTE(review): the product abs(inp) * quant_coeff is formed in 32 bits,
	 * so callers are presumably expected to keep it within WORD32 range -
	 * confirm.
	 *
	 * Every argument is parenthesized on expansion, so expressions such as
	 * "a + b" can be passed safely. The macro expands to a plain { } block
	 * declaring locals named tmp, sign, bit_depth, transform_shift, q_bits
	 * and quant_multiplier; arguments must not use those names, and the macro
	 * should not be used as the unbraced body of an if/else.
	 */
	#define QUANT_NO_WEIGHTMAT(out, inp, quant_coeff, qp_div, log2_trans_size, q_add) \
	{ \
	    WORD32 tmp; \
	    WORD32 sign; \
	    WORD32 bit_depth, transform_shift; \
	    WORD32 q_bits, quant_multiplier; \
	    \
	    /* q_bits calculation */ \
	    /* To be moved outside in neon. To be computed once per transform call */ \
	    bit_depth = 8; \
	    transform_shift = MAX_TR_DYNAMIC_RANGE - bit_depth - (log2_trans_size); \
	    /* quant_coeff are multiplied by 16. Instead of multiplying, the division factor q_bits is reduced by 4 */ \
	    quant_multiplier = 4; \
	    q_bits = QUANT_SHIFT + (qp_div) + transform_shift + SCALING_Q_SHIFT - quant_multiplier - FLAT_RESCALE_MAT_Q_SHIFT /* 2048 */; \
	    \
	    sign = (inp) < 0 ? -1 : 1; \
	    \
	    tmp = (WORD32)(abs(inp)); \
	    tmp = tmp * (quant_coeff); \
	    tmp = tmp + (((WORD32)(q_add)) << (q_bits - QUANT_ROUND_FACTOR_Q)); \
	    tmp = tmp >> q_bits; \
	    \
	    tmp = tmp * sign; \
	    (out) = (WORD16) CLIP_S16(tmp); \
	}

	/**
	 * QUANT_NO_WEIGHTMAT_HBD: forward-quantizes a single transform coefficient
	 * with a caller-supplied bit depth, using a right shift that is
	 * additionally reduced by FLAT_RESCALE_MAT_Q_SHIFT. All arithmetic is
	 * done in WORD32.
	 *
	 *   transform_shift = MAX_TR_DYNAMIC_RANGE - bit_depth - log2_trans_size
	 *   q_bits          = QUANT_SHIFT + qp_div + transform_shift
	 *                     + SCALING_Q_SHIFT - 4 - FLAT_RESCALE_MAT_Q_SHIFT
	 *   out = CLIP_S16(sign(inp) * ((abs(inp) * quant_coeff
	 *             + (q_add << (q_bits - QUANT_ROUND_FACTOR_Q))) >> q_bits))
	 *
	 * @param out             lvalue receiving the result (cast to WORD16)
	 * @param inp             coefficient to quantize; it is expanded twice,
	 *                        so it must not have side effects
	 * @param quant_coeff     multiplier applied to abs(inp)
	 * @param qp_div          added to the right-shift amount q_bits
	 * @param log2_trans_size subtracted when forming transform_shift
	 * @param q_add           rounding offset; shifted left by
	 *                        (q_bits - QUANT_ROUND_FACTOR_Q) before being added
	 * @param bit_depth       subtracted when forming transform_shift
	 *
	 * NOTE(review): the product abs(inp) * quant_coeff is formed in 32 bits,
	 * so callers are presumably expected to keep it within WORD32 range -
	 * confirm.
	 *
	 * Every argument is parenthesized on expansion, so expressions such as
	 * "a + b" can be passed safely. The macro expands to a plain { } block
	 * declaring locals named tmp, sign, transform_shift, q_bits and
	 * quant_multiplier; arguments must not use those names, and the macro
	 * should not be used as the unbraced body of an if/else.
	 */
	#define QUANT_NO_WEIGHTMAT_HBD(out, inp, quant_coeff, qp_div, log2_trans_size, q_add, bit_depth) \
	{ \
	    WORD32 tmp; \
	    WORD32 sign; \
	    WORD32 transform_shift; \
	    WORD32 q_bits, quant_multiplier; \
	    \
	    /* q_bits calculation */ \
	    /* To be moved outside in neon. To be computed once per transform call */ \
	    transform_shift = MAX_TR_DYNAMIC_RANGE - (bit_depth) - (log2_trans_size); \
	    /* quant_coeff are multiplied by 16. Instead of multiplying, the division factor q_bits is reduced by 4 */ \
	    quant_multiplier = 4; \
	    q_bits = QUANT_SHIFT + (qp_div) + transform_shift + SCALING_Q_SHIFT - quant_multiplier - FLAT_RESCALE_MAT_Q_SHIFT /* 2048 */; \
	    \
	    sign = (inp) < 0 ? -1 : 1; \
	    \
	    tmp = (WORD32)(abs(inp)); \
	    tmp = tmp * (quant_coeff); \
	    tmp = tmp + (((WORD32)(q_add)) << (q_bits - QUANT_ROUND_FACTOR_Q)); \
	    tmp = tmp >> q_bits; \
	    \
	    tmp = tmp * sign; \
	    (out) = (WORD16) CLIP_S16(tmp); \
	}
	/* Reference Inverse Quantization: "pi2_src"(Coefficients) will be clipped to 15 or 14 bits when (qp_div > shift_iq). Spec doesn't have any clip mentioned */

	/* Inverse quantization other than 4x4 */
	/* No clipping is needed for "pi2_src"(coefficients) */
	/**
	 * IQUANT: inverse-quantizes a single coefficient (used for sizes other
	 * than 4x4; the input coefficient is not clipped).
	 *
	 *   add_iq = SHL_NEG(1, shift_iq - qp_div - 1)
	 *   res    = CLIP_S16(SHR_NEG(coeff * dequant_coeff + add_iq,
	 *                             shift_iq - qp_div))
	 *
	 * The arithmetic is carried out in WORD32.
	 *
	 * @param res           lvalue receiving the result of CLIP_S16
	 * @param coeff         quantized coefficient, e.g. pi2_src[index*src_strd]
	 * @param dequant_coeff scale factor, e.g.
	 *                      pi2_dequant_coeff[index*trans_size]
	 *                      * g_ihevc_iquant_scales[qp_rem]
	 * @param shift_iq      base shift amount
	 * @param qp_div        subtracted from shift_iq to form the final shift
	 *
	 * NOTE(review): SHL_NEG / SHR_NEG are defined elsewhere; presumably they
	 * shift in the opposite direction when the count is negative - confirm.
	 *
	 * Every argument is parenthesized on expansion, so expressions such as
	 * "a + b" can be passed safely. The macro expands to a plain { } block
	 * declaring locals named tmp and add_iq; arguments must not use those
	 * names, and the macro should not be used as the unbraced body of an
	 * if/else.
	 */
	#define IQUANT(res, coeff, dequant_coeff, shift_iq, qp_div) \
	{ \
	    WORD32 tmp, add_iq; \
	    \
	    /* To be moved outside in neon. To be computed once per transform call */ \
	    add_iq = SHL_NEG(1, ((shift_iq) - (qp_div) - 1)); \
	    \
	    tmp = (coeff) * (dequant_coeff); \
	    tmp = tmp + add_iq; \
	    tmp = SHR_NEG(tmp, ((shift_iq) - (qp_div))); \
	    \
	    (res) = CLIP_S16(tmp); \
	}

	/* 4x4 inverse quantization */
	/* Options : */
	/* 1. Clip "pi2_src"(coefficients) to 10 bits if "(qp_div > shift_iq)" or 16 bits if "(qp_div <= shift_iq)"*/
	/* 2. Increasing precision of "pi2_src"(coefficients) to 64 bits */

	/**
	 * IQUANT_4x4: inverse-quantizes a single coefficient for the 4x4 case.
	 * The input coefficient is first clamped with CLIP3 to a signed range of
	 * coeff_bit_range bits: 10 bits when qp_div > shift_iq, 16 bits otherwise.
	 *
	 *   add_iq     = SHL_NEG(1, shift_iq - qp_div - 1)
	 *   clip_coeff = CLIP3(coeff, -(1 << (bits - 1)), (1 << (bits - 1)) - 1)
	 *   res        = CLIP_S16(SHR_NEG(clip_coeff * dequant_coeff + add_iq,
	 *                                 shift_iq - qp_div))
	 *
	 * The arithmetic is carried out in WORD32.
	 *
	 * @param res           lvalue receiving the result of CLIP_S16
	 * @param coeff         quantized coefficient, e.g. pi2_src[index*src_strd]
	 * @param dequant_coeff scale factor, e.g.
	 *                      pi2_dequant_coeff[index*trans_size]
	 *                      * g_ihevc_iquant_scales[qp_rem]
	 * @param shift_iq      base shift amount
	 * @param qp_div        subtracted from shift_iq to form the final shift
	 *
	 * NOTE(review): SHL_NEG / SHR_NEG are defined elsewhere; presumably they
	 * shift in the opposite direction when the count is negative - confirm.
	 *
	 * Every argument is parenthesized on expansion, so expressions such as
	 * "a + b" can be passed safely. The macro expands to a plain { } block
	 * declaring locals named clip_coeff, tmp, coeff_min, coeff_max,
	 * coeff_bit_range and add_iq; arguments must not use those names, and
	 * the macro should not be used as the unbraced body of an if/else.
	 */
	#define IQUANT_4x4(res, coeff, dequant_coeff, shift_iq, qp_div) \
	{ \
	    WORD32 clip_coeff, tmp; \
	    WORD32 coeff_min, coeff_max; \
	    WORD32 coeff_bit_range; \
	    WORD32 add_iq; \
	    \
	    /* To be moved outside in neon. To be computed once per transform call */ \
	    add_iq = SHL_NEG(1, ((shift_iq) - (qp_div) - 1)); \
	    \
	    /* Narrow the accepted input range when the final shift is a left shift */ \
	    coeff_bit_range = 16; \
	    if((qp_div) > (shift_iq)) \
	    { \
	        coeff_bit_range = 10; \
	    } \
	    \
	    coeff_min = -(1 << (coeff_bit_range - 1)); \
	    coeff_max = (1 << (coeff_bit_range - 1)) - 1; \
	    \
	    clip_coeff = CLIP3((coeff), coeff_min, coeff_max); \
	    \
	    tmp = clip_coeff * (dequant_coeff); \
	    tmp = tmp + add_iq; \
	    tmp = SHR_NEG(tmp, ((shift_iq) - (qp_div))); \
	    \
	    (res) = CLIP_S16(tmp); \
	}

	#endif /* IHEVC_TRANS_MACROS_H_ */