// Quantized calculation utilities.
// TODO(vddang): Replace this with tensorflow/lite/kernels/internal/tensor_utils(common).h
// after TFLite module has been synced.

#ifndef ANDROID_FRAMEWORKS_ML_NN_COMMON_QUANTUTILS_H
#define ANDROID_FRAMEWORKS_ML_NN_COMMON_QUANTUTILS_H

#include <public/gemmlowp.h>

#include <cassert>
#include <cmath>
#include <cstdint>
#include <limits>
#include <memory>
#include <type_traits>

#include "LegacyUtils.h"
#include "OperationsUtils.h"

namespace android {
namespace nn {

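// Multiplies x by a 32-bit quantized multiplier using gemmlowp's saturating rounding doubling
// high-mul, applying the power-of-two exponent `shift`: a positive shift is applied as a left
// shift before the multiplication, a negative shift as a rounding right shift afterwards.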
inline int32_t MultiplyByQuantizedMultiplier(int32_t x, int32_t quantized_multiplier, int shift) {
    using gemmlowp::RoundingDivideByPOT;
    using gemmlowp::SaturatingRoundingDoublingHighMul;
    int left_shift = shift > 0 ? shift : 0;
    int right_shift = shift > 0 ? 0 : -shift;
    return RoundingDivideByPOT(
            SaturatingRoundingDoublingHighMul(x * (1 << left_shift), quantized_multiplier),
            right_shift);
}

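// Quantized matrix * batched-vector multiply-accumulate: for every batch and output row this
// computes bias[row] + dot(input, weights row), requantizes the sum with (multiplier, shift),
// adds the output zero point, accumulates the result into the existing output value, and clamps
// to the numeric range of T (expected to be int8_t or int16_t).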
template <typename T>
void MatrixBatchVectorMultiplyAccumulate(const int8_t* input, const int32_t* bias,
                                         const int8_t* input_to_gate_weights, int32_t multiplier,
                                         int32_t shift, int32_t n_batch, int32_t n_input,
                                         int32_t n_output, int32_t output_zp, T* output) {
    const int16_t output_max = std::numeric_limits<T>::max();
    const int16_t output_min = std::numeric_limits<T>::min();
    for (int batch = 0; batch < n_batch; ++batch) {
        for (int row = 0; row < n_output; ++row) {
            int32_t acc = bias[row];
            for (int col = 0; col < n_input; ++col) {
                int8_t input_val = input[batch * n_input + col];
                int8_t weights_val = input_to_gate_weights[row * n_input + col];
                acc += input_val * weights_val;
            }
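            // Requantize the accumulated dot product to the output scale, re-center it on the
            // output zero point, add it to the value already stored in output, and clamp to the
            // representable range of T.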
            acc = MultiplyByQuantizedMultiplier(acc, multiplier, shift);
            acc += output_zp;
            acc += output[batch * n_output + row];
            if (acc > output_max) {
                acc = output_max;
            }
            if (acc < output_min) {
                acc = output_min;
            }
            output[batch * n_output + row] = static_cast<T>(acc);
        }
    }
}

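// Returns the number of leading zero bits in integer_input, or the bit width of T when the input
// is zero. Note: the __builtin_clz fast path operates on unsigned int, so it is only exact for
// 32-bit types; this header only calls it with uint32_t.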
template <typename T>
int CountLeadingZeros(T integer_input) {
    static_assert(std::is_unsigned<T>::value, "Only unsigned integer types handled.");
#if defined(__GNUC__)
    return integer_input ? __builtin_clz(integer_input) : std::numeric_limits<T>::digits;
#else
    if (integer_input == 0) {
        return std::numeric_limits<T>::digits;
    }

    const T one_in_leading_positive = static_cast<T>(1) << (std::numeric_limits<T>::digits - 1);
    int leading_zeros = 0;
    while (integer_input < one_in_leading_positive) {
        integer_input <<= 1;
        ++leading_zeros;
    }
    return leading_zeros;
#endif
}

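// Computes an integer-only approximation of 1 / sqrt(input) as a 32-bit fixed-point multiplier
// (*output_inv_sqrt) plus a power-of-two exponent (*output_shift), using a few Newton-Raphson
// iterations. The shift is computed with a "right shift is positive" convention and then
// multiplied by reverse_shift (e.g. -1 to express it as a left shift). Returns false if input is
// negative.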
inline bool GetInvSqrtQuantizedMultiplierExp(int32_t input, int reverse_shift,
                                             int32_t* output_inv_sqrt, int* output_shift) {
    NN_RET_CHECK_GE(input, 0);
    if (input <= 1) {
        // Handle the input value 1 separately to avoid overflow in that case in the general
        // computation below. Also handle 0 as if it were a 1. 0 is an invalid input here
        // (divide by zero) and 1 is a valid but rare/unrealistic input value. We can expect
        // both to occur in some incompletely trained models, but probably not in fully
        // trained models.
        *output_inv_sqrt = std::numeric_limits<std::int32_t>::max();
        *output_shift = 0;
        return true;
    }

    *output_shift = 11;
    while (input >= (1 << 29)) {
        input /= 4;
        ++*output_shift;
    }
    const unsigned max_left_shift_bits = CountLeadingZeros(static_cast<uint32_t>(input)) - 1;
    const unsigned max_left_shift_bit_pairs = max_left_shift_bits / 2;
    const unsigned left_shift_bit_pairs = max_left_shift_bit_pairs - 1;
    *output_shift -= left_shift_bit_pairs;
    input <<= 2 * left_shift_bit_pairs;
    NN_RET_CHECK_GE(input, (1 << 27));
    NN_RET_CHECK_LT(input, (1 << 29));
    using gemmlowp::FixedPoint;
    using gemmlowp::Rescale;
    using gemmlowp::SaturatingRoundingMultiplyByPOT;
    // Using 3 integer bits gives us enough room for the internal arithmetic in
    // this Newton-Raphson iteration.
    using F3 = FixedPoint<int32_t, 3>;
    using F0 = FixedPoint<int32_t, 0>;
    const F3 fixedpoint_input = F3::FromRaw(input >> 1);
    const F3 fixedpoint_half_input = SaturatingRoundingMultiplyByPOT<-1>(fixedpoint_input);
    const F3 fixedpoint_half_three =
            GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F3, (1 << 28) + (1 << 27), 1.5);
    // Newton-Raphson iteration
    // Naive unoptimized starting guess: x = 1
    F3 x = F3::One();
    // Naive unoptimized number of iterations: 5
    for (int i = 0; i < 5; i++) {
        const F3 x3 = Rescale<3>(x * x * x);
        x = Rescale<3>(fixedpoint_half_three * x - fixedpoint_half_input * x3);
    }
    const F0 fixedpoint_half_sqrt_2 =
            GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F0, 1518500250, std::sqrt(2.) / 2.);
    x = x * fixedpoint_half_sqrt_2;
    *output_inv_sqrt = x.raw();
    if (*output_shift < 0) {
        *output_inv_sqrt <<= -*output_shift;
        *output_shift = 0;
    }
    // Convert right shift (right is positive) to left shift.
    *output_shift *= reverse_shift;
    return true;
}

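// Layer normalization for an int16 batched input: each row is centered and scaled by the inverse
// of its standard deviation, multiplied element-wise by layer_norm_weights, offset by bias, and
// rescaled back to int16 with the quantized multiplier/exponent pair
// (layer_norm_scale_a, layer_norm_scale_b).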
void ApplyLayerNorm(const int16_t* input, const int16_t* layer_norm_weights, const int32_t* bias,
                    int32_t layer_norm_scale_a, int32_t layer_norm_scale_b, int32_t variance_limit,
                    int n_batch, int n_input, int16_t* output);

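// For each of the n_row rows of matrix, accumulates scalar * (sum of the row's n_col elements)
// into output[row]; used when folding a constant zero point into precomputed bias terms.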
void MatrixScalarMultiplyAccumulate(const int8_t* matrix, int32_t scalar, int32_t n_row,
                                    int32_t n_col, int32_t* output);

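// Precomputes, for every row of the 2-D weight_tensor, bias_tensor[row] + zero_point * (sum of
// the row's weights), allocating the result array into *output (a null bias_tensor is treated as
// all zeros). Returns false if validation of the weight shape fails.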
bool PrecomputeZeroPointTimesWeightWithBias(int32_t zero_point, const int8_t* weight_tensor,
                                            const Shape& weight_shape, const int32_t* bias_tensor,
                                            std::unique_ptr<int32_t[]>* output);

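// Element-wise logistic (sigmoid) activation on a batch of int16 inputs; the inputs are
// interpreted as Q3.12 fixed point and the outputs are written as Q0.15.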
void ApplySigmoid(const int16_t* input, int32_t n_batch, int32_t n_input, int16_t* output);

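// Element-wise tanh on a batch of int16 inputs interpreted as fixed point with IntegerBits
// integer bits; the outputs are written as Q0.15.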
template <int IntegerBits>
void ApplyTanh(const int16_t* input, int32_t n_batch, int32_t n_input, int16_t* output) {
    using FX = gemmlowp::FixedPoint<std::int16_t, IntegerBits>;
    using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
    for (int batch = 0; batch < n_batch; ++batch) {
        for (int i = 0; i < n_input; ++i) {
            const int index = batch * n_input + i;
            FX tanh_input = FX::FromRaw(input[index]);
            F0 tanh_output = gemmlowp::tanh(tanh_input);
            output[index] = tanh_output.raw();
        }
    }
}

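// Runtime dispatch to the templated ApplyTanh above for integer_bits in [0, 6]; any other value
// is a no-op (and trips the assert in debug builds).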
inline void ApplyTanh(int32_t integer_bits, const int16_t* input, int32_t n_batch, int32_t n_input,
                      int16_t* output) {
    assert(integer_bits <= 6);
#define DISPATCH_TANH(i)                               \
    case i:                                            \
        ApplyTanh<i>(input, n_batch, n_input, output); \
        break;
    switch (integer_bits) {
        DISPATCH_TANH(0);
        DISPATCH_TANH(1);
        DISPATCH_TANH(2);
        DISPATCH_TANH(3);
        DISPATCH_TANH(4);
        DISPATCH_TANH(5);
        DISPATCH_TANH(6);
        default:
            return;
    }
#undef DISPATCH_TANH
}

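// Element-wise product of two int16 batched vectors; each 32-bit product is reduced back to
// int16 with a rounding right shift by `shift`.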
void CwiseMul(const int16_t* input_1, const int16_t* input_2, int n_batch, int n_input, int shift,
              int16_t* output);
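// Element-wise product of two int16 batched vectors, requantized with (multiplier, shift),
// adjusted by the output zero point, and saturated to int8.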
void CwiseMul(const int16_t* input_1, const int16_t* input_2, int32_t multiplier, int32_t shift,
              int32_t n_batch, int32_t n_input, int32_t output_zp, int8_t* output);

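// If x is an exact power of two, stores log2(x) in *log2_result and returns true; otherwise
// returns false.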
bool CheckedLog2(const float x, int* log2_result);

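// Element-wise addition of two int16 batched vectors, with each sum saturated to the int16 range.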
void CwiseAdd(const int16_t* input_1, const int16_t* input_2, int n_batch, int n_input,
              int16_t* output);

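// Computes (1 - x) element-wise in Q0.15 fixed point, where kOne (32767) stands in for 1.0.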
inline void Sub1Vector(const int16_t* vector, int v_size, int16_t* result) {
    static const int16_t kOne = 32767;
    for (int v = 0; v < v_size; v++) {
        *result++ = kOne - *vector++;
    }
}

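// Symmetric in-place clipping: every element of input is clamped to
// [-clipping_value, clipping_value]. Overloads are provided for int16 and int8 tensors.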
void CwiseClipping(int16_t* input, const int16_t clipping_value, int32_t n_batch, int32_t n_input);

void CwiseClipping(int8_t* input, const int8_t clipping_value, int32_t n_batch, int32_t n_input);

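// For every batch, multiplies vector element-wise with the corresponding row of batch_vector,
// requantizes each product with (multiplier, shift), and accumulates it into result, saturating
// to the int16 range.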
void VectorBatchVectorCwiseProductAccumulate(const int16_t* vector, int v_size,
                                             const int16_t* batch_vector, int n_batch,
                                             int32_t multiplier, int shift, int16_t* result);

}  // namespace nn
}  // namespace android

#endif  // ANDROID_FRAMEWORKS_ML_NN_COMMON_QUANTUTILS_H