| /* |
| * Copyright (c) Meta Platforms, Inc. and affiliates. |
| * All rights reserved. |
| * |
| * This source code is licensed under the BSD-style license found in the |
| * LICENSE file in the root directory of this source tree. |
| */ |
| |
| #pragma once |
| #include <unistd.h> |
| #include <algorithm> |
| #include <cmath> |
| #include <cstdint> |
| #include <limits> |
| #include <type_traits> |
| #include <utility> |
| #include <vector> |
| |
| #include <executorch/runtime/core/error.h> |
| #include <executorch/runtime/core/exec_aten/exec_aten.h> |
| |
| #ifdef __aarch64__ |
| #include <arm_neon.h> |
| #endif |
| |
| namespace executorch { |
| namespace backends { |
| namespace xnnpack { |
| namespace utils { |
| |
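| // Affine quantization parameters: a real value r maps to the quantized |
| // value q = round(r / scale) + zero_point, and back via |
| // r = scale * (q - zero_point). |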
| struct QuantizationParams { |
| double scale; |
| int32_t zero_point; |
| }; |
| |
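| // Chooses affine quantization parameters (scale, zero_point) that map the |
| // observed real range [min, max] onto the quantized range [qmin, qmax], |
| // writing them to `result`. The flags follow the usual conventions: |
| // `preserve_sparsity` keeps real 0.0 exactly representable, |
| // `force_scale_power_of_two` constrains the scale to a power of two, and |
| // `reduce_range` narrows the quantized range (commonly done to avoid |
| // accumulator overflow in some SIMD kernels). |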
| executorch::runtime::Error ChooseQuantizationParams( |
| float min, |
| float max, |
| int32_t qmin, |
| int32_t qmax, |
| QuantizationParams& result, |
| bool preserve_sparsity, |
| bool force_scale_power_of_two, |
| bool reduce_range); |
| |
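| // Older Android toolchains (no __NDK_MAJOR__) do not provide std::nearbyint, |
| // so fall back to the global-namespace C functions there. |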
| #if defined(__ANDROID__) && !defined(__NDK_MAJOR__) |
| template <class T> |
| inline float Round(const float x) { |
| return ::nearbyintf(x); |
| } |
| inline double Round(const double x) { |
| return ::nearbyint(x); |
| } |
| #else |
| template <class T> |
| inline T Round(const T x) { |
| return std::nearbyint(x); |
| } |
| #endif |
| |
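| // Quantizes a single float as round(value / scale) + zero_point, saturated |
| // to the numeric limits of T. |
| // |
| // Worked example (round-to-nearest-even): with scale = 0.25, |
| // zero_point = 128, and value = 2.6f, nearbyint(2.6 / 0.25) + 128 = |
| // 10 + 128 = 138, which lies inside [0, 255] for T = uint8_t. |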
| template <typename T> |
| T quantize_val(double scale, int64_t zero_point, float value) { |
| // std::nearbyint rounds to the nearest integer according to the current |
| // rounding mode; the default mode rounds half-way cases to even on the most |
| // popular processor architectures, such as x86 and ARM. This is typically |
| // faster than alternatives like std::round, which rounds half-way cases away |
| // from zero, and it is consistent with SIMD implementations, e.g. x86's |
| // _mm512_cvtps_epi32 or _mm512_round_ps with the _MM_FROUND_CUR_DIRECTION |
| // option, which also follow the current rounding mode. |
| int64_t qvalue; |
| constexpr int64_t qmin = std::numeric_limits<T>::min(); |
| constexpr int64_t qmax = std::numeric_limits<T>::max(); |
| float inv_scale = 1.0f / static_cast<float>(scale); |
| qvalue = static_cast<int64_t>(zero_point + Round(value * inv_scale)); |
| qvalue = std::max<int64_t>(qvalue, qmin); |
| qvalue = std::min<int64_t>(qvalue, qmax); |
| return static_cast<T>(qvalue); |
| } |
| |
| #ifdef __aarch64__ |
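| // Saturating narrow from int16x8_t to the 8-lane, 8-bit output vector type |
| // (uint8x8_t or int8x8_t); specializations per output type are expected to |
| // be provided alongside the implementation. |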
| template <typename Tx8> |
| Tx8 vqmov(int16x8_t vraw); |
| |
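| // Stores the eight 8-bit lanes of `vout` to `out`; specialized per output |
| // type. |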
| template <typename T, typename Tx8> |
| void vst1(T* out, Tx8 vout); |
| |
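| // Quantizes N floats to 8-bit integers with NEON: multiply by 1/scale, round |
| // to nearest with ties to even (vcvtnq_s32_f32), add the zero point, and |
| // saturate to the output type, matching the scalar quantize_val path under |
| // the default rounding mode. |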
| template <typename underlying_t, typename underlying_x8_t> |
| void quantize_tensor_arm64_q8( |
| const float* __restrict__ in, |
| underlying_t* __restrict__ out, |
| const int64_t N, |
| const float scale, |
| const int32_t zero_point) { |
| const float inv_scale = 1.0f / scale; |
| // Use a 64-bit index so the comparison with N is not mixed-signedness. |
| int64_t i = 0; |
| underlying_t* out_underlying = out; |
| const float32x4_t vinv_scale = vdupq_n_f32(inv_scale); |
| |
| // An 8-bit zero point always fits in int16. |
| const int16x8_t vzero_point = vdupq_n_s16((int16_t)(uint16_t)zero_point); |
| // Vectorized main loop: quantize eight floats per iteration. |
| for (i = 0; i + 8 <= N; i += 8) { |
| const float32x4_t vin0123 = vld1q_f32(in); |
| in += 4; |
| const float32x4_t vin4567 = vld1q_f32(in); |
| in += 4; |
| // Scale, then round to nearest with ties to even. |
| const int32x4_t v0123_rounded = |
| vcvtnq_s32_f32(vmulq_f32(vin0123, vinv_scale)); |
| const int32x4_t v4567_rounded = |
| vcvtnq_s32_f32(vmulq_f32(vin4567, vinv_scale)); |
| // Narrow to int16 with saturation and add the zero point (saturating). |
| const int16x8_t v01234567_packed = vqaddq_s16( |
| vqmovn_high_s32(vqmovn_s32(v0123_rounded), v4567_rounded), vzero_point); |
| // Saturating narrow to the 8-bit output type and store eight lanes. |
| const underlying_x8_t vout01234567 = |
| vqmov<underlying_x8_t>(v01234567_packed); |
| vst1<underlying_t, underlying_x8_t>(out_underlying, vout01234567); |
| out_underlying += 8; |
| } |
| // Scalar tail for the remaining (N % 8) elements. |
| for (; i < N; ++i) { |
| (*out_underlying++) = |
| quantize_val<underlying_t>(scale, zero_point, (*in++)); |
| } |
| } |
| |
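| // Convenience wrapper that picks the vector type matching T (uint8x8_t or |
| // int8x8_t) and forwards to quantize_tensor_arm64_q8; presumably defined |
| // out of line for T = uint8_t and int8_t. |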
| template <typename T> |
| void quantize_tensor_arm64_q8_wrapper( |
| const float* __restrict__ in, |
| T* __restrict__ out, |
| const int64_t N, |
| const float scale, |
| const int32_t zero_point); |
| |
| #endif /* __aarch64__ */ |
| |
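| // Quantizes the float tensor `rtensor` per-tensor into `qtensor` (T must be |
| // uint8_t or int8_t), using the NEON kernel on aarch64 and a scalar loop |
| // elsewhere. `qtensor` must have at least as many elements as `rtensor`. |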
| template <typename T = uint8_t> |
| executorch::runtime::Error QuantizePerTensor( |
| const executorch::aten::Tensor& rtensor, |
| executorch::aten::Tensor& qtensor, |
| double scale, |
| int zero_point) { |
| const float* rdata = rtensor.const_data_ptr<float>(); |
| const int64_t numel = rtensor.numel(); |
| ET_CHECK_OR_RETURN_ERROR( |
| (std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value), |
| Internal, |
| "Expecting quantized output tensor of dtype uint8_t or int8_t"); |
| ET_CHECK_OR_RETURN_ERROR( |
| rtensor.numel() <= qtensor.numel(), |
| Internal, |
| "Expecting quantized output tensor of the same or larger size as input, %zd vs. %zd", |
| qtensor.numel(), |
| rtensor.numel()); |
| T* qdata = qtensor.mutable_data_ptr<T>(); |
| |
| #if defined(__aarch64__) |
| quantize_tensor_arm64_q8_wrapper<T>(rdata, qdata, numel, scale, zero_point); |
| #else |
| for (int64_t i = 0; i < numel; ++i) { |
| qdata[i] = quantize_val<T>(scale, zero_point, rdata[i]); |
| } |
| #endif /* __aarch64__ */ |
| return executorch::runtime::Error::Ok; |
| } |
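| |
| // Example (hypothetical caller sketch; `input`, `output`, `min`, and `max` |
| // are assumed to be set up by the caller): |
| // |
| //   QuantizationParams qparams; |
| //   executorch::runtime::Error err = ChooseQuantizationParams( |
| //       min, max, /*qmin=*/0, /*qmax=*/255, qparams, |
| //       /*preserve_sparsity=*/false, /*force_scale_power_of_two=*/false, |
| //       /*reduce_range=*/false); |
| //   if (err == executorch::runtime::Error::Ok) { |
| //     err = QuantizePerTensor<uint8_t>( |
| //         input, output, qparams.scale, qparams.zero_point); |
| //   } |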
| |
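| // Fills `requant_scales` with per-channel requantization scales |
| // (conventionally input_scale * weight_scales[i] / output_scale) used to |
| // map int32 accumulators back into the output quantized domain. |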
| executorch::runtime::Error GenerateRequantizationScale( |
| const executorch::aten::Tensor& weight_scales, |
| float input_scale, |
| float output_scale, |
| std::vector<float>& requant_scales); |
| |
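| // Returns the minimum and maximum element of the float tensor `ft` as a |
| // (min, max) pair. |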
| std::pair<float, float> GetMinMax(const executorch::aten::Tensor& ft); |
| |
| } // namespace utils |
| } // namespace xnnpack |
| } // namespace backends |
| } // namespace executorch |