examples/xtensa/kernels/kernels.cpp - platform/external/executorch - Git at Google

 /*
  * Copyright (c) Meta Platforms, Inc. and affiliates.
  * All rights reserved.
  *
  * This source code is licensed under the BSD-style license found in the
  * LICENSE file in the root directory of this source tree.
  */

 #include "kernels.h"
 #include "NatureDSP_Signal.h"
 #include "NatureDSP_Signal_vector.h"
 #include "xa_nnlib_common.h"
 #include "xa_nnlib_common_macros.h"

 namespace impl {
 namespace HiFi {
 namespace kernels {

 // Quantize a fp32 value to an int8_t/uint8_t value
 template <typename T>
 __attribute__((always_inline)) T
 quantize(const float x, float scale, int32_t zero_point) {
   constexpr float min_val = std::numeric_limits<T>::min();
   constexpr float max_val = std::numeric_limits<T>::max();
   float tmp = roundf(x * scale + zero_point);
   return std::max(std::min(tmp, max_val), min_val);
 }

 // Quantize a fp32 array to an int8_t/uint8_t/int16_t array
 template <typename T>
 void quantize(
     T* __restrict__ y,
     const float* __restrict__ x,
     float scale,
     int32_t zero_point,
     size_t size) {
   xtfloatx2 scale_vec = (xtfloatx2)scale;
   xtfloatx2 zero_vec = XT_FLOAT_SX2(zero_point, 0);

   constexpr float min_val = std::numeric_limits<T>::min();
   constexpr float max_val = std::numeric_limits<T>::max();

   const xtfloatx2* __restrict__ p0 = (const xtfloatx2* __restrict__)x;
   ae_valign va0 = XT_LASX2PP(p0);

   size_t i = 0;
   // Vectorize by 2
   for (; i < (size & ~1); i += 2) {
     xtfloatx2 in_vec;
     XT_LASX2IP(in_vec, va0, p0);
     xtfloatx2 acc = zero_vec;
     XT_MADD_SX2(acc, scale_vec, in_vec);
     xtfloatx2 t0 = XT_FIROUND_SX2(acc);
     ae_int32x2 t1 =
         XT_UTRUNC_SX2(XT_MAX_SX2(XT_MIN_SX2(t0, max_val), min_val), 0);
     y[i] = AE_MOVAD32_H(t1);
     y[i + 1] = AE_MOVAD32_L(t1);
   }
   // Handle residual iteration
   if (i < size) {
     y[i] = quantize<T>(x[i], scale, zero_point);
   }
 }

 // Dequantize an int8_t/uint8_t value to a fp32 value
 template <typename T>
 __attribute__((always_inline)) float
 dequantize(const T x, float scale, int32_t zero_point) {
   return scale * (x - zero_point);
 }

 // Deuantize an int8_t/uint8_t/int16_t array to an fp32 array
 template <typename T>
 void dequantize(
     float* __restrict__ y,
     const T* __restrict__ x,
     float scale,
     int32_t zero_point,
     size_t size) {
   xtfloatx2 scale_vec = (xtfloatx2)scale;
   xtfloatx2 zero_vec = XT_FLOAT_SX2(zero_point, 0);

   xtfloatx2* __restrict__ p0 = (xtfloatx2*)y;
   ae_valign va0 = AE_ZALIGN64();
   size_t i = 0;
   // Vectorize by 2
   for (size_t e = (size >> 1) << 1; i < e; i += 2) {
     xtfloatx2 in_vec = {(float)x[i], (float)x[i + 1]};
     xtfloatx2 t0 = XT_SUB_SX2(in_vec, zero_vec);
     xtfloatx2 t1 = XT_MUL_SX2(t0, scale_vec);
     XT_SASX2IP(t1, va0, p0);
   }
   // Flush the output stream
   XT_SASX2POSFP(va0, p0);

   // Handle residual iteration
   if (i < size) {
     y[i] = dequantize<T>(x[i], scale, zero_point);
   }
 }

 // Requantize the int8_t/uint8_t in value to a uint8_t/int8_t out value.
 // The scale and zero_point for requantization are in the args.
 template <typename IT, typename OT>
 __attribute__((always_inline)) OT requantize(
     const IT in,
     float in_scale,
     int32_t in_zero_point,
     float inv_out_scale,
     int32_t out_zero_point) {
   float dequant = dequantize<IT>(in, in_scale, in_zero_point);
   return quantize<OT>(dequant, inv_out_scale, out_zero_point);
 }

 // Requantize the int8_t/uint8_t in array to a uint8_t/int8_t out array.
 // The scale and zero_point for requantization are in the args.
 template <typename IT, typename OT>
 void requantize(
     OT* __restrict__ out,
     const IT* __restrict__ in,
     float in_scale,
     int32_t in_zero_point,
     float inv_out_scale,
     int32_t out_zero_point,
     size_t size) {
   xtfloatx2 in_scale_vec = (xtfloatx2)in_scale;
   xtfloatx2 in_zero_vec = XT_FLOAT_SX2(in_zero_point, 0);
   xtfloatx2 inv_out_scale_vec = (xtfloatx2)inv_out_scale;
   xtfloatx2 out_zero_vec = XT_FLOAT_SX2(out_zero_point, 0);

   float min_val = std::numeric_limits<OT>::min();
   float max_val = std::numeric_limits<OT>::max();

   size_t i = 0;
   // Vectorize by 2
   for (; i < (size & ~1); i += 2) {
     xtfloatx2 in_vec = {(float)in[i], (float)in[i + 1]};
     xtfloatx2 t0 = XT_SUB_SX2(in_vec, in_zero_vec);
     xtfloatx2 t1 = XT_MUL_SX2(t0, in_scale_vec);

     xtfloatx2 acc = out_zero_vec;
     XT_MADD_SX2(acc, inv_out_scale_vec, t1);
     xtfloatx2 t2 = XT_FIROUND_SX2(acc);
     ae_int32x2 t3 =
         XT_UTRUNC_SX2(XT_MAX_SX2(XT_MIN_SX2(t2, max_val), min_val), 0);
     out[i] = AE_MOVAD32_H(t3);
     out[i + 1] = AE_MOVAD32_L(t3);
   }
   // Handle residual iteration
   if (i < size) {
     out[i] = requantize<IT, OT>(
         in[i], in_scale, in_zero_point, inv_out_scale, out_zero_point);
   }
 }

 // explicit template instantiation

 #define typed_quantize_val(dtype)                         \
   template __attribute__((always_inline)) dtype quantize( \
       const float x, float inv_scale, int32_t zero_point);
 typed_quantize_val(int8_t);
 typed_quantize_val(uint8_t);
 typed_quantize_val(int16_t);
 #undef typed_quantize_val

 #define typed_quantize_vec(dtype)  \
   template void quantize(          \
       dtype* __restrict__ y,       \
       const float* __restrict__ x, \
       float inv_scale,             \
       int32_t zero_point,          \
       size_t size);
 typed_quantize_vec(int8_t);
 typed_quantize_vec(uint8_t);
 typed_quantize_vec(int16_t);
 typed_quantize_vec(int32_t);
 #undef typed_quantize_vec

 #define typed_dequantize_val(dtype)                         \
   template __attribute__((always_inline)) float dequantize( \
       const dtype x, float scale, int32_t zero_point);
 typed_dequantize_val(int8_t);
 typed_dequantize_val(uint8_t);
 typed_dequantize_val(int16_t);
 #undef typed_dequantize_val

 #define typed_dequantize_vec(dtype) \
   template void dequantize(         \
       float* __restrict__ y,        \
       const dtype* __restrict__ x,  \
       float scale,                  \
       int32_t zero_point,           \
       size_t size);
 typed_dequantize_vec(int8_t);
 typed_dequantize_vec(uint8_t);
 typed_dequantize_vec(int16_t);
 typed_dequantize_vec(int32_t);
 #undef typed_dequantize_vec

 #define typed_requantize_val(itype, otype)                  \
   template __attribute__((always_inline)) otype requantize( \
       const itype in,                                       \
       float in_scale,                                       \
       int32_t in_zero_point,                                \
       float inv_out_scale,                                  \
       int32_t out_zero_point);
 typed_requantize_val(int8_t, int8_t);
 typed_requantize_val(uint8_t, uint8_t);
 typed_requantize_val(int8_t, uint8_t);
 typed_requantize_val(uint8_t, int8_t);
 #undef typed_requantize_val

 #define typed_requantize_vec(itype, otype) \
   template void requantize(                \
       otype* __restrict__ out,             \
       const itype* __restrict__ in,        \
       float in_scale,                      \
       int32_t in_zero_point,               \
       float inv_out_scale,                 \
       int32_t out_zero_point,              \
       size_t size);
 typed_requantize_vec(int8_t, int8_t);
 typed_requantize_vec(uint8_t, uint8_t);
 typed_requantize_vec(int8_t, uint8_t);
 typed_requantize_vec(uint8_t, int8_t);
 #undef typed_requantize_vec

 }; // namespace kernels
 }; // namespace HiFi
 }; // namespace impl
	/*
	* Copyright (c) Meta Platforms, Inc. and affiliates.
	* All rights reserved.
	*
	* This source code is licensed under the BSD-style license found in the
	* LICENSE file in the root directory of this source tree.
	*/

	#include "kernels.h"
	#include "NatureDSP_Signal.h"
	#include "NatureDSP_Signal_vector.h"
	#include "xa_nnlib_common.h"
	#include "xa_nnlib_common_macros.h"

	namespace impl {
	namespace HiFi {
	namespace kernels {

	// Quantize a fp32 value to an int8_t/uint8_t value
	template <typename T>
	__attribute__((always_inline)) T
	quantize(const float x, float scale, int32_t zero_point) {
	constexpr float min_val = std::numeric_limits<T>::min();
	constexpr float max_val = std::numeric_limits<T>::max();
	float tmp = roundf(x * scale + zero_point);
	return std::max(std::min(tmp, max_val), min_val);
	}

	// Quantize a fp32 array to an int8_t/uint8_t/int16_t array
	template <typename T>
	void quantize(
	T* __restrict__ y,
	const float* __restrict__ x,
	float scale,
	int32_t zero_point,
	size_t size) {
	xtfloatx2 scale_vec = (xtfloatx2)scale;
	xtfloatx2 zero_vec = XT_FLOAT_SX2(zero_point, 0);

	constexpr float min_val = std::numeric_limits<T>::min();
	constexpr float max_val = std::numeric_limits<T>::max();

	const xtfloatx2* __restrict__ p0 = (const xtfloatx2* __restrict__)x;
	ae_valign va0 = XT_LASX2PP(p0);

	size_t i = 0;
	// Vectorize by 2
	for (; i < (size & ~1); i += 2) {
	xtfloatx2 in_vec;
	XT_LASX2IP(in_vec, va0, p0);
	xtfloatx2 acc = zero_vec;
	XT_MADD_SX2(acc, scale_vec, in_vec);
	xtfloatx2 t0 = XT_FIROUND_SX2(acc);
	ae_int32x2 t1 =
	XT_UTRUNC_SX2(XT_MAX_SX2(XT_MIN_SX2(t0, max_val), min_val), 0);
	y[i] = AE_MOVAD32_H(t1);
	y[i + 1] = AE_MOVAD32_L(t1);
	}
	// Handle residual iteration
	if (i < size) {
	y[i] = quantize<T>(x[i], scale, zero_point);
	}
	}

	// Dequantize an int8_t/uint8_t value to a fp32 value
	template <typename T>
	__attribute__((always_inline)) float
	dequantize(const T x, float scale, int32_t zero_point) {
	return scale * (x - zero_point);
	}

	// Deuantize an int8_t/uint8_t/int16_t array to an fp32 array
	template <typename T>
	void dequantize(
	float* __restrict__ y,
	const T* __restrict__ x,
	float scale,
	int32_t zero_point,
	size_t size) {
	xtfloatx2 scale_vec = (xtfloatx2)scale;
	xtfloatx2 zero_vec = XT_FLOAT_SX2(zero_point, 0);

	xtfloatx2* __restrict__ p0 = (xtfloatx2*)y;
	ae_valign va0 = AE_ZALIGN64();
	size_t i = 0;
	// Vectorize by 2
	for (size_t e = (size >> 1) << 1; i < e; i += 2) {
	xtfloatx2 in_vec = {(float)x[i], (float)x[i + 1]};
	xtfloatx2 t0 = XT_SUB_SX2(in_vec, zero_vec);
	xtfloatx2 t1 = XT_MUL_SX2(t0, scale_vec);
	XT_SASX2IP(t1, va0, p0);
	}
	// Flush the output stream
	XT_SASX2POSFP(va0, p0);

	// Handle residual iteration
	if (i < size) {
	y[i] = dequantize<T>(x[i], scale, zero_point);
	}
	}

	// Requantize the int8_t/uint8_t in value to a uint8_t/int8_t out value.
	// The scale and zero_point for requantization are in the args.
	template <typename IT, typename OT>
	__attribute__((always_inline)) OT requantize(
	const IT in,
	float in_scale,
	int32_t in_zero_point,
	float inv_out_scale,
	int32_t out_zero_point) {
	float dequant = dequantize<IT>(in, in_scale, in_zero_point);
	return quantize<OT>(dequant, inv_out_scale, out_zero_point);
	}

	// Requantize the int8_t/uint8_t in array to a uint8_t/int8_t out array.
	// The scale and zero_point for requantization are in the args.
	template <typename IT, typename OT>
	void requantize(
	OT* __restrict__ out,
	const IT* __restrict__ in,
	float in_scale,
	int32_t in_zero_point,
	float inv_out_scale,
	int32_t out_zero_point,
	size_t size) {
	xtfloatx2 in_scale_vec = (xtfloatx2)in_scale;
	xtfloatx2 in_zero_vec = XT_FLOAT_SX2(in_zero_point, 0);
	xtfloatx2 inv_out_scale_vec = (xtfloatx2)inv_out_scale;
	xtfloatx2 out_zero_vec = XT_FLOAT_SX2(out_zero_point, 0);

	float min_val = std::numeric_limits<OT>::min();
	float max_val = std::numeric_limits<OT>::max();

	size_t i = 0;
	// Vectorize by 2
	for (; i < (size & ~1); i += 2) {
	xtfloatx2 in_vec = {(float)in[i], (float)in[i + 1]};
	xtfloatx2 t0 = XT_SUB_SX2(in_vec, in_zero_vec);
	xtfloatx2 t1 = XT_MUL_SX2(t0, in_scale_vec);

	xtfloatx2 acc = out_zero_vec;
	XT_MADD_SX2(acc, inv_out_scale_vec, t1);
	xtfloatx2 t2 = XT_FIROUND_SX2(acc);
	ae_int32x2 t3 =
	XT_UTRUNC_SX2(XT_MAX_SX2(XT_MIN_SX2(t2, max_val), min_val), 0);
	out[i] = AE_MOVAD32_H(t3);
	out[i + 1] = AE_MOVAD32_L(t3);
	}
	// Handle residual iteration
	if (i < size) {
	out[i] = requantize<IT, OT>(
	in[i], in_scale, in_zero_point, inv_out_scale, out_zero_point);
	}
	}

	// explicit template instantiation

	#define typed_quantize_val(dtype) \
	template __attribute__((always_inline)) dtype quantize( \
	const float x, float inv_scale, int32_t zero_point);
	typed_quantize_val(int8_t);
	typed_quantize_val(uint8_t);
	typed_quantize_val(int16_t);
	#undef typed_quantize_val

	#define typed_quantize_vec(dtype) \
	template void quantize( \
	dtype* __restrict__ y, \
	const float* __restrict__ x, \
	float inv_scale, \
	int32_t zero_point, \
	size_t size);
	typed_quantize_vec(int8_t);
	typed_quantize_vec(uint8_t);
	typed_quantize_vec(int16_t);
	typed_quantize_vec(int32_t);
	#undef typed_quantize_vec

	#define typed_dequantize_val(dtype) \
	template __attribute__((always_inline)) float dequantize( \
	const dtype x, float scale, int32_t zero_point);
	typed_dequantize_val(int8_t);
	typed_dequantize_val(uint8_t);
	typed_dequantize_val(int16_t);
	#undef typed_dequantize_val

	#define typed_dequantize_vec(dtype) \
	template void dequantize( \
	float* __restrict__ y, \
	const dtype* __restrict__ x, \
	float scale, \
	int32_t zero_point, \
	size_t size);
	typed_dequantize_vec(int8_t);
	typed_dequantize_vec(uint8_t);
	typed_dequantize_vec(int16_t);
	typed_dequantize_vec(int32_t);
	#undef typed_dequantize_vec

	#define typed_requantize_val(itype, otype) \
	template __attribute__((always_inline)) otype requantize( \
	const itype in, \
	float in_scale, \
	int32_t in_zero_point, \
	float inv_out_scale, \
	int32_t out_zero_point);
	typed_requantize_val(int8_t, int8_t);
	typed_requantize_val(uint8_t, uint8_t);
	typed_requantize_val(int8_t, uint8_t);
	typed_requantize_val(uint8_t, int8_t);
	#undef typed_requantize_val

	#define typed_requantize_vec(itype, otype) \
	template void requantize( \
	otype* __restrict__ out, \
	const itype* __restrict__ in, \
	float in_scale, \
	int32_t in_zero_point, \
	float inv_out_scale, \
	int32_t out_zero_point, \
	size_t size);
	typed_requantize_vec(int8_t, int8_t);
	typed_requantize_vec(uint8_t, uint8_t);
	typed_requantize_vec(int8_t, uint8_t);
	typed_requantize_vec(uint8_t, int8_t);
	#undef typed_requantize_vec

	}; // namespace kernels
	}; // namespace HiFi
	}; // namespace impl