/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <executorch/kernels/portable/cpu/vec_ops.h>
#include <executorch/runtime/kernel/kernel_includes.h>
#include <algorithm>
#include <cinttypes>
#include <cmath>
#include <tuple>

/**
 * For an input tensor, compute the quantization parameters (scale and
 * zero_point) to be used when quantizing it.
 */
namespace torch {
namespace executor {
namespace native {
using Tensor = exec_aten::Tensor;
using Scalar = exec_aten::Scalar;
using ScalarType = exec_aten::ScalarType;
namespace {
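// Note (assumption, not stated in the original source): 6.1e-5 is
// approximately 2^-14, the smallest positive normal half-precision (fp16)
// value, so clamping to this threshold likely keeps the scale representable
// in fp16 arithmetic.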
constexpr float SMALL_SCALE_THRESHOLD = 6.1e-5f;
/**
* Asserts that the parameters are valid.
*/
void check_quantize_per_tensor_args(
    const Tensor& input,
    int64_t qmin,
    int64_t qmax,
    ScalarType dtype,
    Tensor& scale_out,
    Tensor& zero_point_out) {
  (void)dtype;
  ET_CHECK_MSG(
      qmin < qmax,
      "qmin should be less than qmax, but received min: %" PRId64
      ", max: %" PRId64,
      qmin,
      qmax);
  ET_CHECK_MSG(
      input.scalar_type() == ScalarType::Float,
      "Expected input to be a Float tensor, received: %" PRId8,
      static_cast<int8_t>(input.scalar_type()));
  ET_CHECK_MSG(
      scale_out.scalar_type() == ScalarType::Double,
      "Expected scale to be a Double tensor, received: %" PRId8,
      static_cast<int8_t>(scale_out.scalar_type()));
  ET_CHECK_MSG(
      zero_point_out.scalar_type() == ScalarType::Long,
      "Expected zero_point to be a Long tensor, received: %" PRId8,
      static_cast<int8_t>(zero_point_out.scalar_type()));
  ET_CHECK_MSG(
      scale_out.numel() == 1,
      "Expected scale to have exactly one element, received: %zd",
      ssize_t(scale_out.numel()));
  ET_CHECK_MSG(
      zero_point_out.numel() == 1,
      "Expected zero_point to have exactly one element, received: %zd",
      ssize_t(zero_point_out.numel()));
}

void choose_qparams(
    const Tensor& input,
    int32_t qmin,
    int32_t qmax,
    Tensor& scale_out,
    Tensor& zero_point_out) {
  const float* x_fp32 = input.data_ptr<float>();
  // Compute x_min, x_max and q_params (scale, zero_point)
  float min = torch::executor::vec_minf(x_fp32, input.numel());
  float max = torch::executor::vec_maxf(x_fp32, input.numel());

  // We extend the [min, max] interval to ensure that it contains 0.
  // Otherwise, we would not meet the requirement that 0 be an exactly
  // representable value.
  min = std::min(min, 0.f);
  max = std::max(max, 0.f);
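  // For example (illustrative): an all-positive input with min = 0.5f is
  // extended to min = 0.0f, so the real value 0 still maps exactly to an
  // integer in [qmin, qmax].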
  // Use double precision for the intermediate computation, but single
  // precision for the final value, to reflect the precision actually used
  // during quantization.
  double scale = (static_cast<double>(max) - min) / (qmax - qmin);
  // If scale is 0 or so small that its reciprocal is infinity, we arbitrarily
  // adjust the scale to 0.1. We want to avoid the scale's reciprocal being
  // infinity because some of the fbgemm code pre-computes the scale's
  // reciprocal to do multiplication instead of division in the time-critical
  // part of the code.
  if (float(scale) == 0.0f || std::isinf(1.0f / float(scale))) {
    scale = 0.1;
  }
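  // Worked example (illustrative): with min = 0.0f, max = 2.55f, qmin = 0,
  // and qmax = 255, the scale is (2.55 - 0.0) / (255 - 0) = 0.01.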
  ET_CHECK_MSG(scale > 0, "quantization scale should be > 0");

  // Cut off small scale
  if (scale < SMALL_SCALE_THRESHOLD) {
    float org_scale = scale;
    scale = SMALL_SCALE_THRESHOLD;
    // Adjust the min and max based on the new scale
    if (min == 0.0f) {
      max = SMALL_SCALE_THRESHOLD * (qmax - qmin);
    } else if (max == 0.0f) {
      min = -SMALL_SCALE_THRESHOLD * (qmax - qmin);
    } else {
      float amplifier = SMALL_SCALE_THRESHOLD / org_scale;
      min *= amplifier;
      max *= amplifier;
    }
  }
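  // For example (illustrative): if min = 0.0f and the computed scale were
  // 1e-6, max is reset to SMALL_SCALE_THRESHOLD * (qmax - qmin) so that
  // (max - min) / (qmax - qmin) equals the clamped scale exactly.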
  // Zero-point computation.
  // First, the initial floating-point computation. The zero point can be
  // determined by solving an affine equation for any known pair
  // (real value, corresponding quantized value).
  // We know two such pairs: (rmin, qmin) and (rmax, qmax).
  // The arithmetic error on the zero point computed from either pair
  // will be roughly machine_epsilon * (sum of absolute values of terms),
  // so we want to use the variant that adds the smaller terms.
  double zero_point_from_min = qmin - min / static_cast<double>(scale);
  double zero_point_from_max = qmax - max / static_cast<double>(scale);
  double zero_point_from_min_error =
      std::abs(qmin) + std::abs(min / static_cast<double>(scale));
  double zero_point_from_max_error =
      std::abs(qmax) + std::abs(max / static_cast<double>(scale));
  double initial_zero_point =
      zero_point_from_min_error < zero_point_from_max_error
      ? zero_point_from_min
      : zero_point_from_max;
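  // Continuing the example above (illustrative): with scale = 0.01,
  // zero_point_from_min = 0 - 0.0 / 0.01 = 0 and
  // zero_point_from_max = 255 - 2.55 / 0.01 = 0, so either pair yields an
  // initial zero point of 0.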
  // Now we need to nudge the zero point to be an integer
  // (our zero points are integers, and this is motivated by the requirement
  // to be able to represent the real value "0" exactly as a quantized value,
  // which is required in multiple places, for example in Im2col with zero
  // padding).
  int32_t nudged_zero_point = 0;
  if (initial_zero_point < qmin) {
    nudged_zero_point = qmin;
  } else if (initial_zero_point > qmax) {
    nudged_zero_point = qmax;
  } else {
    // std::nearbyint rounds to the nearest integer under the current
    // rounding mode (round-half-to-even by default).
    nudged_zero_point = std::nearbyint(static_cast<float>(initial_zero_point));
  }

  scale_out.data_ptr<double>()[0] = scale;
  zero_point_out.data_ptr<int64_t>()[0] = nudged_zero_point;
}
} // namespace
std::tuple<Tensor, Tensor> choose_qparams_tensor_out(
    const Tensor& input,
    int64_t quant_min,
    int64_t quant_max,
    __ET_UNUSED double eps,
    ScalarType dtype,
    Tensor& scale_out,
    Tensor& zero_point_out) {
  check_quantize_per_tensor_args(
      input, quant_min, quant_max, dtype, scale_out, zero_point_out);

  choose_qparams(input, quant_min, quant_max, scale_out, zero_point_out);

  return {scale_out, zero_point_out};
}
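// Usage sketch (illustrative, not from the original source): for a float
// input covering [-1.0f, 1.0f] quantized to int8 ([-128, 127]), one would
// allocate a one-element Double tensor for scale_out and a one-element Long
// tensor for zero_point_out, then call:
//
//   choose_qparams_tensor_out(
//       input, /*quant_min=*/-128, /*quant_max=*/127, /*eps=*/1e-7,
//       ScalarType::Char, scale_out, zero_point_out);
//
// after which scale_out holds 2.0 / 255 (~0.00784) and zero_point_out
// holds 0.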
::std::tuple<Tensor, Tensor> choose_qparams_tensor_out(
    RuntimeContext& context,
    const Tensor& input,
    int64_t quant_min,
    int64_t quant_max,
    double eps,
    ScalarType dtype,
    Tensor& scale_out,
    Tensor& zero_point_out) {
  // TODO(larryliu): Add a context arg to the real op function and remove this
  // wrapper
  (void)context;
  return choose_qparams_tensor_out(
      input, quant_min, quant_max, eps, dtype, scale_out, zero_point_out);
}
} // namespace native
} // namespace executor
} // namespace torch