| /* |
| * Copyright (c) Meta Platforms, Inc. and affiliates. |
| * All rights reserved. |
| * |
| * This source code is licensed under the BSD-style license found in the |
| * LICENSE file in the root directory of this source tree. |
| */ |
| |
| #include <executorch/kernels/portable/NativeFunctions.h> // Declares the aten operator |
| #include <executorch/kernels/quantized/NativeFunctions.h> // Declares the quantized operator |
| #include <executorch/runtime/core/exec_aten/exec_aten.h> |
| #include <executorch/runtime/core/exec_aten/testing_util/tensor_factory.h> |
| #include <executorch/runtime/core/exec_aten/testing_util/tensor_util.h> |
| #include <executorch/runtime/core/exec_aten/util/scalar_type_util.h> |
| #include <executorch/runtime/platform/runtime.h> |
| #include <executorch/test/utils/DeathTest.h> |
| |
| #include <gtest/gtest.h> |
| #include <limits> |
| |
| using namespace ::testing; |
| using exec_aten::ArrayRef; |
| using exec_aten::optional; |
| using exec_aten::RuntimeContext; |
| using exec_aten::Scalar; |
| using exec_aten::ScalarType; |
| using exec_aten::Tensor; |
| using torch::executor::native::add_out; |
| using torch::executor::native::dequantize_per_tensor_out; |
| using torch::executor::native::quantize_per_tensor_out; |
| using torch::executor::native::quantized_add_out; |
| |
| using torch::executor::testing::TensorFactory; |
| |
| /// A generic smoke test that works for any dtype that supports ones() and |
| /// zeros(). |
| template <exec_aten::ScalarType DTYPE> |
| void test_dtype() { |
| TensorFactory<ScalarType::Float> tf; |
| |
| Tensor input1 = tf.full({3, 5}, 3.5); |
| Tensor input2 = tf.full({3, 5}, 3.5); |
| double scale = 0.5; |
| |
| int64_t zero_point = 1; |
| int64_t quant_min = 0; |
| int64_t quant_max = 255; |
| |
| TensorFactory<DTYPE> tfo; |
| Tensor qinput1 = tfo.zeros({3, 5}); |
| Tensor qinput2 = tfo.zeros({3, 5}); |
| Tensor qoutput = tfo.zeros({3, 5}); |
| // 3.5 / 0.5 + 1 = 8 |
| quantize_per_tensor_out( |
| input1, |
| scale, |
| zero_point, |
| quant_min, |
| quant_max, |
| ScalarType::Byte, |
| qinput1); |
| |
| quantize_per_tensor_out( |
| input2, |
| scale, |
| zero_point, |
| quant_min, |
| quant_max, |
| ScalarType::Byte, |
| qinput2); |
| |
| quantized_add_out( |
| qinput1, |
| scale, |
| zero_point, |
| quant_min, |
| quant_max, |
| qinput2, |
| scale, |
| zero_point, |
| quant_min, |
| quant_max, |
| scale, |
| zero_point, |
| quant_min, |
| quant_max, |
| qoutput); |
| |
| // can lossessly dq here so retrive the full 3.5 in operation |
| // (3.5 + 3.5) / 0.5 + 1 = 15 |
| Tensor expected = tfo.full({3, 5}, 15.0); |
| |
| EXPECT_TENSOR_EQ(qoutput, expected); |
| } |
| |
| TEST(OpQuantizeAddTest, AllDtypesSupported) { |
| test_dtype<ScalarType::Byte>(); |
| } |
| |
| TEST(OpQuantizeAddTest, DifferentQParams) { |
| TensorFactory<ScalarType::Float> tf; |
| |
| Tensor input1 = tf.full({3, 5}, 3.5); |
| Tensor input2 = tf.full({3, 5}, 3.5); |
| double a_scale = 0.5; |
| int64_t a_zero_point = 1; |
| |
| double b_scale = 0.25; |
| int64_t b_zero_point = 2; |
| |
| double out_scale = 0.1; |
| int64_t out_zero_point = 5; |
| |
| int64_t quant_min = 0; |
| int64_t quant_max = 255; |
| |
| TensorFactory<ScalarType::Byte> tfo; |
| Tensor qinput1 = tfo.zeros({3, 5}); |
| Tensor qinput2 = tfo.zeros({3, 5}); |
| Tensor qoutput = tfo.zeros({3, 5}); |
| // 3.5 / 0.5 + 1 = 8 |
| quantize_per_tensor_out( |
| input1, |
| a_scale, |
| a_zero_point, |
| quant_min, |
| quant_max, |
| ScalarType::Byte, |
| qinput1); |
| |
| // 3.5 / 0.25 + 2 = 16 |
| quantize_per_tensor_out( |
| input2, |
| b_scale, |
| b_zero_point, |
| quant_min, |
| quant_max, |
| ScalarType::Byte, |
| qinput2); |
| |
| quantized_add_out( |
| qinput1, |
| a_scale, |
| a_zero_point, |
| quant_min, |
| quant_max, |
| qinput2, |
| b_scale, |
| b_zero_point, |
| quant_min, |
| quant_max, |
| out_scale, |
| out_zero_point, |
| quant_min, |
| quant_max, |
| qoutput); |
| |
| // can lossessly dq here so retrive the full 3.5 in operation |
| // (3.5 + 3.5) / 0.1 + 5 = 75 |
| Tensor expected = tfo.full({3, 5}, 75.0); |
| |
| EXPECT_TENSOR_EQ(qoutput, expected); |
| } |
| |
| // Q -> DQ -> FP ADD -> Q -> DQ should be == to Q -> QADD -> DQ |
| TEST(OpQuantizeAddTest, ConsitencyWithReferencePattern) { |
| TensorFactory<ScalarType::Float> tf; |
| |
| Tensor input1 = tf.full({3, 5}, 3.5); |
| Tensor input2 = tf.full({3, 5}, 3.5); |
| Tensor dq_input1 = tf.zeros({3, 5}); |
| Tensor dq_input2 = tf.zeros({3, 5}); |
| Tensor reference_op_output = tf.zeros({3, 5}); |
| Tensor reference_pattern_output = tf.zeros({3, 5}); |
| Tensor fp_output = tf.zeros({3, 5}); |
| |
| double a_scale = 0.5; |
| int64_t a_zero_point = 1; |
| |
| double b_scale = 0.25; |
| int64_t b_zero_point = 2; |
| |
| double out_scale = 0.1; |
| int64_t out_zero_point = 5; |
| |
| int64_t quant_min = 0; |
| int64_t quant_max = 255; |
| |
| TensorFactory<ScalarType::Byte> tfo; |
| Tensor qinput1 = tfo.zeros({3, 5}); |
| Tensor qinput2 = tfo.zeros({3, 5}); |
| Tensor qoutput = tfo.zeros({3, 5}); |
| |
| optional<ScalarType> out_dtype = optional<ScalarType>(); |
| |
| RuntimeContext context{}; |
| // q -> qadd -> dq |
| // 3.5 / 0.5 + 1 = 8 |
| quantize_per_tensor_out( |
| input1, |
| a_scale, |
| a_zero_point, |
| quant_min, |
| quant_max, |
| ScalarType::Byte, |
| qinput1); |
| |
| // 3.5 / 0.25 + 2 = 16 |
| quantize_per_tensor_out( |
| input2, |
| b_scale, |
| b_zero_point, |
| quant_min, |
| quant_max, |
| ScalarType::Byte, |
| qinput2); |
| |
| quantized_add_out( |
| qinput1, |
| a_scale, |
| a_zero_point, |
| quant_min, |
| quant_max, |
| qinput2, |
| b_scale, |
| b_zero_point, |
| quant_min, |
| quant_max, |
| out_scale, |
| out_zero_point, |
| quant_min, |
| quant_max, |
| qoutput); |
| dequantize_per_tensor_out( |
| qoutput, |
| out_scale, |
| out_zero_point, |
| quant_min, |
| quant_max, |
| ScalarType::Byte, |
| out_dtype, |
| reference_op_output); |
| |
| // now get results for q -> dq -> fp add -> q -> dq |
| dequantize_per_tensor_out( |
| qinput1, |
| a_scale, |
| a_zero_point, |
| quant_min, |
| quant_max, |
| ScalarType::Byte, |
| out_dtype, |
| dq_input1); |
| |
| dequantize_per_tensor_out( |
| qinput2, |
| b_scale, |
| b_zero_point, |
| quant_min, |
| quant_max, |
| ScalarType::Byte, |
| out_dtype, |
| dq_input2); |
| |
| add_out(context, dq_input1, dq_input2, 1.0, fp_output); |
| // reuse 'qoutput' tensor as an intermediate |
| quantize_per_tensor_out( |
| fp_output, |
| out_scale, |
| out_zero_point, |
| quant_min, |
| quant_max, |
| ScalarType::Byte, |
| qoutput); |
| |
| dequantize_per_tensor_out( |
| qoutput, |
| out_scale, |
| out_zero_point, |
| quant_min, |
| quant_max, |
| ScalarType::Byte, |
| out_dtype, |
| reference_pattern_output); |
| |
| Tensor expected = tf.full({3, 5}, 7.0); |
| |
| // Pattern and op results should both be equal to expected and each other, |
| // check all cases explicitly instead of relying on transitivity |
| EXPECT_TENSOR_EQ(reference_op_output, expected); |
| EXPECT_TENSOR_EQ(reference_pattern_output, expected); |
| EXPECT_TENSOR_EQ(reference_op_output, reference_pattern_output); |
| } |
| |
| TEST(OpQuantizeAddTest, InvalidMinMaxDies) { |
| TensorFactory<ScalarType::Float> tf; |
| |
| Tensor input1 = tf.full({3, 5}, 3.5); |
| Tensor input2 = tf.full({3, 5}, 3.5); |
| double scale = 0.5; |
| int64_t zero_point = 1; |
| |
| int64_t quant_min = 0; |
| int64_t quant_max = 255; |
| int64_t out_quant_min = -1; |
| int64_t out_quant_max = 256; |
| |
| TensorFactory<ScalarType::Byte> tfo; |
| Tensor qinput1 = tfo.zeros({3, 5}); |
| Tensor qinput2 = tfo.zeros({3, 5}); |
| Tensor qoutput = tfo.zeros({3, 5}); |
| // 3.5 / 0.5 + 1 = 8 |
| quantize_per_tensor_out( |
| input1, |
| scale, |
| zero_point, |
| quant_min, |
| quant_max, |
| ScalarType::Byte, |
| qinput1); |
| |
| // 3.5 / 0.25 + 2 = 16 |
| quantize_per_tensor_out( |
| input2, |
| scale, |
| zero_point, |
| quant_min, |
| quant_max, |
| ScalarType::Byte, |
| qinput2); |
| |
| ET_EXPECT_DEATH( |
| quantized_add_out( |
| qinput1, |
| scale, |
| zero_point, |
| quant_min, |
| quant_max, |
| qinput2, |
| scale, |
| zero_point, |
| quant_min, |
| quant_max, |
| scale, |
| zero_point, |
| out_quant_min, |
| out_quant_max, |
| qoutput), |
| ""); |
| } |
| |
| TEST(OpQuantizeAddTest, TopOfRangeTest) { |
| TensorFactory<ScalarType::Float> tf; |
| |
| Tensor input1 = tf.full({3, 5}, 255); |
| Tensor input2 = tf.full({3, 5}, 255); |
| double a_scale = 1; |
| int64_t a_zero_point = 0; |
| |
| double b_scale = 1; |
| int64_t b_zero_point = 0; |
| |
| double out_scale = 1; |
| int64_t out_zero_point = 0; |
| |
| int64_t quant_min = 0; |
| int64_t quant_max = 255; |
| |
| TensorFactory<ScalarType::Byte> tfo; |
| Tensor qinput1 = tfo.zeros({3, 5}); |
| Tensor qinput2 = tfo.zeros({3, 5}); |
| Tensor qoutput = tfo.zeros({3, 5}); |
| |
| quantize_per_tensor_out( |
| input1, |
| a_scale, |
| a_zero_point, |
| quant_min, |
| quant_max, |
| ScalarType::Byte, |
| qinput1); |
| |
| // 3.5 / 0.25 + 2 = 16 |
| quantize_per_tensor_out( |
| input2, |
| b_scale, |
| b_zero_point, |
| quant_min, |
| quant_max, |
| ScalarType::Byte, |
| qinput2); |
| |
| quantized_add_out( |
| qinput1, |
| a_scale, |
| a_zero_point, |
| quant_min, |
| quant_max, |
| qinput2, |
| b_scale, |
| b_zero_point, |
| quant_min, |
| quant_max, |
| out_scale, |
| out_zero_point, |
| quant_min, |
| quant_max, |
| qoutput); |
| |
| Tensor expected = tfo.full({3, 5}, 255); |
| |
| EXPECT_TENSOR_EQ(qoutput, expected); |
| } |