| /* |
| * Copyright (c) Meta Platforms, Inc. and affiliates. |
| * All rights reserved. |
| * |
| * This source code is licensed under the BSD-style license found in the |
| * LICENSE file in the root directory of this source tree. |
| */ |
| |
| #include <executorch/kernels/portable/NativeFunctions.h> // Declares the aten operator |
| #include <executorch/kernels/quantized/NativeFunctions.h> // Declares the quantized operator |
| #include <executorch/runtime/core/exec_aten/exec_aten.h> |
| #include <executorch/runtime/core/exec_aten/testing_util/tensor_factory.h> |
| #include <executorch/runtime/core/exec_aten/testing_util/tensor_util.h> |
| #include <executorch/runtime/core/exec_aten/util/scalar_type_util.h> |
| #include <executorch/runtime/platform/runtime.h> |
| #include <executorch/test/utils/DeathTest.h> |
| |
| #include <gtest/gtest.h> |
| #include <limits> |
| |
| using namespace ::testing; |
| using exec_aten::ArrayRef; |
| using exec_aten::optional; |
| using exec_aten::Scalar; |
| using exec_aten::ScalarType; |
| using exec_aten::Tensor; |
| using executorch::runtime::KernelRuntimeContext; |
| using torch::executor::native::dequantize_per_tensor_out; |
| using torch::executor::native::embedding_out; |
| using torch::executor::native::quantize_per_tensor_out; |
| using torch::executor::native::quantized_embedding_byte_out; |
| |
| using torch::executor::testing::TensorFactory; |
| |
| /// A generic smoke test that works for any dtype that supports ones() and |
| /// zeros(). |
| template <exec_aten::ScalarType DTYPE> |
| void test_dtype() { |
| TensorFactory<ScalarType::Float> tf; |
| TensorFactory<ScalarType::Long> tf_l; |
| |
| float scale = 0.5; |
| float zero_point = 1; |
| int64_t quant_min = 0; |
| int64_t quant_max = 255; |
| |
| // clang-format off |
| Tensor weight = tf.make({3, 2}, {3.5, 2.0, |
| 4, 1, |
| 5.5, 13.2}); |
| // clang-format on |
| // TODO make these different per dimension once per channel quant ops |
| // available |
| Tensor weight_scales = tf.full({3}, scale); |
| Tensor weight_zero_points = tf.full({3}, zero_point); |
| |
| Tensor indices = tf_l.make({2}, {0, 2}); |
| |
| Tensor out = tf.zeros({2, 2}); |
| |
| TensorFactory<DTYPE> tfo; |
| Tensor qweight = tfo.zeros({3, 2}); |
| |
| // 3.5 / 0.5 + 1 = 8 |
| // 2 / 0.5 + 1 = 5 |
| // 4 / 0.5 + 1 = 9 |
| // 1 / 0.5 + 1 = 3 |
| // 5.5 / 0.5 + 1 = 12 |
| // 13.2 / 0.5 + 1 = 27 |
| quantize_per_tensor_out( |
| weight, scale, (float)zero_point, quant_min, quant_max, DTYPE, qweight); |
| |
| quantized_embedding_byte_out( |
| qweight, |
| weight_scales, |
| weight_zero_points, |
| quant_min, |
| quant_max, |
| indices, |
| out); |
| |
| // (8 - 1) * 0.5 = 3.5 |
| // (5 - 1) * 0.5 = 2.0 |
| // (12 - 1) * 0.5 = 5.5 |
| // (27 - 1) * 0.5 = 13 |
| // clang-format off |
| Tensor expected = tf.make({2, 2}, {3.5, 2, |
| 5.5, 13}); |
| // clang-format on |
| |
| EXPECT_TENSOR_EQ(out, expected); |
| } |
| |
| TEST(OpQuantizedEmbeddingTest, AllDtypesSupported) { |
| test_dtype<ScalarType::Byte>(); |
| } |
| |
| // Q -> DQ -> FP Embedding should be == to Q -> QEmbedding Bytes |
| TEST(OpQuantizedEmbeddingTest, ConsitencyWithReferencePattern) { |
| TensorFactory<ScalarType::Float> tf; |
| TensorFactory<ScalarType::Int> tf_i; |
| TensorFactory<ScalarType::Long> tf_l; |
| |
| float scale = 0.5; |
| float zero_point = 1; |
| int64_t quant_min = 0; |
| int64_t quant_max = 255; |
| |
| // Do Q -> QEmbedding Bytes |
| Tensor weight = tf.make({3, 1}, {3.5, 5.5, 1.0}); |
| // TODO make these different per dimension once per channel quant ops |
| // available |
| Tensor weight_scales = tf.full({3}, scale); |
| Tensor weight_zero_points = tf.full({3}, zero_point); |
| |
| Tensor indices = tf_l.make({2}, {0, 2}); |
| |
| Tensor out = tf.zeros({2, 1}); |
| Tensor fp_out = tf.zeros({2, 1}); |
| |
| TensorFactory<ScalarType::Byte> tfo; |
| Tensor qweight = tfo.zeros({3, 1}); |
| KernelRuntimeContext context{}; |
| // 3.5 / 0.5 + 1 = 8 |
| // 5.5 / 0.5 + 1 = 12 |
| // 1 / 0.5 + 1 = 3 |
| quantize_per_tensor_out( |
| weight, |
| scale, |
| (int64_t)zero_point, |
| quant_min, |
| quant_max, |
| ScalarType::Byte, |
| qweight); |
| |
| quantized_embedding_byte_out( |
| qweight, |
| weight_scales, |
| weight_zero_points, |
| quant_min, |
| quant_max, |
| indices, |
| out); |
| |
| // Do Q DQ embedding |
| dequantize_per_tensor_out( |
| qweight, |
| scale, |
| (int64_t)zero_point, |
| quant_min, |
| quant_max, |
| ScalarType::Byte, |
| optional<ScalarType>(), |
| weight); |
| |
| embedding_out( |
| context, |
| weight, |
| indices, |
| /*padding_idx=*/0, |
| /*scale_grad_by_freq=*/false, |
| /*sparse=*/false, |
| fp_out); |
| |
| // can lossessly dq here so retrive the full information |
| // (8 - 1) * 0.5 = 3.5 |
| // (3 - 1) * 0.5 = 1 |
| Tensor expected = tf.make({2, 1}, {3.5, 1}); |
| EXPECT_TENSOR_EQ(out, fp_out); |
| EXPECT_TENSOR_EQ(out, expected); |
| } |
| |
| TEST(OpQuantizedEmbeddingTest, TestGroupWiseQuantizedEmbedding) { |
| et_pal_init(); |
| TensorFactory<ScalarType::Float> tf; |
| TensorFactory<ScalarType::Int> tf_i; |
| TensorFactory<ScalarType::Long> tf_l; |
| |
| int64_t quant_min = 0; |
| int64_t quant_max = 255; |
| |
| Tensor weight_scales = tf.make({3}, {0.5, 1.0, 1.5}); |
| Tensor weight_zero_points = tf.make({3}, {1, 5, 7}); |
| TensorFactory<ScalarType::Byte> tfo; |
| Tensor qweight = |
| tfo.make({3, 4}, {8, 10, 12, 14, 10, 12, 12, 14, 8, 9, 10, 12}); |
| |
| Tensor indices = tf_l.make({3}, {0, 2, 1}); |
| |
| Tensor out = tf.zeros({3, 4}); |
| Tensor expected = tf.make( |
| {3, 4}, {3.5, 4.5, 5.5, 6.5, 1.5, 3.0, 4.5, 7.5, 5.0, 7.0, 7.0, 9.0}); |
| |
| quantized_embedding_byte_out( |
| qweight, |
| weight_scales, |
| weight_zero_points, |
| quant_min, |
| quant_max, |
| indices, |
| out); |
| |
| EXPECT_TENSOR_EQ(out, expected); |
| |
| // Groupwise quantization. groupsize = 2 |
| weight_scales = tf.make({3, 2}, {0.5, 1.0, 1.5, 2.0, 2.5, 3.0}); |
| weight_zero_points = tf.make({3, 2}, {1, 5, 7, 9, 11, 13}); |
| /* |
| fp_weight = [3.5, 4.5, 7, 9, |
| 4.5, 7.5, 6, 10, |
| -7.5, -5.0, -9.0, -3.0] |
| */ |
| |
| out = tf.zeros({3, 4}); |
| expected = tf.make( |
| {3, 4}, {3.5, 4.5, 7, 9, -7.5, -5.0, -9.0, -3.0, 4.5, 7.5, 6, 10}); |
| |
| quantized_embedding_byte_out( |
| qweight, |
| weight_scales, |
| weight_zero_points, |
| quant_min, |
| quant_max, |
| indices, |
| out); |
| |
| EXPECT_TENSOR_EQ(out, expected); |
| } |
| |
| TEST(OpQuantizedEmbeddingTest, TestGroupWiseQuantizedEmbeddingDeath1) { |
| et_pal_init(); |
| TensorFactory<ScalarType::Float> tf; |
| TensorFactory<ScalarType::Int> tf_i; |
| TensorFactory<ScalarType::Long> tf_l; |
| |
| int64_t quant_min = 0; |
| int64_t quant_max = 255; |
| |
| Tensor weight_scales = tf.make({4}, {0.5, 1.0, 1.5, 3.3}); |
| Tensor weight_zero_points = tf.make({4}, {1, 5, 7, 5}); |
| TensorFactory<ScalarType::Byte> tfo; |
| Tensor qweight = |
| tfo.make({3, 4}, {8, 10, 12, 14, 10, 12, 12, 14, 8, 9, 10, 12}); |
| |
| Tensor indices = tf_l.make({3}, {0, 2, 1}); |
| |
| Tensor out = tf.zeros({3, 4}); |
| ET_EXPECT_DEATH( |
| quantized_embedding_byte_out( |
| qweight, |
| weight_scales, |
| weight_zero_points, |
| quant_min, |
| quant_max, |
| indices, |
| out), |
| ""); |
| } |
| |
| TEST(OpQuantizedEmbeddingTest, TestGroupWiseQuantizedEmbeddingDeath2) { |
| et_pal_init(); |
| TensorFactory<ScalarType::Float> tf; |
| TensorFactory<ScalarType::Int> tf_i; |
| TensorFactory<ScalarType::Long> tf_l; |
| |
| int64_t quant_min = 0; |
| int64_t quant_max = 255; |
| |
| Tensor weight_scales = tf.make({2}, {0.5, 1.0}); |
| Tensor weight_zero_points = tf.make({2}, {1, 5}); |
| TensorFactory<ScalarType::Byte> tfo; |
| Tensor qweight = |
| tfo.make({3, 4}, {8, 10, 12, 14, 10, 12, 12, 14, 8, 9, 10, 12}); |
| |
| Tensor indices = tf_l.make({3}, {0, 2, 1}); |
| |
| Tensor out = tf.zeros({3, 4}); |
| ET_EXPECT_DEATH( |
| quantized_embedding_byte_out( |
| qweight, |
| weight_scales, |
| weight_zero_points, |
| quant_min, |
| quant_max, |
| indices, |
| out), |
| ""); |
| } |
| |
| TEST(OpQuantizedEmbeddingTest, TestGroupWiseQuantizedEmbeddingDeath3) { |
| et_pal_init(); |
| TensorFactory<ScalarType::Float> tf; |
| TensorFactory<ScalarType::Int> tf_i; |
| TensorFactory<ScalarType::Long> tf_l; |
| |
| int64_t quant_min = 0; |
| int64_t quant_max = 255; |
| |
| Tensor weight_scales = tf.make({3, 2}, {0.5, 1.0, 1.5, 2.5, 3.5, 3.5}); |
| Tensor weight_zero_points = tf.make({3, 2}, {1, 5, 7, 9, 11, 13}); |
| TensorFactory<ScalarType::Byte> tfo; |
| Tensor qweight = tfo.make({3, 3}, {8, 10, 12, 14, 10, 12, 12, 14, 8}); |
| |
| Tensor indices = tf_l.make({3}, {0, 2, 1}); |
| |
| Tensor out = tf.zeros({3, 3}); |
| ET_EXPECT_DEATH( |
| quantized_embedding_byte_out( |
| qweight, |
| weight_scales, |
| weight_zero_points, |
| quant_min, |
| quant_max, |
| indices, |
| out), |
| ""); |
| } |
| |
| TEST(OpQuantizedEmbeddingTest, TestGroupWiseQuantizedEmbeddingDeath4) { |
| et_pal_init(); |
| TensorFactory<ScalarType::Float> tf; |
| TensorFactory<ScalarType::Int> tf_i; |
| TensorFactory<ScalarType::Long> tf_l; |
| |
| int64_t quant_min = 0; |
| int64_t quant_max = 255; |
| |
| Tensor weight_scales = tf.make({3, 2}, {0.5, 1.0, 1.5, 2.5, 3.5, 3.5}); |
| Tensor weight_zero_points = tf.make({3}, {1, 5, 7}); |
| TensorFactory<ScalarType::Byte> tfo; |
| Tensor qweight = tfo.make({3, 3}, {8, 10, 12, 14, 10, 12, 12, 14, 8}); |
| |
| Tensor indices = tf_l.make({3}, {0, 2, 1}); |
| |
| Tensor out = tf.zeros({3, 3}); |
| ET_EXPECT_DEATH( |
| quantized_embedding_byte_out( |
| qweight, |
| weight_scales, |
| weight_zero_points, |
| quant_min, |
| quant_max, |
| indices, |
| out), |
| ""); |
| } |
| |
| TEST(OpQuantizedEmbeddingTest, TestGroupWiseQuantizedEmbeddingDeath5) { |
| et_pal_init(); |
| TensorFactory<ScalarType::Float> tf; |
| TensorFactory<ScalarType::Int> tf_i; |
| TensorFactory<ScalarType::Long> tf_l; |
| |
| int64_t quant_min = 0; |
| int64_t quant_max = 255; |
| |
| Tensor weight_scales = tf.make({3, 2}, {0.5, 1.0, 1.5, 2.5, 3.5, 3.5}); |
| Tensor weight_zero_points = tf.make({3, 3}, {1, 5, 7, 1, 5, 7, 1, 5, 7}); |
| TensorFactory<ScalarType::Byte> tfo; |
| Tensor qweight = tfo.make({3, 3}, {8, 10, 12, 14, 10, 12, 12, 14, 8}); |
| |
| Tensor indices = tf_l.make({3}, {0, 2, 1}); |
| |
| Tensor out = tf.zeros({3, 3}); |
| ET_EXPECT_DEATH( |
| quantized_embedding_byte_out( |
| qweight, |
| weight_scales, |
| weight_zero_points, |
| quant_min, |
| quant_max, |
| indices, |
| out), |
| ""); |
| } |