#include <gtest/gtest.h>

#include <ATen/ATen.h>
#include <ATen/test/test_assert.h>
#include <cmath>
#include <iostream>
#include <limits>
#include <memory>
#include <sstream>
#include <type_traits>
// For quantize_val
#include <ATen/native/quantized/AffineQuantizer.h>
#include <c10/core/ScalarType.h>
// For c10::multiply_integers
#include <c10/util/accumulate.h>
#include <c10/util/irange.h>
#include <ATen/quantized/Quantizer.h>

using namespace at;
#ifndef ATEN_CPU_STATIC_DISPATCH

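// Round-trip checks for the per-tensor affine quantization APIs:
// quantize_per_tensor, int_repr, dequantize, and requantize_val.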
TEST(TestQTensor, QuantDequantAPIs) {
  auto num_elements = 10;
  Tensor r = at::ones({num_elements});
  const double scale = 1.0;
  const int64_t zero_point = 2;
  const Tensor qr = at::quantize_per_tensor(r, scale, zero_point, kQUInt8);
  ASSERT_EQ(qr.q_scale(), scale);
  ASSERT_EQ(qr.q_zero_point(), zero_point);
  ASSERT_TRUE(qr.is_quantized());
  ASSERT_FALSE(r.is_quantized());

  // int_repr() exposes the underlying uint8 storage:
  // ones quantized with scale=1.0 and zero_point=2 map to 3
  Tensor int_repr = qr.int_repr();
  auto* int_repr_data = int_repr.data_ptr<uint8_t>();
  for (const auto i : c10::irange(num_elements)) {
    ASSERT_EQ(int_repr_data[i], 3);
  }

  // Check for correct quantization
  auto r_data = r.data_ptr<float>();
  auto qr_data = qr.data_ptr<quint8>();
  for (const auto i : c10::irange(num_elements)) {
    ASSERT_EQ(
        native::quantize_val<quint8>(scale, zero_point, r_data[i]).val_,
        qr_data[i].val_);
  }

  // Check for correct dequantization
  Tensor rqr = qr.dequantize();
  auto rqr_data = rqr.data_ptr<float>();
  for (const auto i : c10::irange(num_elements)) {
    ASSERT_EQ(r_data[i], rqr_data[i]);
  }
  for (const auto i : c10::irange(num_elements)) {
    ASSERT_EQ(
        r_data[i],
        native::dequantize_val(qr.q_scale(), qr.q_zero_point(), qr_data[i]));
  }

  // Check for correct requantization: requantize_val should match
  // dequantizing and then quantizing with the new parameters
  double new_scale = 2.0;
  int64_t new_zero_point = 1;
  Tensor reqr = at::quantize_per_tensor(r, new_scale, new_zero_point, kQInt8);
  auto reqr_data = reqr.data_ptr<qint8>();
  for (const auto i : c10::irange(num_elements)) {
    reqr_data[i].val_ =
        native::requantize_val<quint8, qint8>(
            scale, zero_point, new_scale, new_zero_point, qr_data[i])
            .val_;
    const qint8 expected =
        native::quantize_val<qint8>(new_scale, new_zero_point, rqr_data[i]);
    ASSERT_EQ(expected.val_, reqr_data[i].val_);
  }
}

TEST(TestQTensor, RoundingMode) {
  // Quantization is defined as
  //   qx = clamp(round(x / scale) + zero_point, q_min, q_max)
  // with ties rounded to even. If the zero_point were added before rounding,
  // the tie-breaking (and therefore the result) would be wrong.
  int32_t zero_point = 5;
  std::vector<float> x_values{
      -5.5, -4.5, -3.5, -2.5, -1.5, -0.5, 0.5, 1.5, 2.5, 3.5, 4.5, 5.5};
  std::vector<uint8_t> qx_expect{
      0, 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11}; // scale = 1.0

  Tensor x = from_blob(x_values.data(), x_values.size());
  Tensor qx = at::quantize_per_tensor(x, /*scale=*/1.0, zero_point, kQUInt8);

  auto qx_data = qx.data_ptr<quint8>();
  for (const auto idx : c10::irange(x_values.size())) {
    ASSERT_EQ(qx_expect[idx], qx_data[idx].val_)
        << "Tie breaking during rounding element " << idx << " failed!";
  }
}

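// item() on a single-element quantized tensor should return the
// dequantized value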
TEST(TestQTensor, Item) {
  Tensor r = at::ones({1});
  const float scale = 1;
  const int32_t zero_point = 2;
  Tensor qr = at::quantize_per_tensor(r, scale, zero_point, kQUInt8);
  ASSERT_EQ(r.item().to<float>(), qr.item().to<float>());
}

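// _empty_affine_quantized allocates uninitialized per-tensor quantized
// storage; fill it by hand and check that dequantize applies
// (q - zero_point) * scale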
TEST(TestQTensor, EmptyQuantized) {
  float scale = 0.5;
  int zero_point = 10;
  int val = 100;
  int numel = 10;
  Tensor q = at::_empty_affine_quantized(
      {numel}, at::device(at::kCPU).dtype(kQUInt8), scale, zero_point);
  // Fill the quantized tensor by writing raw values through the data pointer
  auto* q_data = q.data_ptr<quint8>();
  for (const auto i : c10::irange(numel)) {
    q_data[i].val_ = val;
  }

  // dequantize: r = (q - zero_point) * scale
  auto r = q.dequantize();
  auto* r_data = r.data_ptr<float>();
  for (const auto i : c10::irange(numel)) {
    ASSERT_EQ(r_data[i], (val - zero_point) * scale);
  }
}

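// Same as above, but with per-channel scales and zero_points along dim 0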
TEST(TestQTensor, EmptyPerchannelQuantized) {
  int numel = 10;
  auto scales = rand({numel}).toType(kDouble);
  auto zero_points = randint(10, {numel}).toType(kLong);
  int val = 100;
  int ch_axis = 0;
  Tensor q = at::_empty_per_channel_affine_quantized(
      {numel},
      scales,
      zero_points,
      ch_axis,
      at::device(at::kCPU).dtype(kQUInt8));
  // Fill the quantized tensor by writing raw values through the data pointer
  auto* q_data = q.data_ptr<quint8>();
  for (const auto i : c10::irange(numel)) {
    q_data[i].val_ = val;
  }

  // dequantize per channel: r[i] = (q[i] - zero_points[i]) * scales[i]
  auto r = q.dequantize();
  auto* r_data = r.data_ptr<float>();
  for (const auto i : c10::irange(numel)) {
    ASSERT_EQ(
        r_data[i],
        (val - zero_points[i].item().to<int>()) * scales[i].item().to<float>());
  }
}

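// quantize_per_channel on a contiguous NCHW tensor: each channel is
// quantized with its own scale and zero_point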
TEST(TestQTensor, QuantizePerChannel4d) {
  int C = 64, H = 10, W = 10;
  auto scales = rand({C}).toType(kDouble);
  auto zero_points = randint(10, {C}).toType(kLong);
  int ch_axis = 1;
  // create a 4d tensor where each H x W image holds the values 0..H*W-1
  Tensor tensor = at::empty({1, C, H, W}, at::device(at::kCPU).dtype(kFloat));
  auto* tensor_data = tensor.data_ptr<float>();
  for (int c = 0, i = 0; c < C; ++c) {
    for (int e = 0; e < H * W; ++e, ++i) {
      tensor_data[i] = e;
    }
  }
  // quantize and check values
  Tensor q = at::native::quantize_per_channel(
      tensor, scales, zero_points, ch_axis, kQUInt8);
  auto* q_data = (uint8_t*)q.data_ptr<quint8>();
  for (int c = 0, i = 0; c < C; ++c) {
    float inv_scale = 1.0f / static_cast<float>(scales[c].item<double>());
    int64_t zero_point = zero_points[c].item<int64_t>();
    for (int e = 0; e < H * W; ++e, ++i) {
      // clamp qval to 255 when the value exceeds the max uint8_t value
      // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,cppcoreguidelines-avoid-magic-numbers,bugprone-narrowing-conversions)
      int qval = std::min<int>(zero_point + std::nearbyint(e * inv_scale), 255);
      ASSERT_EQ((int)q_data[i], qval);
    }
  }
}

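// Same check as QuantizePerChannel4d, but the input is in ChannelsLast (NHWC)
// memory format, which exercises the channels-last handling in
// quantize_per_channel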
TEST(TestQTensor, QuantizePerChannel4dChannelsLast) {
  int C = 64, H = 10, W = 10;
  auto scales = rand({C}).toType(kDouble);
  auto zero_points = randint(10, {C}).toType(kLong);
  int ch_axis = 1;
  // create a 4d tensor where each H x W image holds the values 0..H*W-1
  Tensor tensor = at::empty(
      {1, C, H, W},
      at::device(at::kCPU).dtype(kFloat).memory_format(
          at::MemoryFormat::ChannelsLast));
  auto* tensor_data = tensor.data_ptr<float>();
  for (int e = 0, i = 0; e < H * W; ++e) {
    for (int c = 0; c < C; ++c, ++i) {
      tensor_data[i] = e;
    }
  }

  // quantize and check values
  Tensor q = at::native::quantize_per_channel(
      tensor, scales, zero_points, ch_axis, kQUInt8);
  auto* q_data = (uint8_t*)q.data_ptr<quint8>();
  for (int e = 0, i = 0; e < H * W; ++e) {
    for (int c = 0; c < C; ++c, ++i) {
      float inv_scale = 1.0f / static_cast<float>(scales[c].item<double>());
      int64_t zero_point = zero_points[c].item<int64_t>();
      // clamp qval to 255 when the value exceeds the max uint8_t value
      // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,cppcoreguidelines-avoid-magic-numbers,bugprone-narrowing-conversions)
      int qval = std::min<int>(zero_point + std::nearbyint(e * inv_scale), 255);
      ASSERT_EQ((int)q_data[i], qval);
    }
  }
}

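// from_blob_quantized_per_tensor_affine wraps an existing buffer as a
// quantized tensor without copying; the supplied deleter must run once the
// tensor and its storage are released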
TEST(TestQTensor, FromBlobQuantizedPerTensor) {
  const float scale = 0.1;
  const int64_t zero_point = 10;
  std::vector<int64_t> shape = {5, 10};
  auto numel = c10::multiply_integers(shape);

  TensorOptions options(at::kQUInt8);

  auto custom_vec = std::make_unique<std::vector<uint8_t>>();
  // resize (not just reserve) so the writes below stay within the vector's size
  custom_vec->resize(numel);

  uint8_t* custom_data = custom_vec->data();
  for (const auto i : c10::irange(numel)) {
    custom_data[i] = i;
  }
  bool customDataDeleted{false};
  auto deleteWhenDone = custom_vec.release();
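  // The deleter checks it receives the original buffer pointer, frees the
  // backing vector, and records that it ran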
  auto deleter = [deleteWhenDone, custom_data, &customDataDeleted](void* inp) {
    ASSERT_EQ((void*)inp, (void*)custom_data);
    delete deleteWhenDone;
    customDataDeleted = true;
  };
  {
    Tensor qtensor = at::from_blob_quantized_per_tensor_affine(custom_data, shape, deleter, scale, zero_point, options);

    uint8_t* q_data = (uint8_t*)qtensor.data_ptr<quint8>();
    for (const auto i : c10::irange(numel)) {
      ASSERT_EQ((int)custom_data[i], (int)q_data[i]);
    }
    for (int h = 0, i = 0; h < shape[0]; ++h) {
      for (int w = 0; w < shape[1]; ++w, ++i) {
        ASSERT_EQ(
            qtensor[h][w].item<float>(),
            (custom_data[i] - zero_point) * scale);
      }
    }
    ASSERT_EQ((float)qtensor.q_scale(), (float)scale);
    ASSERT_EQ(qtensor.q_zero_point(), zero_point);
  }
  TORCH_CHECK(customDataDeleted);
}

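// Per-channel variant of the from_blob test: wrap the same kind of external
// buffer and check that the per-channel scales and zero_points are recorded
// on the resulting tensor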
TEST(TestQTensor, FromBlobQuantizedPerChannel) {
  int C = 64, H = 10, W = 5;
  std::vector<int64_t> shape = {1, C, H, W};
  auto scales = rand({C}).toType(kDouble);
  auto zero_points = randint(10, {C}).toType(kLong);
  auto numel = c10::multiply_integers(shape);
  int ch_axis = 1;
  TensorOptions options(at::kQUInt8);

  auto custom_vec = std::make_unique<std::vector<uint8_t>>();
  // resize (not just reserve) so the writes below stay within the vector's size
  custom_vec->resize(numel);

  uint8_t* custom_data = custom_vec->data();
  for (const auto i : c10::irange(numel)) {
    custom_data[i] = i;
  }
  bool customDataDeleted{false};
  auto deleteWhenDone = custom_vec.release();
  auto deleter = [deleteWhenDone, custom_data, &customDataDeleted](void* inp) {
    ASSERT_EQ((void*)inp, (void*)custom_data);
    delete deleteWhenDone;
    customDataDeleted = true;
  };
  {
    Tensor qtensor = at::from_blob_quantized_per_channel_affine(custom_data, shape, deleter, scales, zero_points, ch_axis, options);
    uint8_t* q_data = (uint8_t*)qtensor.data_ptr<quint8>();
    for (const auto i : c10::irange(numel)) {
      ASSERT_EQ((int)custom_data[i], (int)q_data[i]);
    }
    ASSERT_TRUE(at::allclose(qtensor.q_per_channel_scales(), scales));
    ASSERT_TRUE(at::allclose(qtensor.q_per_channel_zero_points(), zero_points));
    ASSERT_TRUE(qtensor.is_quantized());
  }
  TORCH_CHECK(customDataDeleted);
}

#if defined(__ARM_NEON__) || defined(__aarch64__)
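// ARM builds use NEON-vectorized quantize/dequantize kernels; compare their
// results against the scalar reference quantize_val/dequantize_val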
TEST(TestQTensor, TestArmVectorizedQuantizeDequantize) {
  const float scale = 7;
  const int numel = 132;

  std::vector<float> x_values;
  for (const auto i : c10::irange(numel)) {
    x_values.push_back(9 * i);
  }

  const Tensor x = from_blob(x_values.data(), x_values.size());

  auto test_for_datatype = [&](
      const ScalarType scalar_type,
      const auto get_data_ptr,
      const auto quantize_val_with_datatype,
      const int zero_point_min,
      const int zero_point_max) {
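    // Cover both extreme zero_points for the type plus a mid-range value so
    // values at both ends of the quantized range are exercised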
    for (int zero_point : {zero_point_min, 10, zero_point_max}) {
      const Tensor q = at::quantize_per_tensor(x, scale, zero_point, scalar_type);
      auto* q_data = get_data_ptr(q);
      for (const auto i : c10::irange(numel)) {
        ASSERT_EQ(
            q_data[i].val_,
            quantize_val_with_datatype(scale, zero_point, x_values[i]).val_);
      }
      const Tensor r = q.dequantize();
      const float* r_data = r.data_ptr<float>();
      for (const auto i : c10::irange(numel)) {
        ASSERT_FLOAT_EQ(
            r_data[i],
            native::dequantize_val(scale, zero_point, q_data[i]));
      }
    }
  };

  // Unsigned Int 8
  test_for_datatype(
      kQUInt8,
      [](Tensor q) { return q.data_ptr<quint8>(); },
      native::quantize_val<quint8>,
      std::numeric_limits<uint8_t>::min(),
      std::numeric_limits<uint8_t>::max());

  // Signed Int 8
  test_for_datatype(
      kQInt8,
      [](Tensor q) { return q.data_ptr<qint8>(); },
      native::quantize_val<qint8>,
      std::numeric_limits<int8_t>::min(),
      std::numeric_limits<int8_t>::max());

  // Signed Int 32 (not optimized with vectorization)
  test_for_datatype(
      kQInt32,
      [](Tensor q) { return q.data_ptr<qint32>(); },
      native::quantize_val<qint32>,
      std::numeric_limits<int32_t>::min(),
      std::numeric_limits<int32_t>::max());
}
#endif // defined(__ARM_NEON__) || defined(__aarch64__)

#endif // ATEN_CPU_STATIC_DISPATCH