from __future__ import absolute_import, division, print_function, unicode_literals
import torch
import torch.cuda
import torch.jit
import numpy as np
import unittest
from common_utils import run_tests
import torch.nn._intrinsic as _intrinsic


# Reference method for fake-quantizing a tensor: quantize with an affine mapping,
# clamp to the representable range, then dequantize back to floating point.
def _fake_quantize_per_tensor_affine_reference(X, scale, zero_point, num_bits):
    quant_min, quant_max = 0, 2 ** num_bits - 1
    res = (np.clip(np.round(X / scale) + zero_point, quant_min, quant_max) - zero_point) * scale
    res = res.reshape(X.shape)
    return res
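
# Worked example of the reference mapping above (illustrative only, not part of the
# original test suite): with scale = 3, zero_point = 2 and num_bits = 8, an input of
# 10.0 maps to round(10 / 3) + 2 = 5, which lies inside [0, 255], so the
# fake-quantized output is (5 - 2) * 3 = 9.0.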


# Reference method for the gradient of the fake quantizer: the gradient passes
# through unchanged where the quantized value lands inside [quant_min, quant_max]
# and is zeroed where it falls outside (straight-through estimator behavior).
def _fake_quantize_per_tensor_affine_grad_reference(X, dY, scale, zero_point, num_bits):
    Xq = np.round(X / scale) + zero_point
    quant_min, quant_max = 0, 2 ** num_bits - 1
    mask = np.logical_and(Xq >= quant_min, Xq <= quant_max)
    res = np.where(mask, dY, 0.0)
    return res
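
# For example, with the gradient reference above (illustrative only, not part of the
# original test suite): with scale = 3 and zero_point = 2, an input of -20.0
# quantizes to round(-20 / 3) + 2 = -5, which is outside [0, 255], so the reference
# gradient at that element is zero; in-range elements pass dY through unchanged.
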
NP_RANDOM_SEED = 19


class TestFakeQuantizePerTensorAffine(unittest.TestCase):

    def test_forward(self):
        """Tests the forward path of the FakeQuantizePerTensorAffine op."""
        np.random.seed(NP_RANDOM_SEED)
        scale = 3
        zero_point = 2
        num_bits = 8
        X = np.random.rand(20, 20) * 125
        X_torch = torch.from_numpy(X).float()
        Y = _fake_quantize_per_tensor_affine_reference(X, scale, zero_point, num_bits)
        Y_prime = _intrinsic.fq_per_tensor_affine_forward(
            X=X_torch, scale=scale, zero_point=zero_point, num_bits=num_bits,
            quant_delay=0, iter=0)
        tolerance = 1e-6
        np.testing.assert_allclose(Y, Y_prime, rtol=tolerance, atol=tolerance)
"""Tests the backward method. Note that this runs the reference quantization
and thus the errors might be originating there."""
def test_backward(self):
np.random.seed(NP_RANDOM_SEED)
fake_quantize_per_tensor_affine_backward = torch.ops.quantized.fake_quantize_per_tensor_affine_backward
scale = 3
zero_point = 2
num_bits = 8
X = np.random.rand(20, 20) * 125
Y = _fake_quantize_per_tensor_affine_reference(X, scale, zero_point, num_bits)
dY = Y - X # Fake gradient
dX = _fake_quantize_per_tensor_affine_grad_reference(X, dY, scale, zero_point, num_bits)
X_torch = torch.from_numpy(X).float()
dY_torch = torch.from_numpy(dY).float()
dX_prime = _intrinsic.fq_per_tensor_affine_backward(
X=X_torch, dY=dY_torch, scale=scale, zero_point=zero_point,
num_bits=num_bits, quant_delay=0, iter=0)
tolerance = 1e-6
np.testing.assert_allclose(dX, dX_prime, rtol=tolerance, atol=tolerance)

    def test_numerical_consistency(self):
        """Compares the CPU quantize/dequantize ops against the CPU fake quantize op
        for numerical consistency."""
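        # Note: quantize_linear below uses torch.qint8 (nominal range [-128, 127]),
        # while the fake quantize op with num_bits=8 clamps to [0, 255]. For this
        # input (quantized values land around [2, 44]) neither path clamps, so the
        # two results should agree.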
        np.random.seed(NP_RANDOM_SEED)
        fake_quantize_per_tensor_affine_forward = torch.ops.quantized.fake_quantize_per_tensor_affine_forward
        scale = 3
        zero_point = 2
        num_bits = 8
        X = np.random.rand(20, 20) * 125
        X_torch = torch.from_numpy(X).float()
        Y = torch.dequantize(torch.quantize_linear(X_torch, scale, zero_point, torch.qint8))
        Y_prime = _intrinsic.fq_per_tensor_affine_forward(
            X=X_torch, scale=scale, zero_point=zero_point, num_bits=num_bits,
            quant_delay=0, iter=0)
        tolerance = 1e-6
        np.testing.assert_allclose(Y, Y_prime, rtol=tolerance, atol=tolerance)
"""Tests the forward path of the FakeQuantizePerTensorAffine CUDA op."""
@unittest.skipIf(not torch.cuda.is_available(), 'no CUDA')
def test_forward_cuda(self):
np.random.seed(NP_RANDOM_SEED)
scale = 3
zero_point = 2
num_bits = 8
X = np.random.rand(20, 20) * 125
X_torch = torch.from_numpy(X).float().cuda()
Y = _fake_quantize_per_tensor_affine_reference(X, scale, zero_point, num_bits)
Y_prime = _intrinsic.fq_per_tensor_affine_forward(
X=X_torch, scale=scale, zero_point=zero_point, num_bits=num_bits,
quant_delay=0, iter=0)
tolerance = 1e-6
np.testing.assert_allclose(Y, Y_prime.cpu(), rtol=tolerance, atol=tolerance)
"""Tests the backward method. Note that this runs the reference quantization
and thus the errors might be originating there."""
@unittest.skipIf(not torch.cuda.is_available(), 'no CUDA')
def test_backward_cuda(self):
np.random.seed(NP_RANDOM_SEED)
scale = 3
zero_point = 2
num_bits = 8
X = np.random.rand(20, 20) * 125
Y = _fake_quantize_per_tensor_affine_reference(X, scale, zero_point, num_bits)
dY = Y - X # Fake gradient
dX = _fake_quantize_per_tensor_affine_grad_reference(X, dY, scale, zero_point, num_bits)
X_torch = torch.from_numpy(X).float().cuda()
dY_torch = torch.from_numpy(dY).float().cuda()
dX_prime = _intrinsic.fq_per_tensor_affine_backward(
X=X_torch, dY=dY_torch, scale=scale, zero_point=zero_point,
num_bits=num_bits, quant_delay=0, iter=0)
tolerance = 1e-6
np.testing.assert_allclose(dX, dX_prime.cpu(), rtol=tolerance, atol=tolerance)

    @unittest.skipIf(not torch.cuda.is_available(), 'no CUDA')
    def test_numerical_consistency_cuda(self):
        """Compares the CPU quantize/dequantize ops against the CUDA fake quantize op
        for numerical consistency."""
        np.random.seed(NP_RANDOM_SEED)
        scale = 3
        zero_point = 2
        num_bits = 8
        X = np.random.rand(20, 20) * 125
        X_torch = torch.from_numpy(X).float()
        Y = torch.dequantize(torch.quantize_linear(X_torch, scale, zero_point, torch.qint8))
        Y_prime = _intrinsic.fq_per_tensor_affine_forward(
            X=X_torch.cuda(), scale=scale, zero_point=zero_point, num_bits=num_bits,
            quant_delay=0, iter=0)
        tolerance = 1e-6
        np.testing.assert_allclose(Y, Y_prime.cpu(), rtol=tolerance, atol=tolerance)


if __name__ == '__main__':
    run_tests()