from __future__ import absolute_import, division, print_function, unicode_literals
import torch
import torch.cuda
import torch.jit
import numpy as np
import unittest
from common_utils import run_tests
import torch.nn._intrinsic as _intrinsic


# Reference method for fake-quantizing a tensor: quantize with an affine mapping,
# clamp to the representable range, then dequantize back to floating point.
def _fake_quantize_per_tensor_affine_reference(X, scale, zero_point, num_bits):
    quant_min, quant_max = 0, 2 ** num_bits - 1
    res = (np.clip(np.round(X / scale) + zero_point, quant_min, quant_max) - zero_point) * scale
    res = res.reshape(X.shape)
    return res
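
# Worked example of the reference mapping above (illustrative only, not part of the
# original test suite): with scale = 3, zero_point = 2 and num_bits = 8, an input of
# 10.0 maps to round(10 / 3) + 2 = 5, which lies inside [0, 255], so the
# fake-quantized output is (5 - 2) * 3 = 9.0.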


# Reference method for the gradient of the fake quantizer: the gradient passes
# through unchanged where the quantized value lands inside [quant_min, quant_max]
# and is zeroed where it falls outside (straight-through estimator behavior).
def _fake_quantize_per_tensor_affine_grad_reference(X, dY, scale, zero_point, num_bits):
    Xq = np.round(X / scale) + zero_point
    quant_min, quant_max = 0, 2 ** num_bits - 1
    mask = np.logical_and(Xq >= quant_min, Xq <= quant_max)
    res = np.where(mask, dY, 0.0)
    return res
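
# For example, with the gradient reference above (illustrative only, not part of the
# original test suite): with scale = 3 and zero_point = 2, an input of -20.0
# quantizes to round(-20 / 3) + 2 = -5, which is outside [0, 255], so the reference
# gradient at that element is zero; in-range elements pass dY through unchanged.
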
NP_RANDOM_SEED = 19


class TestFakeQuantizePerTensorAffine(unittest.TestCase):

    def test_forward(self):
        """Tests the forward path of the FakeQuantizePerTensorAffine op."""
        np.random.seed(NP_RANDOM_SEED)
        scale = 3
        zero_point = 2
        num_bits = 8
        X = np.random.rand(20, 20) * 125
        X_torch = torch.from_numpy(X).float()
        Y = _fake_quantize_per_tensor_affine_reference(X, scale, zero_point, num_bits)
        Y_prime = _intrinsic.fq_per_tensor_affine_forward(
            X=X_torch, scale=scale, zero_point=zero_point, num_bits=num_bits,
            quant_delay=0, iter=0)
        tolerance = 1e-6
        np.testing.assert_allclose(Y, Y_prime, rtol=tolerance, atol=tolerance)
"""Tests the backward method. Note that this runs the reference quantization
and thus the errors might be originating there."""
def test_backward(self):
np.random.seed(NP_RANDOM_SEED)
fake_quantize_per_tensor_affine_backward = torch.ops.quantized.fake_quantize_per_tensor_affine_backward
scale = 3
zero_point = 2
num_bits = 8
X = np.random.rand(20, 20) * 125
Y = _fake_quantize_per_tensor_affine_reference(X, scale, zero_point, num_bits)
dY = Y - X # Fake gradient
dX = _fake_quantize_per_tensor_affine_grad_reference(X, dY, scale, zero_point, num_bits)
X_torch = torch.from_numpy(X).float()
dY_torch = torch.from_numpy(dY).float()
dX_prime = _intrinsic.fq_per_tensor_affine_backward(
X=X_torch, dY=dY_torch, scale=scale, zero_point=zero_point,
num_bits=num_bits, quant_delay=0, iter=0)
tolerance = 1e-6
np.testing.assert_allclose(dX, dX_prime, rtol=tolerance, atol=tolerance)

    def test_numerical_consistency(self):
        """Compares the CPU quantize/dequantize ops against the CPU fake quantize op
        for numerical consistency."""
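        # Note: quantize_linear below uses torch.qint8 (nominal range [-128, 127]),
        # while the fake quantize op with num_bits=8 clamps to [0, 255]. For this
        # input (quantized values land around [2, 44]) neither path clamps, so the
        # two results should agree.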
        np.random.seed(NP_RANDOM_SEED)
        fake_quantize_per_tensor_affine_forward = torch.ops.quantized.fake_quantize_per_tensor_affine_forward
        scale = 3
        zero_point = 2
        num_bits = 8
        X = np.random.rand(20, 20) * 125
        X_torch = torch.from_numpy(X).float()
        Y = torch.dequantize(torch.quantize_linear(X_torch, scale, zero_point, torch.qint8))
        Y_prime = _intrinsic.fq_per_tensor_affine_forward(
            X=X_torch, scale=scale, zero_point=zero_point, num_bits=num_bits,
            quant_delay=0, iter=0)
        tolerance = 1e-6
        np.testing.assert_allclose(Y, Y_prime, rtol=tolerance, atol=tolerance)
"""Tests the forward path of the FakeQuantizePerTensorAffine CUDA op."""
@unittest.skipIf(not torch.cuda.is_available(), 'no CUDA')
def test_forward_cuda(self):
np.random.seed(NP_RANDOM_SEED)
scale = 3
zero_point = 2
num_bits = 8
X = np.random.rand(20, 20) * 125
X_torch = torch.from_numpy(X).float().cuda()
Y = _fake_quantize_per_tensor_affine_reference(X, scale, zero_point, num_bits)
Y_prime = _intrinsic.fq_per_tensor_affine_forward(
X=X_torch, scale=scale, zero_point=zero_point, num_bits=num_bits,
quant_delay=0, iter=0)
tolerance = 1e-6
np.testing.assert_allclose(Y, Y_prime.cpu(), rtol=tolerance, atol=tolerance)
"""Tests the backward method. Note that this runs the reference quantization
and thus the errors might be originating there."""
@unittest.skipIf(not torch.cuda.is_available(), 'no CUDA')
def test_backward_cuda(self):
np.random.seed(NP_RANDOM_SEED)
scale = 3
zero_point = 2
num_bits = 8
X = np.random.rand(20, 20) * 125
Y = _fake_quantize_per_tensor_affine_reference(X, scale, zero_point, num_bits)
dY = Y - X # Fake gradient
dX = _fake_quantize_per_tensor_affine_grad_reference(X, dY, scale, zero_point, num_bits)
X_torch = torch.from_numpy(X).float().cuda()
dY_torch = torch.from_numpy(dY).float().cuda()
dX_prime = _intrinsic.fq_per_tensor_affine_backward(
X=X_torch, dY=dY_torch, scale=scale, zero_point=zero_point,
num_bits=num_bits, quant_delay=0, iter=0)
tolerance = 1e-6
np.testing.assert_allclose(dX, dX_prime.cpu(), rtol=tolerance, atol=tolerance)

    @unittest.skipIf(not torch.cuda.is_available(), 'no CUDA')
    def test_numerical_consistency_cuda(self):
        """Compares the CPU quantize/dequantize ops against the CUDA fake quantize op
        for numerical consistency."""
        np.random.seed(NP_RANDOM_SEED)
        scale = 3
        zero_point = 2
        num_bits = 8
        X = np.random.rand(20, 20) * 125
        X_torch = torch.from_numpy(X).float()
        Y = torch.dequantize(torch.quantize_linear(X_torch, scale, zero_point, torch.qint8))
        Y_prime = _intrinsic.fq_per_tensor_affine_forward(
            X=X_torch.cuda(), scale=scale, zero_point=zero_point, num_bits=num_bits,
            quant_delay=0, iter=0)
        tolerance = 1e-6
        np.testing.assert_allclose(Y, Y_prime.cpu(), rtol=tolerance, atol=tolerance)


if __name__ == '__main__':
    run_tests()