from __future__ import absolute_import, division, print_function, unicode_literals
import copy
import unittest

try:
    import torchvision
    HAS_TORCHVISION = True
except ImportError:
    HAS_TORCHVISION = False

skipIfNoTorchVision = unittest.skipIf(not HAS_TORCHVISION, "no torchvision")

import torch
import torch.jit
import torch.backends.mkldnn
from torch.utils import mkldnn as mkldnn_utils
from torch.testing._internal.common_utils import TestCase, run_tests, TemporaryFileName

from torch.autograd.gradcheck import gradgradcheck, gradcheck


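# Tests for the MKL-DNN opaque tensor layout (torch._mkldnn) and for converting
# nn modules to MKL-DNN via torch.utils.mkldnn.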
# Comment out the line below to find out which CI machines have the MKL-DNN build disabled.
@unittest.skipIf(not torch._C.has_mkldnn, "MKL-DNN build is disabled")
class TestMkldnn(TestCase):
    def test_conversion(self):
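        # Cover both a contiguous tensor and a non-contiguous 4-d slice of a 5-d tensor;
        # both should round-trip through to_mkldnn() / to_dense() unchanged.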
        for cpu_tensor in [torch.randn((1, 2, 3, 4),
                                       dtype=torch.float, device=torch.device('cpu')),
                           torch.randn((1, 2, 3, 4, 5),
                                       dtype=torch.float, device=torch.device('cpu'))[:, :, :, :, 1]]:
            cpu_tensor.requires_grad_()
            mkldnn_tensor = cpu_tensor.to_mkldnn()
            cpu_tensor_1 = mkldnn_tensor.to_dense()
            self.assertEqual(cpu_tensor, cpu_tensor_1)
            self.assertEqual(mkldnn_tensor.dtype, torch.float)
            self.assertEqual(mkldnn_tensor.device, torch.device('cpu'))
            self.assertEqual(mkldnn_tensor.size(), torch.Size([1, 2, 3, 4]))
            self.assertEqual(mkldnn_tensor.numel(), cpu_tensor.numel())
            self.assertEqual(mkldnn_tensor.element_size(), cpu_tensor.element_size())
            self.assertRaisesRegex(RuntimeError,
                                   "Cannot access data pointer of Tensor that doesn't have storage",
                                   lambda: mkldnn_tensor.data_ptr() != 0)

    def test_unsupported(self):
        # unsupported dtypes, on CPU and (if available) on GPU
        for dtype in [torch.double, torch.half, torch.uint8, torch.int8,
                      torch.short, torch.int, torch.long]:
            with self.assertRaises(RuntimeError):
                torch.randn(1, 2, 3, 4, dtype=dtype, device=torch.device('cpu')).to_mkldnn()
            if torch.cuda.is_available():
                with self.assertRaises(RuntimeError):
                    torch.randn(1, 2, 3, 4, dtype=dtype, device=torch.device('cuda')).to_mkldnn()
        # supported dtype (float32), but on GPU
        if torch.cuda.is_available():
            with self.assertRaises(RuntimeError):
                torch.randn(1, 2, 3, 4, dtype=torch.float, device=torch.device('cuda')).to_mkldnn()
        # some factory functions
        for creator in [torch.ones, torch.randn, torch.rand]:
            with self.assertRaises(RuntimeError):
                creator(1, 2, 3, 4, dtype=torch.float, device=torch.device('cpu'), layout=torch._mkldnn)

    def test_autograd_to_mkldnn(self):
        # MKL-DNN only supports float32
        root = torch.randn(4, 5, dtype=torch.float32, requires_grad=True)

        def func(root):
            return root.to_mkldnn().to_dense()

        # Because MKL-DNN only supports float32, we need to loosen the gradcheck
        # tolerances. These values are empirical and simply seem to work.
        self.assertWarnsRegex(UserWarning,
                              'double precision floating point',
                              lambda: gradcheck(func, [root], atol=4e-2, rtol=1e-2))
        self.assertWarnsRegex(UserWarning,
                              'double precision floating point',
                              lambda: gradgradcheck(func, [root], atol=4e-2, rtol=1e-2))

    def test_autograd_from_mkldnn(self):
        # MKL-DNN only supports float32
        root = torch.randn(4, 5, dtype=torch.float32).to_mkldnn().requires_grad_()

        def func(root):
            return root.to_dense()

        # Because MKL-DNN only supports float32, we need to loosen the gradcheck
        # tolerances. These values are empirical and simply seem to work.
        self.assertWarnsRegex(UserWarning,
                              'double precision floating point',
                              lambda: gradcheck(func, [root], atol=4e-2, rtol=1e-2))

    def test_detach(self):
        root = torch.randn(4, 5, dtype=torch.float32).to_mkldnn().requires_grad_()

        detach = root.detach()
        self.assertEqual((4, 5), detach.size())
        self.assertFalse(detach.requires_grad)
        self.assertTrue(root.requires_grad)

        detach_ = root.detach_()
        self.assertEqual((4, 5), detach_.size())
        self.assertFalse(detach_.requires_grad)
        self.assertFalse(root.requires_grad)

    def test_repr(self):
        self.assertTrue("layout=torch._mkldnn" in str(torch.randn((1, 2, 3, 4),
                                                                  dtype=torch.float, device=torch.device('cpu')).to_mkldnn()))

    def test_conv2d(self):
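        # Compare MKL-DNN conv2d against the reference implementation for grouped and
        # ungrouped, biased and unbiased configurations, then exercise serialization
        # and tracing of the converted module.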
        for groups in [1, 4]:
            N = torch.randint(3, 10, (1,)).item()
            C = torch.randint(1, 3, (1,)).item() * groups
            M = torch.randint(1, 3, (1,)).item() * groups
            x = torch.randn(N, C, 224, 224, dtype=torch.float32)
            for bias in [True, False]:
                conv2d = torch.nn.Conv2d(in_channels=C,
                                         out_channels=M,
                                         kernel_size=3,
                                         stride=2,
                                         padding=1,
                                         bias=bias,
                                         groups=groups).float()
                mkldnn_conv2d = mkldnn_utils.to_mkldnn(copy.deepcopy(conv2d))
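                # Compute the reference output with the MKL-DNN backend disabled so that
                # conv2d runs through the native (non-MKL-DNN) path.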
                with torch.backends.mkldnn.flags(enabled=False):
                    y_aten = conv2d(x)
                y_mkldnn = mkldnn_conv2d(x.to_mkldnn()).to_dense()
                self.assertEqual(y_aten, y_mkldnn)

                self._test_serialization(mkldnn_conv2d, (x.to_mkldnn(),))
                self._test_tracing(mkldnn_conv2d, (x.to_mkldnn(),))

    def test_conv2d_legacy_jit_model(self):
        """
        The MKL-DNN integration used to serialize models with a 5-d weight for grouped
        convolutions; we'd like to preserve this behavior.
        """
        g = 4
        conv2d = torch.nn.Conv2d(16, 16, 3, groups=g)
        conv2d_mkldnn = torch.utils.mkldnn.to_mkldnn(conv2d)

        # contrive a legacy conv2d module with a 5-d weight
        o, i, h, w = conv2d.weight.shape
        weight_5d = conv2d.weight.reshape((g, o // g, i, h, w))
        conv2d_mkldnn.weight = weight_5d.to_mkldnn()

        x = torch.randn(1, 16, 8, 8)

        with TemporaryFileName() as fname:
            torch.jit.save(conv2d_mkldnn, fname)
            conv2d_loaded = torch.jit.load(fname)

            self.assertEqual(conv2d_mkldnn.weight.ndimension(), 5)
            self.assertEqual(conv2d_loaded.weight.ndimension(), 4)
            self.assertEqual(
                conv2d(x),
                conv2d_loaded(x.to_mkldnn()).to_dense())

    def test_relu(self):
        x = torch.randn((4, 5), dtype=torch.float32) * 10
        self.assertEqual(torch.relu(x), torch.relu(x.to_mkldnn()).to_dense())

    def test_relu_(self):
        x1 = torch.randn((4, 5), dtype=torch.float32) * 10
        x2 = x1.clone().to_mkldnn()
        self.assertEqual(torch.relu_(x1), torch.relu_(x2).to_dense())

    def test_max_pool2d(self):
        N = torch.randint(3, 10, (1,)).item()
        C = torch.randint(3, 10, (1,)).item()

        for stride in [1, 2, 3]:
            for H, W in [(64, 64), (35, 39), (16, 19), [7, 8]]:
                x = torch.randn(N, C, H, W, dtype=torch.float32) * 10

                for ceil_mode in [False, True]:
                    max_pool2d = torch.nn.MaxPool2d(
                        kernel_size=3 if not ceil_mode else 7,
                        stride=stride,
                        padding=1,
                        ceil_mode=ceil_mode)

                    self.assertEqual(
                        max_pool2d(x),
                        max_pool2d(x.to_mkldnn()).to_dense())

    def test_avg_pool2d(self):
        N = torch.randint(3, 10, (1,)).item()
        C = torch.randint(3, 10, (1,)).item()
        x = torch.randn(N, C, 64, 64, dtype=torch.float32) * 10

        for count_include_pad in [True, False]:
            avg_pool2d = torch.nn.AvgPool2d(
                kernel_size=3,
                stride=2,
                padding=1,
                count_include_pad=count_include_pad)

            self.assertEqual(
                avg_pool2d(x),
                avg_pool2d(x.to_mkldnn()).to_dense())

    def test_adaptive_avg_pool2d(self):
        N = torch.randint(3, 10, (1,)).item()
        C = torch.randint(3, 10, (1,)).item()
        x = torch.randn(N, C, 224, 224, dtype=torch.float32) * 100

        adaptive_avg_pool2d = torch.nn.AdaptiveAvgPool2d(7)

        self.assertEqual(
            adaptive_avg_pool2d(x),
            adaptive_avg_pool2d(x.to_mkldnn()).to_dense())

    def test_batch_norm2d(self):
        N = torch.randint(3, 10, (1,)).item()
        C = torch.randint(3, 100, (1,)).item()
        x = torch.randn(N, C, 35, 45, dtype=torch.float32) * 10

        # TODO: support training
        for train in [False]:
            bn = torch.nn.BatchNorm2d(C).float().train(train)
            mkldnn_bn = mkldnn_utils.to_mkldnn(copy.deepcopy(bn))
            self.assertEqual(
                bn(x),
                mkldnn_bn(x.to_mkldnn()).to_dense())

            self._test_serialization(mkldnn_bn, (x.to_mkldnn(),))
            self._test_tracing(mkldnn_bn, (x.to_mkldnn(),))

    def test_add(self):
        N = torch.randint(3, 10, (1,)).item()
        C = torch.randint(3, 100, (1,)).item()
        alpha = torch.randn(1, dtype=torch.float32).item()

        x = torch.randn(N, C, 35, 45, dtype=torch.float32) * 10
        y = torch.randn(N, C, 35, 45, dtype=torch.float32) * 10
        mx = x.to_mkldnn()
        my = y.to_mkldnn()

        # add
        self.assertEqual(
            x + y,
            (mx + my).to_dense())

        self.assertEqual(
            torch.add(x, y, alpha=alpha),
            torch.add(mx, my, alpha=alpha).to_dense())

        # add_
        x += y
        mx += my
        self.assertEqual(x, mx.to_dense())

        # add_out
        out = x.clone()
        mkldnn_out = out.to_mkldnn()
        torch.add(x, y, alpha=alpha, out=out)
        torch.add(mx, my, alpha=alpha, out=mkldnn_out)
        self.assertEqual(out, mkldnn_out.to_dense())

    def test_mul(self):
        N = torch.randint(3, 10, (1,)).item()
        C = torch.randint(3, 100, (1,)).item()
        value = torch.randn(1, dtype=torch.float32).item()

        x = torch.randn(N, C, 35, 45, dtype=torch.float32) * 10
        y = torch.randn(N, C, 35, 45, dtype=torch.float32) * 10
        mx = x.to_mkldnn()
        my = y.to_mkldnn()

        # mul
        self.assertEqual(
            x * y,
            (mx * my).to_dense())

        self.assertEqual(
            x * value,
            (mx * value).to_dense())

        self.assertEqual(
            torch.mul(x, y),
            torch.mul(mx, my).to_dense())

        self.assertEqual(
            torch.mul(x, value),
            torch.mul(mx, value).to_dense())

        # mul_
        x *= y
        mx *= my
        self.assertEqual(x, mx.to_dense())

        x *= value
        mx *= value
        self.assertEqual(x, mx.to_dense())

        # mul_out
        out = x.clone()
        mkldnn_out = out.to_mkldnn()
        torch.mul(x, y, out=out)
        torch.mul(mx, my, out=mkldnn_out)
        self.assertEqual(out, mkldnn_out.to_dense())

        out = x.clone()
        mkldnn_out = out.to_mkldnn()
        torch.mul(x, value, out=out)
        torch.mul(mx, value, out=mkldnn_out)
        self.assertEqual(out, mkldnn_out.to_dense())

    def test_view(self):
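        # view() is not supported on MKL-DNN tensors; it should raise and point
        # users at reshape() instead.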
        x = torch.randn(3, 4, 5, dtype=torch.float32).to_mkldnn()
        self.assertRaisesRegex(RuntimeError,
                               "Change to use reshape",
                               lambda: x.view(x.size(0), -1))

    def test_reshape(self):
        x = torch.randn(3, 4, 5, dtype=torch.float32) * 10
        size = (x.size(0), -1)

        self.assertEqual(
            x.reshape(size),
            x.to_mkldnn().reshape(size).to_dense(),
        )
        # test that reshape shares memory with the original for a plain-format tensor
        y = x.to_mkldnn()
        z = y.reshape(size).add_(y.reshape(size))
        self.assertEqual(
            y.reshape(size).to_dense(),
            z.to_dense(),
        )

    def test_reshape_blocked_format(self):
        # construct an mkldnn blocked tensor with mkldnn conv2d
        C = 7
        m = mkldnn_utils.to_mkldnn(torch.nn.Conv2d(C, C, 3))
        x = torch.randn(1, C, 8, 8).to_mkldnn()

        # mkldnn tensor w/ blocked format
        y_block = m(x)
        # aten tensor w/ plain format
        y_plain = y_block.to_dense()

        y_block_reshape = y_block.reshape(C, -1)
        y_plain_reshape = y_plain.reshape(C, -1)

        self.assertEqual(y_plain_reshape, y_block_reshape.to_dense())

    def test_clone(self):
        x = torch.randn(4, 5, dtype=torch.float32) * 10
        self.assertEqual(
            x.clone(),
            x.to_mkldnn().clone().to_dense(),
        )
        # test that the clone does not share memory with the original
        y = x.to_mkldnn()
        z = y.clone().add_(y)
        self.assertNotEqual(
            y.to_dense(),
            z.to_dense(),
        )

    def test_transpose(self):
        x = torch.randn(3, 4, 5, dtype=torch.float32) * 10
        for dim1 in range(x.ndim):
            for dim2 in range(x.ndim):
                self.assertEqual(
                    x.transpose(dim1, dim2),
                    x.to_mkldnn().transpose(dim1, dim2).to_dense(),
                )

    def test_linear(self):
        in_features = torch.randint(3, 10, (1,)).item()
        out_features = torch.randint(3, 100, (1,)).item()
        x = torch.randn(3, in_features, dtype=torch.float32) * 10

        for bias in [True, False]:
            linear = torch.nn.Linear(in_features, out_features, bias=bias).float()
            mkldnn_linear = mkldnn_utils.to_mkldnn(copy.deepcopy(linear))
            self.assertEqual(
                linear(x),
                mkldnn_linear(x.to_mkldnn()).to_dense())

            self._test_serialization(mkldnn_linear, (x.to_mkldnn(),))
            self._test_tracing(mkldnn_linear, (x.to_mkldnn(),))

    def test_softmax(self):
        x = torch.randn(3, 4, 5, dtype=torch.float32) * 10
        for dim in range(x.ndim):
            softmax = torch.nn.Softmax(dim=dim)
            self.assertEqual(
                softmax(x),
                softmax(x.to_mkldnn()).to_dense())

    def test_sigmoid(self):
        x = torch.randn(4, 5, dtype=torch.float32) * 10
        mkldnn_x = x.to_mkldnn()
        self.assertEqual(
            torch.sigmoid(x),
            torch.sigmoid(mkldnn_x).to_dense(),
        )
        # inplace
        torch.sigmoid_(x)
        torch.sigmoid_(mkldnn_x)
        self.assertEqual(x, mkldnn_x.to_dense())

    def _test_serialization(self, module, inputs):
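        # Helper: round-trip a converted module through torch.jit.save / torch.jit.load
        # and verify its output is unchanged.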
        with TemporaryFileName() as fname:
            torch.jit.save(module, fname)
            loaded = torch.jit.load(fname)
            self.assertEqual(
                module(*inputs).to_dense(),
                loaded(*inputs).to_dense())

    def _test_tracing(self, module, inputs):
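        # Helper: trace a converted module. Trace checking is skipped here; instead
        # the traced module's output is compared against the eager output explicitly.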
        traced = torch.jit.trace(module, inputs, check_trace=False)
        self.assertEqual(
            module(*inputs).to_dense(),
            traced(*inputs).to_dense())

    def test_set_data_tensorimpl_type(self):
        # Dense tensor has impl of type `TensorImpl`, while MKL-DNN tensor has impl
        # of type `OpaqueTensorImpl<IDeepTensorWrapperPtr>`.
        x = torch.randn((1, 2), dtype=torch.float, device=torch.device('cpu'))
        x_mkldnn = x.to_mkldnn()
        with self.assertRaisesRegex(RuntimeError, 'incompatible tensor type'):
            x.data = x_mkldnn

    def test_empty(self):
        x1 = torch.empty(4, 5, 2, 3, dtype=torch.float32)
        x2 = torch.empty(4, 5, 2, 3, dtype=torch.float32, layout=torch._mkldnn)
        self.assertEqual(x1.size(), x2.to_dense().size())
        self.assertEqual(x1.dtype, x2.to_dense().dtype)

    def test_zero_(self):
        x1 = torch.randn(4, 5, dtype=torch.float32) * 10
        x2 = x1.clone().to_mkldnn()
        self.assertEqual(
            x1.zero_(),
            x2.zero_().to_dense(),
        )

    def test_is_mkldnn(self):
        x = torch.randn(1, dtype=torch.float32)
        self.assertFalse(x.is_mkldnn)
        self.assertTrue(x.to_mkldnn().is_mkldnn)

    # legacy constructor/new doesn't support mkldnn tensors
    def test_legacy_new_failure(self):
        x = torch.randn(1, dtype=torch.float32)
        x_mkldnn = x.to_mkldnn()
        self.assertRaises(RuntimeError, lambda: x_mkldnn.new(device='cpu'))
        self.assertRaises(RuntimeError, lambda: x_mkldnn.new(x.storage()))
        self.assertRaises(RuntimeError, lambda: x_mkldnn.new(x))
        self.assertRaises(RuntimeError, lambda: x_mkldnn.new(torch.Size([2, 3])))
        self.assertRaises(RuntimeError, lambda: x_mkldnn.new([6]))

    def test_is_mkldnn_jit(self):
        class EnsureMkldnn(torch.jit.ScriptModule):
            @torch.jit.script_method
            def forward(self, x):
                if not x.is_mkldnn:
                    x = x.to_mkldnn()
                return x

        m = EnsureMkldnn()
        x = torch.randn(1, dtype=torch.float32)
        self.assertTrue(m(x).is_mkldnn)
        self.assertTrue(m(x.to_mkldnn()).is_mkldnn)

    def _test_imagenet_model(self, model):
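        # Helper: run a torchvision model in eval mode and compare the MKL-DNN-converted
        # copy against the original model.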
        model = model.train(False).float()
        mkldnn_model = mkldnn_utils.to_mkldnn(copy.deepcopy(model))
        x = torch.randn(1, 3, 224, 224, dtype=torch.float32)
        with torch.no_grad():
            self.assertEqual(
                model(x),
                mkldnn_model(x.to_mkldnn()).to_dense(),
            )

    @skipIfNoTorchVision
    def test_resnet18(self):
        model = torchvision.models.resnet.resnet18(pretrained=False)
        self._test_imagenet_model(model)

    @skipIfNoTorchVision
    def test_resnext50_32x4d(self):
        model = torchvision.models.resnet.resnext50_32x4d(pretrained=False)
        self._test_imagenet_model(model)


if __name__ == '__main__':
    run_tests()