import copy
import itertools
import unittest

try:
    import torchvision
    HAS_TORCHVISION = True
except ImportError:
    HAS_TORCHVISION = False

skipIfNoTorchVision = unittest.skipIf(not HAS_TORCHVISION, "no torchvision")

import torch
import torch.nn.functional as F
import torch.jit
import torch.backends.mkldnn
from torch.utils import mkldnn as mkldnn_utils
from torch.testing._internal.common_utils import TestCase, run_tests, TemporaryFileName

from torch.autograd.gradcheck import gradgradcheck, gradcheck


# Comment out the line below to find out which CI machines have the MKL-DNN build disabled
@unittest.skipIf(not torch._C.has_mkldnn, "MKL-DNN build is disabled")
class TestMkldnn(TestCase):
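    """Tests for opaque MKL-DNN tensors and the torch.utils.mkldnn conversion utilities."""
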
    def test_conversion(self):
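        # cover both a contiguous tensor and a non-contiguous 4-d slice of a 5-d tensor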
        for cpu_tensor in [torch.randn((1, 2, 3, 4),
                                       dtype=torch.float, device=torch.device('cpu')),
                           torch.randn((1, 2, 3, 4, 5),
                                       dtype=torch.float, device=torch.device('cpu'))[:, :, :, :, 1]]:
            cpu_tensor.requires_grad_()
            mkldnn_tensor = cpu_tensor.to_mkldnn()
            cpu_tensor_1 = mkldnn_tensor.to_dense()
            self.assertEqual(cpu_tensor, cpu_tensor_1)
            self.assertEqual(mkldnn_tensor.dtype, torch.float)
            self.assertEqual(mkldnn_tensor.device, torch.device('cpu'))
            self.assertEqual(mkldnn_tensor.size(), torch.Size([1, 2, 3, 4]))
            self.assertEqual(mkldnn_tensor.numel(), cpu_tensor.numel())
            self.assertEqual(mkldnn_tensor.element_size(), cpu_tensor.element_size())
            self.assertRaisesRegex(RuntimeError,
                                   "Cannot access data pointer of Tensor that doesn't have storage",
                                   lambda: mkldnn_tensor.data_ptr() != 0)

    def test_unsupported(self):
        # unsupported dtypes, on CPU and (if available) on GPU
        for dtype in [torch.double, torch.half, torch.uint8, torch.int8,
                      torch.short, torch.int, torch.long]:
            with self.assertRaises(RuntimeError):
                torch.randn(1, 2, 3, 4, dtype=dtype, device=torch.device('cpu')).to_mkldnn()
            if torch.cuda.is_available():
                with self.assertRaises(RuntimeError):
                    torch.randn(1, 2, 3, 4, dtype=dtype, device=torch.device('cuda')).to_mkldnn()
        # a supported dtype, but on GPU (mkldnn tensors are CPU-only)
        if torch.cuda.is_available():
            with self.assertRaises(RuntimeError):
                torch.randn(1, 2, 3, 4, dtype=torch.float, device=torch.device('cuda')).to_mkldnn()
        # factory functions such as ones/randn/rand reject layout=torch._mkldnn
        for creator in [torch.ones, torch.randn, torch.rand]:
            with self.assertRaises(RuntimeError):
                creator(1, 2, 3, 4, dtype=torch.float, device=torch.device('cpu'), layout=torch._mkldnn)

    def test_autograd_to_mkldnn(self):
        # MKLDNN only supports float32
        root = torch.randn(4, 5, dtype=torch.float32, requires_grad=True)

        def func(root):
            return root.to_mkldnn().to_dense()

        # Because MKLDNN only supports float32, we need to lower the precision.
        # These numbers are empirical results that seem to work.
        self.assertWarnsRegex(UserWarning,
                              'double precision floating point',
                              lambda: gradcheck(func, [root], atol=4e-2, rtol=1e-2))
        self.assertWarnsRegex(UserWarning,
                              'double precision floating point',
                              lambda: gradgradcheck(func, [root], atol=4e-2, rtol=1e-2))

    def test_autograd_from_mkldnn(self):
        # MKLDNN only supports float32
        root = torch.randn(4, 5, dtype=torch.float32).to_mkldnn().requires_grad_()

        def func(root):
            return root.to_dense()

        # Because MKLDNN only supports float32, we need to lower the precision.
        # These numbers are empirical results that seem to work.
        self.assertWarnsRegex(UserWarning,
                              'double precision floating point',
                              lambda: gradcheck(func, [root], atol=4e-2, rtol=1e-2))

    def test_detach(self):
        root = torch.randn(4, 5, dtype=torch.float32).to_mkldnn().requires_grad_()

        detach = root.detach()
        self.assertEqual((4, 5), detach.size())
        self.assertFalse(detach.requires_grad)
        self.assertTrue(root.requires_grad)

        detach_ = root.detach_()
        self.assertEqual((4, 5), detach_.size())
        self.assertFalse(detach_.requires_grad)
        self.assertFalse(root.requires_grad)

    def test_repr(self):
        self.assertTrue("layout=torch._mkldnn" in str(torch.randn((1, 2, 3, 4),
                                                                  dtype=torch.float, device=torch.device('cpu')).to_mkldnn()))

    def test_conv1d(self):
        options = itertools.product([1, 4], [True, False], [1, 2])
        for groups, bias, dilation in options:
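            # C and M are multiples of groups, as required by grouped convolution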
            N = torch.randint(3, 10, (1,)).item()
            C = torch.randint(1, 3, (1,)).item() * groups
            M = torch.randint(1, 3, (1,)).item() * groups
            x = torch.randn(N, C, 224, dtype=torch.float32)
            conv1d = torch.nn.Conv1d(in_channels=C,
                                     out_channels=M,
                                     kernel_size=3,
                                     stride=2,
                                     padding=1,
                                     dilation=dilation,
                                     bias=bias,
                                     groups=groups).float()
            mkldnn_conv1d = mkldnn_utils.to_mkldnn(copy.deepcopy(conv1d))
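            # disable the mkldnn fastpath so the reference result comes from native ATen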
            with torch.backends.mkldnn.flags(enabled=False):
                y_aten = conv1d(x)
            y_mkldnn = mkldnn_conv1d(x.to_mkldnn()).to_dense()
            self.assertEqual(y_aten, y_mkldnn)

            self._test_serialization(mkldnn_conv1d, (x.to_mkldnn(),))
            self._test_tracing(mkldnn_conv1d, (x.to_mkldnn(),))

    def test_conv2d(self):
        options = itertools.product([1, 4], [True, False], [1, 2])
        for groups, bias, dilation in options:
            N = torch.randint(3, 10, (1,)).item()
            C = torch.randint(1, 3, (1,)).item() * groups
            M = torch.randint(1, 3, (1,)).item() * groups
            x = torch.randn(N, C, 224, 224, dtype=torch.float32)
            conv2d = torch.nn.Conv2d(in_channels=C,
                                     out_channels=M,
                                     kernel_size=3,
                                     stride=2,
                                     padding=1,
                                     dilation=dilation,
                                     bias=bias,
                                     groups=groups).float()
            mkldnn_conv2d = mkldnn_utils.to_mkldnn(copy.deepcopy(conv2d))
            with torch.backends.mkldnn.flags(enabled=False):
                y_aten = conv2d(x)
            y_mkldnn = mkldnn_conv2d(x.to_mkldnn()).to_dense()
            self.assertEqual(y_aten, y_mkldnn)

            self._test_serialization(mkldnn_conv2d, (x.to_mkldnn(),))
            self._test_tracing(mkldnn_conv2d, (x.to_mkldnn(),))

    def test_conv2d_legacy_jit_model(self):
        """
        The MKLDNN integration used to serialize models with a 5-d weight for
        grouped convolutions; we'd like to preserve this behavior.
        """
        g = 4
        conv2d = torch.nn.Conv2d(16, 16, 3, groups=g)
        conv2d_mkldnn = torch.utils.mkldnn.to_mkldnn(conv2d)

        # contrive a legacy conv2d module with a 5-d weight
        o, i, h, w = conv2d.weight.shape
        weight_5d = conv2d.weight.reshape((g, o // g, i, h, w))
        conv2d_mkldnn.weight = weight_5d.to_mkldnn()

        x = torch.randn(1, 16, 8, 8)

        with TemporaryFileName() as fname:
            torch.jit.save(conv2d_mkldnn, fname)
            conv2d_loaded = torch.jit.load(fname)

            self.assertEqual(conv2d_mkldnn.weight.ndimension(), 5)
            self.assertEqual(conv2d_loaded.weight.ndimension(), 4)
            self.assertEqual(
                conv2d(x),
                conv2d_loaded(x.to_mkldnn()).to_dense())

    def test_conv3d(self):
        options = itertools.product([1, 4], [True, False])
        for groups, bias in options:
            N = torch.randint(3, 10, (1,)).item()
            C = torch.randint(1, 3, (1,)).item() * groups
            M = torch.randint(1, 3, (1,)).item() * groups
            x = torch.randn(N, C, 55, 55, 55, dtype=torch.float32)
            conv3d = torch.nn.Conv3d(in_channels=C,
                                     out_channels=M,
                                     kernel_size=3,
                                     stride=2,
                                     padding=1,
                                     bias=bias,
                                     groups=groups).float()
            mkldnn_conv3d = mkldnn_utils.to_mkldnn(copy.deepcopy(conv3d))
            with torch.backends.mkldnn.flags(enabled=False):
                y_aten = conv3d(x)
            y_mkldnn = mkldnn_conv3d(x.to_mkldnn()).to_dense()
            self.assertEqual(y_aten, y_mkldnn)

            self._test_serialization(mkldnn_conv3d, (x.to_mkldnn(),))
            self._test_tracing(mkldnn_conv3d, (x.to_mkldnn(),))

    def test_relu(self):
        x = torch.randn((4, 5), dtype=torch.float32) * 10
        self.assertEqual(torch.relu(x), torch.relu(x.to_mkldnn()).to_dense())

    def test_relu_(self):
        x1 = torch.randn((4, 5), dtype=torch.float32) * 10
        x2 = x1.clone().to_mkldnn()
        self.assertEqual(torch.relu_(x1), torch.relu_(x2).to_dense())

    def test_max_pool2d(self):
        N = torch.randint(3, 10, (1,)).item()
        C = torch.randint(3, 10, (1,)).item()

        for stride in [1, 2, 3]:
            for H, W in [(64, 64), (35, 39), (16, 19), (7, 8)]:
                x = torch.randn(N, C, H, W, dtype=torch.float32) * 10

                for ceil_mode in [False, True]:
                    max_pool2d = torch.nn.MaxPool2d(
                        kernel_size=7 if ceil_mode else 3,
                        stride=stride,
                        padding=1,
                        ceil_mode=ceil_mode)

                    self.assertEqual(
                        max_pool2d(x),
                        max_pool2d(x.to_mkldnn()).to_dense())

    def test_max_pool2d_stride_none(self):
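        # stride=None defaults the stride to kernel_size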
        N = torch.randint(3, 10, (1,)).item()
        C = torch.randint(3, 10, (1,)).item()

        for H, W in [(64, 64), (35, 39), (16, 19), (7, 8)]:
            x = torch.randn(N, C, H, W, dtype=torch.float32) * 10
            for ceil_mode in [False, True]:
                y1 = F.max_pool2d(
                    x,
                    kernel_size=7 if ceil_mode else 3,
                    stride=None,
                    padding=1,
                    ceil_mode=ceil_mode)

                y2 = F.max_pool2d(
                    x.to_mkldnn(),
                    kernel_size=7 if ceil_mode else 3,
                    stride=None,
                    padding=1,
                    ceil_mode=ceil_mode)

                self.assertEqual(y1, y2.to_dense())

    def test_max_pool3d(self):
        N = torch.randint(3, 10, (1,)).item()
        C = torch.randint(3, 10, (1,)).item()

        for stride in [1, 2, 3]:
            for D, H, W in [(64, 64, 64), (35, 39, 35), (16, 19, 20), (7, 8, 9)]:
                x = torch.randn(N, C, D, H, W, dtype=torch.float32) * 10

                for ceil_mode in [False, True]:
                    max_pool3d = torch.nn.MaxPool3d(
                        kernel_size=7 if ceil_mode else 3,
                        stride=stride,
                        padding=1,
                        ceil_mode=ceil_mode)

                    self.assertEqual(
                        max_pool3d(x),
                        max_pool3d(x.to_mkldnn()).to_dense())

    def test_max_pool_unsupported(self):
        # oneDNN does not support dilated max pooling; it will be available in oneDNN v2.0
        N = torch.randint(3, 10, (1,)).item()
        C = torch.randint(3, 10, (1,)).item()

        # 2d dilation case
        x = torch.randn(N, C, 7, 7, dtype=torch.float32).to_mkldnn()
        max_pool2d = torch.nn.MaxPool2d(
            kernel_size=3,
            stride=3,
            padding=1,
            dilation=2)
        self.assertRaisesRegex(RuntimeError,
                               'mkldnn_max_pool2d does not support dilation case',
                               lambda: max_pool2d(x))

        # 3d dilation case
        x = torch.randn(N, C, 7, 7, 7, dtype=torch.float32).to_mkldnn()
        max_pool3d = torch.nn.MaxPool3d(
            kernel_size=3,
            stride=3,
            padding=1,
            dilation=2)
        self.assertRaisesRegex(RuntimeError,
                               'mkldnn_max_pool3d does not support dilation case',
                               lambda: max_pool3d(x))

    def test_avg_pool2d(self):
        N = torch.randint(3, 10, (1,)).item()
        C = torch.randint(3, 10, (1,)).item()
        x = torch.randn(N, C, 64, 64, dtype=torch.float32) * 10

        for count_include_pad in [True, False]:
            avg_pool2d = torch.nn.AvgPool2d(
                kernel_size=3,
                stride=2,
                padding=1,
                count_include_pad=count_include_pad)

            self.assertEqual(
                avg_pool2d(x),
                avg_pool2d(x.to_mkldnn()).to_dense())

    def test_avg_pool2d_stride_none(self):
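        # as above, stride=None falls back to kernel_size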
        N = torch.randint(3, 10, (1,)).item()
        C = torch.randint(3, 10, (1,)).item()
        x = torch.randn(N, C, 64, 64, dtype=torch.float32) * 10

        for count_include_pad in [True, False]:
            y1 = F.avg_pool2d(
                x,
                kernel_size=3,
                stride=None,
                padding=1,
                count_include_pad=count_include_pad)
            y2 = F.avg_pool2d(
                x.to_mkldnn(),
                kernel_size=3,
                stride=None,
                padding=1,
                count_include_pad=count_include_pad)

            self.assertEqual(y1, y2.to_dense())

    def test_avg_pool3d(self):
        N = torch.randint(3, 10, (1,)).item()
        C = torch.randint(3, 10, (1,)).item()
        x = torch.randn(N, C, 64, 64, 64, dtype=torch.float32) * 10

        for count_include_pad in [True, False]:
            avg_pool3d = torch.nn.AvgPool3d(
                kernel_size=3,
                stride=2,
                padding=1,
                count_include_pad=count_include_pad)

            self.assertEqual(
                avg_pool3d(x),
                avg_pool3d(x.to_mkldnn()).to_dense())

    def test_adaptive_avg_pool2d(self):
        N = torch.randint(3, 10, (1,)).item()
        C = torch.randint(3, 10, (1,)).item()
        x = torch.randn(N, C, 224, 224, dtype=torch.float32) * 100

        adaptive_avg_pool2d = torch.nn.AdaptiveAvgPool2d(7)

        self.assertEqual(
            adaptive_avg_pool2d(x),
            adaptive_avg_pool2d(x.to_mkldnn()).to_dense())

    def test_batch_norm2d(self):
        N = torch.randint(3, 10, (1,)).item()
        C = torch.randint(3, 100, (1,)).item()
        x = torch.randn(N, C, 35, 45, dtype=torch.float32) * 10

        # TODO: support training
        for train in [False]:
            bn = torch.nn.BatchNorm2d(C).float().train(train)
            mkldnn_bn = mkldnn_utils.to_mkldnn(copy.deepcopy(bn))
            self.assertEqual(
                bn(x),
                mkldnn_bn(x.to_mkldnn()).to_dense())

            self._test_serialization(mkldnn_bn, (x.to_mkldnn(),))
            self._test_tracing(mkldnn_bn, (x.to_mkldnn(),))

    def test_batch_norm3d(self):
        N = torch.randint(3, 10, (1,)).item()
        C = torch.randint(3, 100, (1,)).item()
        x = torch.randn(N, C, 30, 30, 30, dtype=torch.float32) * 10

        # TODO: support training
        for train in [False]:
            bn = torch.nn.BatchNorm3d(C).float().train(train)
            mkldnn_bn = mkldnn_utils.to_mkldnn(copy.deepcopy(bn))
            self.assertEqual(
                bn(x),
                mkldnn_bn(x.to_mkldnn()).to_dense())

            self._test_serialization(mkldnn_bn, (x.to_mkldnn(),))
            self._test_tracing(mkldnn_bn, (x.to_mkldnn(),))

    def test_add(self):
        N = torch.randint(3, 10, (1,)).item()
        C = torch.randint(3, 100, (1,)).item()
        alpha = torch.randn(1, dtype=torch.float32).item()

        x = torch.randn(N, C, 35, 45, dtype=torch.float32) * 10
        y = torch.randn(N, C, 35, 45, dtype=torch.float32) * 10
        mx = x.to_mkldnn()
        my = y.to_mkldnn()

        # add
        self.assertEqual(
            x + y,
            (mx + my).to_dense())

        self.assertEqual(
            torch.add(x, y, alpha=alpha),
            torch.add(mx, my, alpha=alpha).to_dense())

        # add_
        x += y
        mx += my
        self.assertEqual(x, mx.to_dense())

        # add_out
        out = x.clone()
        mkldnn_out = out.to_mkldnn()
        torch.add(x, y, alpha=alpha, out=out)
        torch.add(mx, my, alpha=alpha, out=mkldnn_out)
        self.assertEqual(out, mkldnn_out.to_dense())

    def test_mul(self):
        N = torch.randint(3, 10, (1,)).item()
        C = torch.randint(3, 100, (1,)).item()
        value = torch.randn(1, dtype=torch.float32).item()

        x = torch.randn(N, C, 35, 45, dtype=torch.float32) * 10
        y = torch.randn(N, C, 35, 45, dtype=torch.float32) * 10
        mx = x.to_mkldnn()
        my = y.to_mkldnn()

        # mul
        self.assertEqual(
            x * y,
            (mx * my).to_dense())

        self.assertEqual(
            x * value,
            (mx * value).to_dense())

        self.assertEqual(
            torch.mul(x, y),
            torch.mul(mx, my).to_dense())

        self.assertEqual(
            torch.mul(x, value),
            torch.mul(mx, value).to_dense())

        # mul_
        x *= y
        mx *= my
        self.assertEqual(x, mx.to_dense())

        x *= value
        mx *= value
        self.assertEqual(x, mx.to_dense())

        # mul_out
        out = x.clone()
        mkldnn_out = out.to_mkldnn()
        torch.mul(x, y, out=out)
        torch.mul(mx, my, out=mkldnn_out)
        self.assertEqual(out, mkldnn_out.to_dense())

        out = x.clone()
        mkldnn_out = out.to_mkldnn()
        torch.mul(x, value, out=out)
        torch.mul(mx, value, out=mkldnn_out)
        self.assertEqual(out, mkldnn_out.to_dense())

    def test_view(self):
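        # mkldnn tensors are opaque and have no storage, so view() is unsupported;
        # the error message points users at reshape() instead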
        x = torch.randn(3, 4, 5, dtype=torch.float32).to_mkldnn()
        self.assertRaisesRegex(RuntimeError,
                               "Change to use reshape",
                               lambda: x.view(x.size(0), -1))

    def test_reshape(self):
        x = torch.randn(3, 4, 5, dtype=torch.float32) * 10
        size = (x.size(0), -1)

        self.assertEqual(
            x.reshape(size),
            x.to_mkldnn().reshape(size).to_dense(),
        )
        # test that reshape of a plain-format tensor shares memory with the original
        y = x.to_mkldnn()
        z = y.reshape(size).add_(y.reshape(size))
        self.assertEqual(
            y.reshape(size).to_dense(),
            z.to_dense(),
        )

    def test_reshape_blocked_format(self):
        # construct an mkldnn blocked tensor with mkldnn conv2d
        C = 7
        m = mkldnn_utils.to_mkldnn(torch.nn.Conv2d(C, C, 3))
        x = torch.randn(1, C, 8, 8).to_mkldnn()

        # mkldnn tensor w/ blocked format
        y_block = m(x)
        # aten tensor w/ plain format
        y_plain = y_block.to_dense()

        y_block_reshape = y_block.reshape(C, -1)
        y_plain_reshape = y_plain.reshape(C, -1)

        self.assertEqual(y_plain_reshape, y_block_reshape.to_dense())

    def test_clone(self):
        x = torch.randn(4, 5, dtype=torch.float32) * 10
        self.assertEqual(
            x.clone(),
            x.to_mkldnn().clone().to_dense(),
        )
        # test that clone does not share memory with the original
        y = x.to_mkldnn()
        z = y.clone().add_(y)
        self.assertNotEqual(
            y.to_dense(),
            z.to_dense(),
        )

    def test_transpose(self):
        x = torch.randn(3, 4, 5, dtype=torch.float32) * 10
        for dim1 in range(x.ndim):
            for dim2 in range(x.ndim):
                self.assertEqual(
                    x.transpose(dim1, dim2),
                    x.to_mkldnn().transpose(dim1, dim2).to_dense(),
                )

    def test_linear(self):
        in_features = torch.randint(3, 10, (1,)).item()
        out_features = torch.randint(3, 100, (1,)).item()
        x = torch.randn(3, in_features, dtype=torch.float32) * 10

        for bias in [True, False]:
            linear = torch.nn.Linear(in_features, out_features, bias=bias).float()
            mkldnn_linear = mkldnn_utils.to_mkldnn(copy.deepcopy(linear))
            self.assertEqual(
                linear(x),
                mkldnn_linear(x.to_mkldnn()).to_dense())

            self._test_serialization(mkldnn_linear, (x.to_mkldnn(),))
            self._test_tracing(mkldnn_linear, (x.to_mkldnn(),))

    def test_softmax(self):
        x = torch.randn(3, 4, 5, dtype=torch.float32) * 10
        for dim in range(x.ndim):
            softmax = torch.nn.Softmax(dim=dim)
            self.assertEqual(
                softmax(x),
                softmax(x.to_mkldnn()).to_dense())

    def test_sigmoid(self):
        x = torch.randn(4, 5, dtype=torch.float32) * 10
        mkldnn_x = x.to_mkldnn()
        self.assertEqual(
            torch.sigmoid(x),
            torch.sigmoid(mkldnn_x).to_dense(),
        )
        # inplace
        torch.sigmoid_(x)
        torch.sigmoid_(mkldnn_x)
        self.assertEqual(x, mkldnn_x.to_dense())

    def _test_serialization(self, module, inputs):
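        # round-trip the scripted module through torch.jit.save/load and compare outputs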
        with TemporaryFileName() as fname:
            torch.jit.save(module, fname)
            loaded = torch.jit.load(fname)
            self.assertEqual(
                module(*inputs).to_dense(),
                loaded(*inputs).to_dense())

    def _test_tracing(self, module, inputs):
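        # check_trace=False since outputs are opaque mkldnn tensors; the dense
        # outputs are compared manually below instead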
        traced = torch.jit.trace(module, inputs, check_trace=False)
        self.assertEqual(
            module(*inputs).to_dense(),
            traced(*inputs).to_dense())

    def test_set_data_tensorimpl_type(self):
        # Dense tensor has impl of type `TensorImpl`, while MKL-DNN tensor has impl
        # of type `OpaqueTensorImpl<IDeepTensorWrapperPtr>`.
        x = torch.randn((1, 2), dtype=torch.float, device=torch.device('cpu'))
        x_mkldnn = x.to_mkldnn()
        with self.assertRaisesRegex(RuntimeError, 'incompatible tensor type'):
            x.data = x_mkldnn

    def test_empty(self):
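        # unlike ones/randn/rand (see test_unsupported), empty() accepts
        # layout=torch._mkldnn; values are uninitialized, so only metadata is compared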
        x1 = torch.empty(4, 5, 2, 3, dtype=torch.float32)
        x2 = torch.empty(4, 5, 2, 3, dtype=torch.float32, layout=torch._mkldnn)
        self.assertEqual(x1.size(), x2.to_dense().size())
        self.assertEqual(x1.dtype, x2.to_dense().dtype)

    def test_zero_(self):
        x1 = torch.randn(4, 5, dtype=torch.float32) * 10
        x2 = x1.clone().to_mkldnn()
        self.assertEqual(
            x1.zero_(),
            x2.zero_().to_dense(),
        )

    def test_is_mkldnn(self):
        x = torch.randn(1, dtype=torch.float32)
        self.assertFalse(x.is_mkldnn)
        self.assertTrue(x.to_mkldnn().is_mkldnn)

    # legacy constructor/new doesn't support mkldnn tensors
    def test_legacy_new_failure(self):
        x = torch.randn(1, dtype=torch.float32)
        x_mkldnn = x.to_mkldnn()
        self.assertRaises(RuntimeError, lambda: x_mkldnn.new(device='cpu'))
        self.assertRaises(RuntimeError, lambda: x_mkldnn.new(x.storage()))
        self.assertRaises(RuntimeError, lambda: x_mkldnn.new(x))
        self.assertRaises(RuntimeError, lambda: x_mkldnn.new(torch.Size([2, 3])))
        self.assertRaises(RuntimeError, lambda: x_mkldnn.new([6]))

    def test_is_mkldnn_jit(self):
        class EnsureMkldnn(torch.jit.ScriptModule):
            @torch.jit.script_method
            def forward(self, x):
                if not x.is_mkldnn:
                    x = x.to_mkldnn()
                return x

        m = EnsureMkldnn()
        x = torch.randn(1, dtype=torch.float32)
        self.assertTrue(m(x).is_mkldnn)
        self.assertTrue(m(x.to_mkldnn()).is_mkldnn)

    def _test_imagenet_model(self, model):
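        # run in eval mode and compare the native model against its mkldnn-converted copy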
        model = model.train(False).float()
        mkldnn_model = mkldnn_utils.to_mkldnn(copy.deepcopy(model))
        x = torch.randn(1, 3, 224, 224, dtype=torch.float32)
        with torch.no_grad():
            self.assertEqual(
                model(x),
                mkldnn_model(x.to_mkldnn()).to_dense(),
            )

    @skipIfNoTorchVision
    def test_resnet18(self):
        model = torchvision.models.resnet.resnet18(pretrained=False)
        self._test_imagenet_model(model)

    @skipIfNoTorchVision
    def test_resnext50_32x4d(self):
        model = torchvision.models.resnet.resnext50_32x4d(pretrained=False)
        self._test_imagenet_model(model)


if __name__ == '__main__':
    run_tests()