Fix torchvision failing test case test_classification_model on slow_conv2d

Pull Request resolved: https://github.com/pytorch/pytorch/pull/77347

Approved by: https://github.com/datumbox, https://github.com/frank-wei
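
The slow (THNN) conv backends assume the weight tensor is contiguous in the
backend memory format, but previously only the input was made contiguous
before dispatching. With a channels_last weight the kernels could read the
weight with the wrong strides and produce incorrect results. This change
makes the weight contiguous as well, in both the forward and backward paths,
and extends test_conv_thnn_nhwc to cover both weight memory formats.

A minimal repro sketch (shapes and sizes are illustrative, not taken from
the original torchvision report):

    import torch
    import torch.nn as nn

    with torch.backends.mkldnn.flags(enabled=False):
        x = torch.randn(2, 8, 4, 4).to(memory_format=torch.channels_last)
        # channels_last weight reaching the slow CPU conv2d path
        conv = nn.Conv2d(8, 4, 3).to(memory_format=torch.channels_last)
        ref = nn.Conv2d(8, 4, 3)  # reference with contiguous weights
        ref.load_state_dict(conv.state_dict())
        # failed before this fix, passes after
        torch.testing.assert_close(conv(x), ref(x.contiguous()))
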
diff --git a/aten/src/ATen/native/Convolution.cpp b/aten/src/ATen/native/Convolution.cpp
index 2bfa197..b4387f1 100644
--- a/aten/src/ATen/native/Convolution.cpp
+++ b/aten/src/ATen/native/Convolution.cpp
@@ -1407,6 +1407,7 @@
     case ConvBackend::SlowTranspose2d:
     case ConvBackend::SlowTranspose3d:
       input = input.contiguous(backend_memory_format);
+      weight = weight.contiguous(backend_memory_format);
       if (params.groups == 1) {
         output = _convolution_nogroup_backend(input, weight, bias, backend, params);
       } else {
@@ -1905,6 +1906,7 @@
     case ConvBackend::SlowTranspose3d:
     {
       input = input.contiguous(backend_memory_format);
+      weight = weight.contiguous(backend_memory_format);
       if (params.groups == 1) {
         std::tie(backend_grad_input, backend_grad_weight, backend_grad_bias) =
           _convolution_backward_nogroup_backend(
diff --git a/test/test_nn.py b/test/test_nn.py
index 91c5b70..3768ff2 100644
--- a/test/test_nn.py
+++ b/test/test_nn.py
@@ -19165,12 +19165,12 @@
     @onlyCPU
     @dtypes(torch.float, torch.double)
     def test_conv_thnn_nhwc(self, device, dtype):
-        def helper(n, c, h, w, out_channels, kernel_size, dilation, groups):
+        def helper(n, c, h, w, out_channels, kernel_size, dilation, groups, weight_memory_format):
             input = torch.randint(-3, 3, (n, c, h, w), dtype=dtype, device=device)\
                 .to(memory_format=torch.channels_last)
             input.requires_grad_()
             conv = nn.Conv2d(c, out_channels, kernel_size, dilation=dilation, groups=groups)\
-                .to(device='cpu', dtype=dtype, memory_format=torch.channels_last)
+                .to(device='cpu', dtype=dtype, memory_format=weight_memory_format)
             for p in conv.parameters():
                 p.data = torch.randint_like(p, -3, 3)
 
@@ -19197,15 +19197,16 @@
             self.assertEqual(input.grad, ref_input.grad, exact_dtype=False)
 
         with torch.backends.mkldnn.flags(enabled=False):
-            # non-dilated conv: thnn_conv2d normal path (with im2col)
-            helper(2, 8, 4, 4, out_channels=4, kernel_size=3, dilation=1, groups=1)
-            helper(2, 8, 4, 4, out_channels=8, kernel_size=3, dilation=1, groups=8)
-            # non-dilated conv: thnn_conv2d fast path (skip im2col)
-            helper(1, 16, 56, 56, out_channels=16, kernel_size=1, dilation=1, groups=1)
-            helper(1, 16, 56, 56, out_channels=16, kernel_size=1, dilation=1, groups=16)
-            # dilated conv: slow_conv_dilated2d
-            helper(2, 8, 11, 13, out_channels=16, kernel_size=3, dilation=2, groups=1)
-            helper(2, 16, 11, 13, out_channels=32, kernel_size=3, dilation=2, groups=16)
+            for mf in [torch.contiguous_format, torch.channels_last]:
+                # non-dilated conv: thnn_conv2d normal path (with im2col)
+                helper(2, 8, 4, 4, out_channels=4, kernel_size=3, dilation=1, groups=1, weight_memory_format=mf)
+                helper(2, 8, 4, 4, out_channels=8, kernel_size=3, dilation=1, groups=8, weight_memory_format=mf)
+                # non-dilated conv: thnn_conv2d fast path (skip im2col)
+                helper(1, 16, 56, 56, out_channels=16, kernel_size=1, dilation=1, groups=1, weight_memory_format=mf)
+                helper(1, 16, 56, 56, out_channels=16, kernel_size=1, dilation=1, groups=16, weight_memory_format=mf)
+                # dilated conv: slow_conv_dilated2d
+                helper(2, 8, 11, 13, out_channels=16, kernel_size=3, dilation=2, groups=1, weight_memory_format=mf)
+                helper(2, 16, 11, 13, out_channels=32, kernel_size=3, dilation=2, groups=16, weight_memory_format=mf)
 
     @onlyCUDA
     @skipCUDAIfRocmVersionLessThan((4, 3))