fix torchvision failing test case test_classification_model on slow_conv2d
Pull Request resolved: https://github.com/pytorch/pytorch/pull/77347
Approved by: https://github.com/datumbox, https://github.com/frank-wei
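
The slow CPU convolution backends (thnn_conv2d, slow_conv_dilated2d, and the slow transpose paths) coerced only the input to the backend memory format, so a channels_last input paired with a contiguous weight could hit the torchvision test_classification_model failure. This patch also makes the weight contiguous in the same memory format, in both the forward and backward paths. A minimal sketch of the failing pattern (the shapes here are illustrative, not taken from the torchvision test):

    import torch
    import torch.nn as nn

    with torch.backends.mkldnn.flags(enabled=False):
        # channels_last input, but the conv weight stays contiguous
        x = torch.randn(2, 8, 4, 4).to(memory_format=torch.channels_last)
        conv = nn.Conv2d(8, 4, kernel_size=3)
        out = conv(x)
        # reference: the same conv applied to a contiguous input
        ref = conv(x.contiguous())
        torch.testing.assert_close(out, ref)

The updated test_conv_thnn_nhwc below exercises the same mixed-memory-format combinations directly, by parametrizing the weight's memory format.
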
diff --git a/aten/src/ATen/native/Convolution.cpp b/aten/src/ATen/native/Convolution.cpp
index 2bfa197..b4387f1 100644
--- a/aten/src/ATen/native/Convolution.cpp
+++ b/aten/src/ATen/native/Convolution.cpp
@@ -1407,6 +1407,7 @@
case ConvBackend::SlowTranspose2d:
case ConvBackend::SlowTranspose3d:
input = input.contiguous(backend_memory_format);
+ weight = weight.contiguous(backend_memory_format);
if (params.groups == 1) {
output = _convolution_nogroup_backend(input, weight, bias, backend, params);
} else {
@@ -1905,6 +1906,7 @@
case ConvBackend::SlowTranspose3d:
{
input = input.contiguous(backend_memory_format);
+ weight = weight.contiguous(backend_memory_format);
if (params.groups == 1) {
std::tie(backend_grad_input, backend_grad_weight, backend_grad_bias) =
_convolution_backward_nogroup_backend(
diff --git a/test/test_nn.py b/test/test_nn.py
index 91c5b70..3768ff2 100644
--- a/test/test_nn.py
+++ b/test/test_nn.py
@@ -19165,12 +19165,12 @@
@onlyCPU
@dtypes(torch.float, torch.double)
def test_conv_thnn_nhwc(self, device, dtype):
- def helper(n, c, h, w, out_channels, kernel_size, dilation, groups):
+ def helper(n, c, h, w, out_channels, kernel_size, dilation, groups, weight_memory_format):
input = torch.randint(-3, 3, (n, c, h, w), dtype=dtype, device=device)\
.to(memory_format=torch.channels_last)
input.requires_grad_()
conv = nn.Conv2d(c, out_channels, kernel_size, dilation=dilation, groups=groups)\
- .to(device='cpu', dtype=dtype, memory_format=torch.channels_last)
+ .to(device='cpu', dtype=dtype, memory_format=weight_memory_format)
for p in conv.parameters():
p.data = torch.randint_like(p, -3, 3)
@@ -19197,15 +19197,16 @@
self.assertEqual(input.grad, ref_input.grad, exact_dtype=False)
with torch.backends.mkldnn.flags(enabled=False):
- # non-dilated conv: thnn_conv2d normal path (with im2col)
- helper(2, 8, 4, 4, out_channels=4, kernel_size=3, dilation=1, groups=1)
- helper(2, 8, 4, 4, out_channels=8, kernel_size=3, dilation=1, groups=8)
- # non-dilated conv: thnn_conv2d fast path (skip im2col)
- helper(1, 16, 56, 56, out_channels=16, kernel_size=1, dilation=1, groups=1)
- helper(1, 16, 56, 56, out_channels=16, kernel_size=1, dilation=1, groups=16)
- # dilated conv: slow_conv_dilated2d
- helper(2, 8, 11, 13, out_channels=16, kernel_size=3, dilation=2, groups=1)
- helper(2, 16, 11, 13, out_channels=32, kernel_size=3, dilation=2, groups=16)
+ for mf in [torch.contiguous_format, torch.channels_last]:
+ # non-dilated conv: thnn_conv2d normal path (with im2col)
+ helper(2, 8, 4, 4, out_channels=4, kernel_size=3, dilation=1, groups=1, weight_memory_format=mf)
+ helper(2, 8, 4, 4, out_channels=8, kernel_size=3, dilation=1, groups=8, weight_memory_format=mf)
+ # non-dilated conv: thnn_conv2d fast path (skip im2col)
+ helper(1, 16, 56, 56, out_channels=16, kernel_size=1, dilation=1, groups=1, weight_memory_format=mf)
+ helper(1, 16, 56, 56, out_channels=16, kernel_size=1, dilation=1, groups=16, weight_memory_format=mf)
+ # dilated conv: slow_conv_dilated2d
+ helper(2, 8, 11, 13, out_channels=16, kernel_size=3, dilation=2, groups=1, weight_memory_format=mf)
+ helper(2, 16, 11, 13, out_channels=32, kernel_size=3, dilation=2, groups=16, weight_memory_format=mf)
@onlyCUDA
@skipCUDAIfRocmVersionLessThan((4, 3))