Don't skip NCCL backend when testing all_reduce_cuda (#48231)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/48231
Noticed that these tests were being skipped with the NCCL backend, but
there doesn't appear to be a valid reason to do so. Enabled these tests and verified
that they pass with 500 stress runs.
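For context, a minimal standalone sketch of the resulting skip condition; BACKEND, skip_if_no_gpu, and the test class name are stand-ins for the test-suite globals and decorators, and the test body is a placeholder:

    import os
    import unittest

    # In the real suite, BACKEND is taken from the environment and
    # skip_if_no_gpu skips when not enough CUDA devices are available.
    BACKEND = os.environ.get("BACKEND", "nccl")

    def skip_if_no_gpu(fn):
        # placeholder decorator for illustration only
        return fn

    class AllReduceCudaSketch(unittest.TestCase):
        # With this change, the CUDA all_reduce tests run for both
        # the Gloo and NCCL backends instead of Gloo only.
        @unittest.skipIf(
            BACKEND != "gloo" and BACKEND != "nccl",
            "Only Gloo and NCCL backends will have CUDA allReduce tested",
        )
        @skip_if_no_gpu
        def test_all_reduce_sum_cuda(self):
            pass  # actual test body lives in distributed_test.py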
ghstack-source-id: 117085209
Test Plan: CI
Reviewed By: SciPioneer
Differential Revision: D25079030
fbshipit-source-id: 8204288ffbd387375a1a86fe8c07243cfd855549
diff --git a/torch/testing/_internal/distributed/distributed_test.py b/torch/testing/_internal/distributed/distributed_test.py
index 30f48ac..86f9392 100644
--- a/torch/testing/_internal/distributed/distributed_test.py
+++ b/torch/testing/_internal/distributed/distributed_test.py
@@ -1359,8 +1359,8 @@
)
@unittest.skipIf(
- BACKEND != "gloo",
- "Only Gloo backend will have CUDA allReduce tested",
+ BACKEND != "gloo" and BACKEND != "nccl",
+ "Only Gloo and NCCL backends will have CUDA allReduce tested",
)
@skip_if_no_gpu
def test_all_reduce_sum_cuda(self):
@@ -1424,8 +1424,8 @@
dist.all_reduce(_build_tensor(1, dtype=torch.cfloat), unsupported_op, group_id)
@unittest.skipIf(
- BACKEND != "gloo",
- "Only Gloo backend will have CUDA allReduce tested",
+ BACKEND != "gloo" and BACKEND != "nccl",
+ "Only Gloo and NCCL backends will have CUDA allReduce tested",
)
@skip_if_no_gpu
def test_all_reduce_sum_cuda_complex(self):