Fix failure of test_dynamo_distributed & test_inductor_collectives (#117741)

When CUDA is not available, `c10d.init_process_group("nccl", ...)` fails with
> RuntimeError: ProcessGroupNCCL is only supported with GPUs, no GPUs found!

Hence add a `requires_cuda` skip decorator, next to the existing `requires_nccl` marker, to the test classes deriving from `DynamoDistributedSingleProcTestCase`.
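
For illustration, a minimal, self-contained sketch of how such a class-level skip works; `MyDistributedTest` and its test body are hypothetical, while `requires_cuda` mirrors the `unittest.skipUnless` helper added in this PR:

```python
import unittest
import torch

# Skip an entire test class when no CUDA device is present, so that
# NCCL-backed process groups are never initialized on CPU-only machines.
requires_cuda = unittest.skipUnless(torch.cuda.is_available(), "Requires CUDA")

@requires_cuda
class MyDistributedTest(unittest.TestCase):  # hypothetical example class
    def test_something(self):
        # Only runs when torch.cuda.is_available() is True.
        self.assertTrue(torch.cuda.is_available())

if __name__ == "__main__":
    unittest.main()
```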

Pull Request resolved: https://github.com/pytorch/pytorch/pull/117741
Approved by: https://github.com/ezyang, https://github.com/malfet
diff --git a/test/distributed/test_dynamo_distributed.py b/test/distributed/test_dynamo_distributed.py
index 00d85c3..bc3705b 100644
--- a/test/distributed/test_dynamo_distributed.py
+++ b/test/distributed/test_dynamo_distributed.py
@@ -30,6 +30,7 @@
     requires_nccl,
     _dynamo_dist_per_rank_init,
 )
+from torch.testing._internal.common_utils import requires_cuda
 import torch._dynamo.logging
 from torch.testing._internal.common_cuda import (
     PLATFORM_SUPPORTS_FLASH_ATTENTION, PLATFORM_SUPPORTS_MEM_EFF_ATTENTION
@@ -546,6 +547,7 @@
 
 
 @requires_nccl()
+@requires_cuda
 class TestSingleProc(DynamoDistributedSingleProcTestCase):
     """
     Test harness initializes dist process group.
diff --git a/test/distributed/test_inductor_collectives.py b/test/distributed/test_inductor_collectives.py
index 85c34cc..82a59bf 100644
--- a/test/distributed/test_inductor_collectives.py
+++ b/test/distributed/test_inductor_collectives.py
@@ -19,6 +19,7 @@
     requires_nccl,
     skip_if_lt_x_gpu,
 )
+from torch.testing._internal.common_utils import requires_cuda
 from torch._inductor.compile_fx import compile_fx as inductor_compile_fx
 from torch.utils._triton import has_triton
 from torch._inductor.utils import run_and_get_triton_code
@@ -524,6 +525,7 @@
 
 
 @requires_nccl()
+@requires_cuda
 class TestCollectivesInductor(DynamoDistributedSingleProcTestCase):
     """
     Prefer single-proc test runner for basic tests as it is easier to work with.
diff --git a/torch/testing/_internal/common_utils.py b/torch/testing/_internal/common_utils.py
index 98d45d9..e08dcf8 100644
--- a/torch/testing/_internal/common_utils.py
+++ b/torch/testing/_internal/common_utils.py
@@ -1295,6 +1295,7 @@
     # other libraries take up about 11% of space per process
     torch.cuda.set_per_process_memory_fraction(round(1 / num_procs - .11, 2))
 
+requires_cuda = unittest.skipUnless(torch.cuda.is_available(), "Requires CUDA")
 
 def skipIfCrossRef(fn):
     @wraps(fn)