[SPMD] Remove test_grad_layout_1devicemodule_2replicaperprocess (#54826)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/54826

This test no longer works because we made SPMD mode (multiple model replicas per process) error out in #54454.

This test is already disabled.
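
For context, a minimal sketch (not part of this change) of the SPMD-style construction the removed test relied on, i.e. handing DDP more than one device id in a single process. The process-group setup, device indices, module, and the exact exception type are illustrative assumptions:

    import os
    import torch
    import torch.distributed as dist
    import torch.nn as nn
    from torch.nn.parallel import DistributedDataParallel as DDP

    # Single-process setup for illustration; assumes two visible CUDA devices.
    os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
    os.environ.setdefault("MASTER_PORT", "29500")
    dist.init_process_group("nccl", rank=0, world_size=1)

    model = nn.Linear(8, 8).to("cuda:0")

    # Before #54454, passing two device ids asked DDP to keep two model
    # replicas in this one process (SPMD mode). After #54454 this
    # construction is rejected, which is why the test below cannot pass.
    try:
        ddp = DDP(model, device_ids=[0, 1])
    except (ValueError, RuntimeError) as e:  # exact exception type is an assumption
        print("SPMD construction rejected:", e)

    dist.destroy_process_group()
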
ghstack-source-id: 125602473

Test Plan: N/A

Reviewed By: rohan-varma

Differential Revision: D27381719

fbshipit-source-id: a3079ff0766f91112cbe58c1f00c1b02d241c8cd
diff --git a/test/distributed/test_c10d.py b/test/distributed/test_c10d.py
index 617c52f..dee9411 100644
--- a/test/distributed/test_c10d.py
+++ b/test/distributed/test_c10d.py
@@ -3678,22 +3678,6 @@
         local_batch_size = 8
         self._test_grad_layout(replica_devices, layer_devs, local_batch_size)
 
-    @unittest.skipIf(
-        True, "Re-enable when DDP with multiple GPUs per process is confirmed to work"
-    )
-    @requires_nccl()
-    @skip_if_lt_x_gpu(4)
-    def test_grad_layout_1devicemodule_2replicaperprocess(self):
-        int_devices = gpus_for_rank(self.world_size)[self.rank][:2]
-        dev0 = torch.device("cuda:" + str(int_devices[0]))
-        dev1 = torch.device("cuda:" + str(int_devices[1]))
-        # Tells DDP to replicate the model to both of this process's devices.
-        replica_devices = [dev0, dev1]
-        # Tells _test_grad_layout to construct ConvNet with all layers on this process's first assigned device.
-        layer_devs = dev0
-        local_batch_size = 16
-        self._test_grad_layout(replica_devices, layer_devs, local_batch_size)
-
     @requires_nccl()
     @skip_if_lt_x_gpu(4)
     @skip_if_rocm