Fixed an issue where a user-specified default device clashed with the device placement of the RNG (#114560)

This PR now ignores the user-specified default device, allocates the tensor on the CPU, and then moves the tensor to the device of the input tensor. This was more or less already the standard procedure in case the default device wasn't set.

Fixes #114536.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/114560
Approved by: https://github.com/soulitzer
diff --git a/test/test_autograd.py b/test/test_autograd.py
index f91324b..0c8fe14 100644
--- a/test/test_autograd.py
+++ b/test/test_autograd.py
@@ -8743,6 +8743,18 @@
         self.assertEqual(y, y2)
         self.assertEqual(y_expected, y2_expected)
 
+    @unittest.skipIf(not TEST_CUDA, "test requires CUDA")
+    def test_gradcheck_default_device_placement_context(self):
+        # During gradcheck with fast_mode=True, we create a random vector on the CPU device using a CPU generator.
+        # This test ensures that this still works when the default device is set to something else by the user.
+        with torch.device('cuda'):
+            x = torch.randn(3, dtype=torch.double, requires_grad=True)
+
+            def func(inp):
+                return inp ** 2.0
+
+            self.assertTrue(gradcheck(func, x, fast_mode=True))
+
 def index_perm_variable(shape, max_indices):
     if not isinstance(shape, tuple):
         shape = (shape,)
diff --git a/torch/autograd/gradcheck.py b/torch/autograd/gradcheck.py
index dff8321..d4650e9 100644
--- a/torch/autograd/gradcheck.py
+++ b/torch/autograd/gradcheck.py
@@ -1668,7 +1668,7 @@
             .view(x_values.shape)
         )
         values /= values.norm()
-        vec = torch.sparse_coo_tensor(x._indices(), values, x.size())
+        vec = torch.sparse_coo_tensor(x._indices(), values, x.size(), device=x.device)
     elif _is_sparse_compressed_tensor(x):
         if x.layout in {torch.sparse_csr, torch.sparse_bsr}:
             compressed_indices, plain_indices = x.crow_indices(), x.col_indices()
@@ -1683,7 +1683,12 @@
         )
         values /= values.norm()
         vec = torch.sparse_compressed_tensor(
-            compressed_indices, plain_indices, values, x.size(), layout=x.layout
+            compressed_indices,
+            plain_indices,
+            values,
+            x.size(),
+            layout=x.layout,
+            device=x.device,
         )
     else:
         dtype = _to_real_dtype(x.dtype) if downcast_complex else x.dtype
@@ -1785,13 +1790,20 @@
 def _make_vectors(inp_tensors, outputs, *, use_forward_ad):
     # Use our own generator to avoid messing with the user's RNG state
     g_cpu = torch.Generator()
+
+    def _vec_from_tensor_cpu(*args):
+        # Default allocate all tensors on CPU, so they are on the same device as the generator
+        # even if the user specified a default device
+        with torch.device("cpu"):
+            return _vec_from_tensor(*args)
+
     all_u = []
     all_u_dense = []
     for inp in inp_tensors:
-        ur = _vec_from_tensor(inp, g_cpu, True)
+        ur = _vec_from_tensor_cpu(inp, g_cpu, True)
         ur_dense = _to_flat_dense_if_sparse(ur)
         if inp.is_complex():
-            ui = _vec_from_tensor(inp, g_cpu, True)
+            ui = _vec_from_tensor_cpu(inp, g_cpu, True)
             all_u.append((ur, ui))
             ui_dense = _to_flat_dense_if_sparse(ui)
             all_u_dense.append((ur_dense, ui_dense))
@@ -1799,7 +1811,9 @@
             all_u.append(ur)
             all_u_dense.append(ur_dense)
     all_v = (
-        None if use_forward_ad else [_vec_from_tensor(out, g_cpu) for out in outputs]
+        None
+        if use_forward_ad
+        else [_vec_from_tensor_cpu(out, g_cpu) for out in outputs]
     )
     return all_v, all_u, all_u_dense