Fix empty cpu tensor output in cudagraph (#114144)
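We can ignore empty CPU tensors when validating cudagraph outputs: an output whose untyped storage has a data pointer of 0 is now accepted by the check that otherwise requires every output to be a CUDA tensor.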
Differential Revision: [D51472324](https://our.internmc.facebook.com/intern/diff/D51472324)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/114144
Approved by: https://github.com/davidberard98
diff --git a/test/inductor/test_cudagraph_trees.py b/test/inductor/test_cudagraph_trees.py
index d950a12..50b0065 100644
--- a/test/inductor/test_cudagraph_trees.py
+++ b/test/inductor/test_cudagraph_trees.py
@@ -808,6 +808,19 @@
# didnt do additional recordings
self.assertTrue(self.get_manager().new_graph_id().id == 2)
+ def test_empty_cpu_tensor(self):
+ def foo(x):
+ return x @ x, torch.tensor([])
+
+ foo_opt = torch.compile(foo)
+ x = torch.rand([4], device="cuda")
+
+ for _ in range(3):
+ out_opt = foo_opt(x)
+ self.assertEqual(foo(x), out_opt)
+
+ self.assertTrue(self.get_manager().new_graph_id().id == 1)
+
def test_output_alias(self):
inp = torch.rand([20, 20], device="cuda")
diff --git a/torch/_inductor/cudagraph_trees.py b/torch/_inductor/cudagraph_trees.py
index 6abef96..73a0deb 100644
--- a/torch/_inductor/cudagraph_trees.py
+++ b/torch/_inductor/cudagraph_trees.py
@@ -1137,7 +1137,7 @@
continue
torch._check(
- o.is_cuda,
+ o.is_cuda or o.untyped_storage().data_ptr() == 0,
lambda: (
"Expected all cuda outputs in cuda graph recording. Non cuda output "
f"from {self.stack_traces[i] if self.stack_traces else '(unknown)'}"