create benchmark example tensors with correct sizes (#106238)

We need to account for the node's offset when creating benchmark example tensors, as exercised by test_cat_addmm. Otherwise, applying torch.as_strided to the returned tensor fails.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/106238
Approved by: https://github.com/jansel

commit: f11090288c66c492db3d119ba51a95dd6f1f80f2 [log] [tgz]
author: Yang Chen <yangche@meta.com> Sat Jul 29 06:59:24 2023 +0000
committer: PyTorch MergeBot <pytorchmergebot@users.noreply.github.com> Tue Aug 01 01:14:53 2023 +0000
tree: f4cf1503198b4acbf342e29407f5bd186dc3b52c
parent: 03e85be9b0b2a7842a4152e7f6c3d7f2048ab5c5 [diff]
diff --git a/test/inductor/test_max_autotune.py b/test/inductor/test_max_autotune.py
index ad343f0..1c20897 100644
--- a/test/inductor/test_max_autotune.py
+++ b/test/inductor/test_max_autotune.py

@@ -165,6 +165,31 @@
         with config.patch({"max_autotune": True, "autotune_in_subproc": True}):
             torch.compile(addmm, dynamic=dynamic)(x, a, b)
 
+    def test_cat_addmm(self):
+        def fn(a: torch.Tensor, b: torch.Tensor, c: torch.Tensor):
+            return torch.cat(
+                [
+                    torch.addmm(a, b, c),
+                    torch.addmm(b, c, a),
+                ],
+                1,
+            )
+
+        args = [
+            torch.randn(4, 4, device="cuda"),
+            torch.randn(4, 4, device="cuda"),
+            torch.randn(4, 4, device="cuda"),
+        ]
+        with config.patch(
+            {
+                "max_autotune": True,
+                "max_autotune_gemm_backends": "Triton",
+            }
+        ):
+            expected = fn(*args)
+            actual = torch.compile(fn)(*args)
+            torch.testing.assert_close(actual, expected, atol=1e-2, rtol=1e-2)
+
     def test_triton_template_with_epilogues_and_dynamic_shape(self):
         def fn(
             x: torch.Tensor, w: torch.Tensor, bias: torch.Tensor, mul: torch.Tensor

diff --git a/torch/_inductor/select_algorithm.py b/torch/_inductor/select_algorithm.py
index 7cd12b2..3b128e5 100644
--- a/torch/_inductor/select_algorithm.py
+++ b/torch/_inductor/select_algorithm.py

@@ -931,6 +931,7 @@
             V.graph.sizevars.size_hints(node.get_stride()),
             device=node.get_device(),
             dtype=node.get_dtype(),
+            extra_size=node.layout.offset,
         )
 
     @staticmethod
commit	f11090288c66c492db3d119ba51a95dd6f1f80f2	[log] [tgz]
author	Yang Chen <yangche@meta.com>	Sat Jul 29 06:59:24 2023 +0000
committer	PyTorch MergeBot <pytorchmergebot@users.noreply.github.com>	Tue Aug 01 01:14:53 2023 +0000
tree	f4cf1503198b4acbf342e29407f5bd186dc3b52c
parent	03e85be9b0b2a7842a4152e7f6c3d7f2048ab5c5 [diff]