[aotinductor] Relax ExternKernel kwargs checking (#111167)

Summary: When a fallback kernel is called without specifying any kwargs, we still need to fill in default values for those kwargs when generating the cpp call.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/111167
Approved by: https://github.com/chenyang78, https://github.com/jgong5
diff --git a/test/inductor/test_aot_inductor.py b/test/inductor/test_aot_inductor.py
index d6b4557..7d8cdb3 100644
--- a/test/inductor/test_aot_inductor.py
+++ b/test/inductor/test_aot_inductor.py
@@ -143,10 +143,13 @@
     with torch.no_grad(), config.patch(
         "aot_inductor.abi_compatible", self.abi_compatible
     ):
+        torch.manual_seed(0)
         model = model.to(self.device)
         ref_model = copy.deepcopy(model)
         ref_inputs = copy.deepcopy(example_inputs)
         expected = ref_model(*ref_inputs)
+
+        torch.manual_seed(0)
         actual = AOTInductorModelRunner.run(model, example_inputs, options, constraints)
 
     self.assertTrue(same(actual, expected))
@@ -162,10 +165,13 @@
     with torch.no_grad(), config.patch(
         "aot_inductor.abi_compatible", self.abi_compatible
     ):
+        torch.manual_seed(0)
         model = model.to(self.device)
         ref_model = copy.deepcopy(model)
         ref_inputs = copy.deepcopy(list_example_inputs)
         list_expected = [ref_model(*inputs) for inputs in ref_inputs]
+
+        torch.manual_seed(0)
         list_actual = AOTInductorModelRunner.run_multiple(
             model, list_example_inputs, options, constraints
         )
@@ -885,6 +891,16 @@
             torch.float32, self.device == "cuda"
         )
 
+    def test_normal_functional(self):
+        class Model(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+
+            def forward(self, x):
+                return torch.ops.aten.normal_functional.default(x)
+
+        self.check_model(Model(), (torch.empty(4, 1, 4, 4),))
+
 
 class AOTInductorTestABICompatibleCpu(TestCase):
     device = "cpu"
@@ -907,6 +923,7 @@
         # TODO: test_freezing_abi_compatible_cpu somehow fails on CI but not locally,
         #   NotImplementedError: Cannot access storage of OpaqueTensorImpl
         "test_freezing": TestFailure(("abi_compatible_cpu",), is_skip=True),
+        "test_normal_functional": TestFailure(("abi_compatible_cpu",)),
         "test_poi_multiple_dynamic": TestFailure(("abi_compatible_cpu",)),
         "test_sdpa": TestFailure(("abi_compatible_cpu",)),
         "test_sdpa_2": TestFailure(("abi_compatible_cpu",)),
@@ -927,6 +944,9 @@
     AOTInductorTestABICompatibleCuda,
     "abi_compatible_cuda",
     # test_failures, xfail by default, set is_skip=True to skip
+    {
+        "test_normal_functional": TestFailure(("abi_compatible_cuda",)),
+    },
 )
 
 
diff --git a/torch/_inductor/ir.py b/torch/_inductor/ir.py
index 1441362..f5cae04 100644
--- a/torch/_inductor/ir.py
+++ b/torch/_inductor/ir.py
@@ -3397,21 +3397,22 @@
         )
 
     def codegen_kwargs(self):
-        kwargs = []
-        if self.kwargs:
-            if V.graph.cpp_wrapper:
-                # TODO: use native_functions.yaml as the ground truth
+        if V.graph.cpp_wrapper:
+            # FIXME: we should unconditionally fill self.kwargs with missing default values
+            # instead of carrying an extra self.ordered_kwargs_for_cpp_kernel
+            if self.kwargs:
                 assert (
                     self.ordered_kwargs_for_cpp_kernel
-                ), "ordered_kwargs_for_cpp_kernel has to be provided"
-                for arg_name in self.ordered_kwargs_for_cpp_kernel:
-                    v = self.get_kwargs_value(arg_name)
-                    kwargs.append(V.graph.wrapper_code.val_to_arg_str(v))
-            else:
-                kwargs = [
-                    f"{k}={V.graph.wrapper_code.val_to_arg_str(v)}"
-                    for k, v in self.kwargs.items()
-                ]
+                ), "ordered_kwargs_for_cpp_kernel is missing"
+            kwargs = []
+            for arg_name in self.ordered_kwargs_for_cpp_kernel:
+                v = self.get_kwargs_value(arg_name)
+                kwargs.append(V.graph.wrapper_code.val_to_arg_str(v))
+        else:
+            kwargs = [
+                f"{k}={V.graph.wrapper_code.val_to_arg_str(v)}"
+                for k, v in self.kwargs.items()
+            ]
         return kwargs
 
     def codegen_size_asserts(self, wrapper):