[aotinductor] Relax ExternKernel kwargs checking (#111167)
Summary: When a fallback kernel is called without any kwargs specified, we still need to fill in default values for those kwargs when generating the cpp call.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/111167
Approved by: https://github.com/chenyang78, https://github.com/jgong5
diff --git a/test/inductor/test_aot_inductor.py b/test/inductor/test_aot_inductor.py
index d6b4557..7d8cdb3 100644
--- a/test/inductor/test_aot_inductor.py
+++ b/test/inductor/test_aot_inductor.py
@@ -143,10 +143,13 @@
with torch.no_grad(), config.patch(
"aot_inductor.abi_compatible", self.abi_compatible
):
+ torch.manual_seed(0)
model = model.to(self.device)
ref_model = copy.deepcopy(model)
ref_inputs = copy.deepcopy(example_inputs)
expected = ref_model(*ref_inputs)
+
+ torch.manual_seed(0)
actual = AOTInductorModelRunner.run(model, example_inputs, options, constraints)
self.assertTrue(same(actual, expected))
@@ -162,10 +165,13 @@
with torch.no_grad(), config.patch(
"aot_inductor.abi_compatible", self.abi_compatible
):
+ torch.manual_seed(0)
model = model.to(self.device)
ref_model = copy.deepcopy(model)
ref_inputs = copy.deepcopy(list_example_inputs)
list_expected = [ref_model(*inputs) for inputs in ref_inputs]
+
+ torch.manual_seed(0)
list_actual = AOTInductorModelRunner.run_multiple(
model, list_example_inputs, options, constraints
)
@@ -885,6 +891,16 @@
torch.float32, self.device == "cuda"
)
+ def test_normal_functional(self):
+ class Model(torch.nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, x):
+ return torch.ops.aten.normal_functional.default(x)
+
+ self.check_model(Model(), (torch.empty(4, 1, 4, 4),))
+
class AOTInductorTestABICompatibleCpu(TestCase):
device = "cpu"
@@ -907,6 +923,7 @@
# TODO: test_freezing_abi_compatible_cpu somehow fails on CI but not locally,
# NotImplementedError: Cannot access storage of OpaqueTensorImpl
"test_freezing": TestFailure(("abi_compatible_cpu",), is_skip=True),
+ "test_normal_functional": TestFailure(("abi_compatible_cpu",)),
"test_poi_multiple_dynamic": TestFailure(("abi_compatible_cpu",)),
"test_sdpa": TestFailure(("abi_compatible_cpu",)),
"test_sdpa_2": TestFailure(("abi_compatible_cpu",)),
@@ -927,6 +944,9 @@
AOTInductorTestABICompatibleCuda,
"abi_compatible_cuda",
# test_failures, xfail by default, set is_skip=True to skip
+ {
+ "test_normal_functional": TestFailure(("abi_compatible_cuda",)),
+ },
)
diff --git a/torch/_inductor/ir.py b/torch/_inductor/ir.py
index 1441362..f5cae04 100644
--- a/torch/_inductor/ir.py
+++ b/torch/_inductor/ir.py
@@ -3397,21 +3397,22 @@
)
def codegen_kwargs(self):
- kwargs = []
- if self.kwargs:
- if V.graph.cpp_wrapper:
- # TODO: use native_functions.yaml as the ground truth
+ if V.graph.cpp_wrapper:
+ # FIXME: we should unconditionally fill self.kwargs with missing default values
+ # instead of carrying an extra self.ordered_kwargs_for_cpp_kernel
+ if self.kwargs:
assert (
self.ordered_kwargs_for_cpp_kernel
- ), "ordered_kwargs_for_cpp_kernel has to be provided"
- for arg_name in self.ordered_kwargs_for_cpp_kernel:
- v = self.get_kwargs_value(arg_name)
- kwargs.append(V.graph.wrapper_code.val_to_arg_str(v))
- else:
- kwargs = [
- f"{k}={V.graph.wrapper_code.val_to_arg_str(v)}"
- for k, v in self.kwargs.items()
- ]
+ ), "ordered_kwargs_for_cpp_kernel is missing"
+ kwargs = []
+ for arg_name in self.ordered_kwargs_for_cpp_kernel:
+ v = self.get_kwargs_value(arg_name)
+ kwargs.append(V.graph.wrapper_code.val_to_arg_str(v))
+ else:
+ kwargs = [
+ f"{k}={V.graph.wrapper_code.val_to_arg_str(v)}"
+ for k, v in self.kwargs.items()
+ ]
return kwargs
def codegen_size_asserts(self, wrapper):