Simplify Dispatcher case for zero arguments (#104613)

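MSVC flags the instantiation of Dispatcher::callWithDispatchKeySlowPath with zero arguments. This PR fixes that by guarding the argument-boxing path with `if constexpr (num_boxed_args != 0)`, so the boxed-argument array is only declared when there is at least one argument to box. This also removes the `std::max(num_boxed_args, 1)` workaround that existed to avoid a zero-size array, which simplifies the code.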

Pull Request resolved: https://github.com/pytorch/pytorch/pull/104613
Approved by: https://github.com/ezyang
diff --git a/aten/src/ATen/core/dispatch/Dispatcher.h b/aten/src/ATen/core/dispatch/Dispatcher.h
index 0066786..b794f37 100644
--- a/aten/src/ATen/core/dispatch/Dispatcher.h
+++ b/aten/src/ATen/core/dispatch/Dispatcher.h
@@ -589,27 +589,27 @@
   auto dispatchKey = dispatchKeySet.highestPriorityTypeId();
   auto& schema = op.schema();
   auto schema_ref = std::reference_wrapper<const FunctionSchema>(schema);
-  if (guard.needsInputs()) {
-    constexpr auto num_boxed_args = impl::boxed_size<Args...>();
-    // If we used std::array<IValue, num_boxed_args> here, we would
-    // have to spend time default constructing the IValues in
-    // boxedArgs. aligned_storage has no such requirement.
-    // Max to avoid zero-size array.`
-    std::aligned_storage_t<sizeof(IValue), alignof(IValue)> boxedArgs[std::max(num_boxed_args, static_cast<size_t>(1))];
-    // For debugging only; could be removed (but the compiler will do
-    // that for us and it's nice to have the extra assurance of
-    // correctness from our debug builds).
-    int lastArgIdx = 0;
-    impl::boxArgsToStack(boxedArgs, lastArgIdx, args...);
-    TORCH_INTERNAL_ASSERT_DEBUG_ONLY(lastArgIdx == num_boxed_args);
-    // I don't *think* we need std::launder here, because IValue has
-    // no subclasses and no const or reference fields. (We also
-    // couldn't use it even if we wanted to because we are currently
-    // stuck on C++14 rather than C++17, but we could do a backport
-    // similar to folly::launder if needed.)
-    runRecordFunction(guard, schema_ref, dispatchKey, c10::ArrayRef<const c10::IValue>(reinterpret_cast<IValue *>(boxedArgs), num_boxed_args));
-    for (size_t ii = 0; ii < num_boxed_args; ++ii) {
-      reinterpret_cast<IValue *>(&boxedArgs[ii])->~IValue();
+  constexpr auto num_boxed_args = impl::boxed_size<Args...>();
+  if constexpr (num_boxed_args != 0) {
+    if (guard.needsInputs()) {
+      // If we used std::array<IValue, num_boxed_args> here, we would
+      // have to spend time default constructing the IValues in
+      // boxedArgs. aligned_storage has no such requirement.
+      impl::IValueAlignedStorage boxedArgs[num_boxed_args];
+      // For debugging only; could be removed (but the compiler will do
+      // that for us and it's nice to have the extra assurance of
+      // correctness from our debug builds).
+      int lastArgIdx = 0;
+      impl::boxArgsToStack(boxedArgs, lastArgIdx, args...);
+      TORCH_INTERNAL_ASSERT_DEBUG_ONLY(lastArgIdx == num_boxed_args);
+      // I don't *think* we need std::launder here, because IValue has
+      // no subclasses and no const or reference fields.
+      runRecordFunction(guard, schema_ref, dispatchKey, c10::ArrayRef<const c10::IValue>(reinterpret_cast<IValue *>(boxedArgs), num_boxed_args));
+      for (size_t ii = 0; ii < num_boxed_args; ++ii) {
+        reinterpret_cast<IValue *>(&boxedArgs[ii])->~IValue();
+      }
+    } else {
+      runRecordFunction(guard, schema_ref, dispatchKey);
     }
   } else {
     runRecordFunction(guard, schema_ref, dispatchKey);