remove unnecessary BoxedKernelWrapper specialization now that ops are all c10-full (#50963)

Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/50963

Test Plan: Imported from OSS

Reviewed By: bhosmer

Differential Revision: D26026665

Pulled By: bdhirsh

fbshipit-source-id: ef6e515f7dae5052538789e5b75dc551b4ce3b11
diff --git a/aten/src/ATen/core/boxing/KernelFunction_test.cpp b/aten/src/ATen/core/boxing/KernelFunction_test.cpp
index e17efab..b07eedd 100644
--- a/aten/src/ATen/core/boxing/KernelFunction_test.cpp
+++ b/aten/src/ATen/core/boxing/KernelFunction_test.cpp
@@ -143,30 +143,6 @@
 }
 
 void boxed_func_for_outofplace_multi_op(const OperatorHandle& /*opHandle*/, Stack* stack) {
-  // (Tensor(a!), Tensor(b!), Scalar, Scalar) -> (Tensor(a!), Tensor(b!))
-  EXPECT_EQ(4, stack->size());
-
-  ASSERT_TRUE(stack->at(0).isTensor());
-  auto t1 = stack->at(0).toTensor();
-
-  ASSERT_TRUE(stack->at(1).isTensor());
-  auto t2 = stack->at(1).toTensor();
-
-  ASSERT_TRUE(stack->at(2).isScalar());
-  auto s1 = stack->at(2).toScalar();
-
-  ASSERT_TRUE(stack->at(3).isScalar());
-  auto s2 = stack->at(3).toScalar();
-
-  t1.add_(s1);
-  t2.add_(s2);
-
-  stack->clear();
-  torch::jit::push(stack, t1);
-  torch::jit::push(stack, t2);
-}
-
-void boxed_func_for_legacy_outofplace_multi_op(const OperatorHandle& /*opHandle*/, Stack* stack) {
   // (Scalar, Scalar, Tensor(a!), Tensor(b!)) -> (Tensor(a!), Tensor(b!))
   EXPECT_EQ(4, stack->size());
 
@@ -275,26 +251,6 @@
 void expectOutOfPlaceMultiBoxedCallingWorks(const KernelFunction& func) {
   OperatorHandle dummy = makeDummyOperatorHandle();
 
-  auto t1 = at::zeros({1});
-  auto t2 = at::zeros({1});
-  auto s1 = 1.0f;
-  auto s2 = 2.0f;
-  vector<IValue> stack {t1, t2, s1, s2};
-  func.callBoxed(dummy, &stack);
-
-  // kernel should have updated output args and returned them on the stack
-  EXPECT_EQ(t1.item().toFloat(), 1.0f);
-  EXPECT_EQ(t2.item().toFloat(), 2.0f);
-  EXPECT_EQ(2, stack.size());
-  EXPECT_TRUE(stack[0].isTensor());
-  EXPECT_TRUE(stack[0].toTensor().is_same(t1));
-  EXPECT_TRUE(stack[1].isTensor());
-  EXPECT_TRUE(stack[1].toTensor().is_same(t2));
-}
-
-void expectLegacyOutOfPlaceMultiBoxedCallingWorks(const KernelFunction& func) {
-  OperatorHandle dummy = makeDummyOperatorHandle();
-
   auto s1 = 1.0f;
   auto s2 = 2.0f;
   auto t1 = at::zeros({1});
@@ -395,31 +351,6 @@
 void expectOutOfPlaceMultiUnboxedCallingWorks(const KernelFunction& func) {
   OperatorHandle dummy = makeDummyOperatorHandle();
 
-  auto t1 = at::zeros({1});
-  auto t2 = at::zeros({1});
-  auto s1 = 1.0f;
-  auto s2 = 2.0f;
-
-  std::tuple<at::Tensor&, at::Tensor&> tup = func.call<
-    std::tuple<at::Tensor&, at::Tensor&>, at::Tensor&, at::Tensor&, at::Scalar, at::Scalar
-  >(dummy, t1, t2, s1, s2);
-
-  // kernel should have updated out args and returned them in a tuple
-  EXPECT_EQ(t1.item().toFloat(), 1.0f);
-  EXPECT_EQ(t2.item().toFloat(), 2.0f);
-
-  auto t1_out = std::get<0>(tup);
-  EXPECT_EQ(t1_out.item().toFloat(), 1.0f);
-  EXPECT_TRUE(t1_out.is_same(t1));
-
-  auto t2_out = std::get<1>(tup);
-  EXPECT_EQ(t2_out.item().toFloat(), 2.0f);
-  EXPECT_TRUE(t2_out.is_same(t2));
-}
-
-void expectLegacyOutOfPlaceMultiUnboxedCallingWorks(const KernelFunction& func) {
-  OperatorHandle dummy = makeDummyOperatorHandle();
-
   auto s1 = 1.0f;
   auto s2 = 2.0f;
   auto t1 = at::zeros({1});
@@ -478,11 +409,6 @@
   kernels::expectOutOfPlaceMultiBoxedCallingWorks(func);
 }
 
-TEST(KernelFunctionTest, givenBoxedFunction_withLegacyOutOfPlaceMultiSignature_whenCallingBoxed_thenWorks) {
-  KernelFunction func = KernelFunction::makeFromBoxedFunction<&kernels::boxed_func_for_legacy_outofplace_multi_op>();
-  kernels::expectLegacyOutOfPlaceMultiBoxedCallingWorks(func);
-}
-
 // functional, unboxed calling
 
 TEST(KernelFunctionTest, givenBoxedFunction_withReturn_whenCallingUnboxed_thenWorks) {
@@ -517,11 +443,6 @@
   kernels::expectOutOfPlaceMultiUnboxedCallingWorks(func);
 }
 
-TEST(KernelFunctionTest, givenBoxedFunction_withLegacyOutOfPlaceMultiSignature_whenCallingUnboxed_thenWorks) {
-  KernelFunction func = KernelFunction::makeFromBoxedFunction<&kernels::boxed_func_for_legacy_outofplace_multi_op>();
-  kernels::expectLegacyOutOfPlaceMultiUnboxedCallingWorks(func);
-}
-
 // functors etc.
 
 TEST(KernelFunctionTest, givenUnboxedFunctor_withReturn_whenCallingBoxed_thenWorks) {
diff --git a/aten/src/ATen/core/boxing/impl/boxing.h b/aten/src/ATen/core/boxing/impl/boxing.h
index 4f9ae1f..94acfb9 100644
--- a/aten/src/ATen/core/boxing/impl/boxing.h
+++ b/aten/src/ATen/core/boxing/impl/boxing.h
@@ -196,16 +196,13 @@
 };
 
 //
-// 3. in-place and legacy out-of-place ops take a single non-const Tensor
-// reference as their first argument, and return it.
+// 3. in-place ops take a single non-const Tensor reference
+// as their first argument, and return it.
 //
 // Note: all signatures matching this pattern are are assumed to be for such ops.
 // Because of this, the generated BoxedKernelWrapper specializations simply
 // return the in-place argument.
 //
-// TODO update comment when legacy out-of-place signatures no longer need
-// to be supported, due to hacky_wrapper reordering
-//
 
 template <class... OtherArgs>
 struct BoxedKernelWrapper<
@@ -243,9 +240,8 @@
   at::Tensor&(FirstArg, RestArgs...),
   std::enable_if_t<
     can_box_all<FirstArg, RestArgs...>::value
-    // this skips over in-place (and legacy out-of-place) kernels with a non-const Tensor
+    // this skips over in-place kernels with a non-const Tensor
     // arg at the front, so those can unambiguously trigger the preceding specialization.
-    // TODO update comment when hacky_wrapper reorders legacy out-of-place signatures
     && !is_mutable_tensor_ref<FirstArg>::value,
     void
   >
@@ -280,23 +276,7 @@
 struct BoxedKernelWrapper<
   Result(Args...),
   std::enable_if_t<
-    can_box_all<Args...>::value && is_tuple_of_mutable_tensor_refs<Result>::value
-    // this test skips over legacy kernels with out args at the front, so they can trigger
-    // the specialization that follows.
-    // note: this test is complicated by the fact that boolean value expressions in templates
-    // don't shortcut. some signatures have a result tuple that's wider than the arg list, and
-    // without the length limiting ternary these will cause a template evaluation error on this
-    // test, even if a length check precedes it in the conjunction.
-    // TODO remove when hacky_wrapper reorders legacy kernel out args
-    && !std::is_same<
-        Result,
-        guts::typelist::to_tuple_t<
-          guts::typelist::take_t<
-            guts::typelist::typelist<Args...>,
-            sizeof...(Args) >= std::tuple_size<Result>::value ? std::tuple_size<Result>::value : sizeof...(Args)
-          >
-        >
-      >::value,
+    can_box_all<Args...>::value && is_tuple_of_mutable_tensor_refs<Result>::value,
     void
   >
 > {
@@ -327,55 +307,5 @@
   }
 };
 
-//
-// 6. legacy trap for old-school multi-return out functions with mutable args
-// at start rather than end of arg list.
-// TODO remove when hacky_wrapper reorders legacy kernel out args
-//
-
-template <class Result, class... Args>
-struct BoxedKernelWrapper<
-  Result(Args...),
-  std::enable_if_t<
-    can_box_all<Args...>::value && is_tuple_of_mutable_tensor_refs<Result>::value
-    // this test fires passes for legacy kernels with out args at the front.
-    // note: this test is complicated by the fact that boolean value expressions in templates
-    // don't shortcut. some signatures have a result tuple that's wider than the arg list, and
-    // without the length limiting ternary these will cause a template evaluation error on this
-    // test, even if a length check precedes it in the conjunction.
-    && std::is_same<
-        Result,
-        guts::typelist::to_tuple_t<
-          guts::typelist::take_t<
-            guts::typelist::typelist<Args...>,
-            sizeof...(Args) >= std::tuple_size<Result>::value ? std::tuple_size<Result>::value : sizeof...(Args)
-          >
-        >
-      >::value,
-    void
-  >
-> {
-  static Result call(
-    KernelFunction::InternalBoxedKernelFunction* boxed_kernel_func,
-    OperatorKernel* functor,
-    const OperatorHandle& opHandle,
-    Args... args
-  ) {
-    using ArgTuple = std::tuple<Args...>;
-    constexpr int RetCount = std::tuple_size<Result>();
-
-    torch::jit::Stack stack = boxArgs(args...);
-    (*boxed_kernel_func)(functor, opHandle, &stack);
-    TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
-      stack.size() == RetCount,
-      "Boxed kernel was expected to return ", RetCount, " values on the stack, ",
-      "but instead returned ", stack.size(), " values."
-    );
-
-    auto legacy_result = guts::tuple_take<ArgTuple, RetCount>(ArgTuple{args...});
-    return legacy_result;
-  }
-};
-
 } // impl
 } // c10