[PyTorch] Add IValue::toDimVector & mostly replace toIntVector with it (#71247)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/71247

Most uses of toIntVector() were for a Tensor shape. We have DimVector to avoid heap allocations in those cases, so let's use it.
ghstack-source-id: 146933314

Test Plan: CI -- if DimVector is considered good in general, then this change should be good as well.

Reviewed By: mikeiovine

Differential Revision: D33556198

fbshipit-source-id: cf2ad92c2d0b99ab1df4da0f6843e6ccb9a6320b
diff --git a/aten/src/ATen/core/ivalue.h b/aten/src/ATen/core/ivalue.h
index 88769da..d22ad5a 100644
--- a/aten/src/ATen/core/ivalue.h
+++ b/aten/src/ATen/core/ivalue.h
@@ -1,5 +1,6 @@
 #pragma once
 
+#include <ATen/core/DimVector.h>
 #include <ATen/core/TensorBody.h>
 #include <ATen/core/blob.h>
 #include <ATen/core/custom_class.h>
@@ -576,6 +577,7 @@
   c10::List<int64_t> toIntList() &&;
   c10::List<int64_t> toIntList() const&;
   std::vector<int64_t> toIntVector() const;
+  at::DimVector toDimVector() const;
 
   // ConstantString
   IValue(c10::intrusive_ptr<ivalue::ConstantString> v);
diff --git a/aten/src/ATen/core/ivalue_inl.h b/aten/src/ATen/core/ivalue_inl.h
index 946f46e..87a3229 100644
--- a/aten/src/ATen/core/ivalue_inl.h
+++ b/aten/src/ATen/core/ivalue_inl.h
@@ -1661,16 +1661,21 @@
 }
 
 template <typename T>
-static std::vector<T> createVectorFromList(const c10::detail::ListImpl* impl) {
-  std::vector<T> result;
+static T createVectorLikeFromList(const c10::detail::ListImpl* impl) {
+  T result;
   result.reserve(impl->list.size());
   for (size_t i = 0, N = impl->list.size(); i < N; ++i) {
-    result.push_back(impl->list[i].to<T>());
+    result.push_back(impl->list[i].to<typename T::value_type>());
   }
   return result;
 }
 
 template <typename T>
+static std::vector<T> createVectorFromList(const c10::detail::ListImpl* impl) {
+  return createVectorLikeFromList<std::vector<T>>(impl);
+}
+
+template <typename T>
 std::vector<T> createVectorFromList(const c10::List<T>& impl) {
   std::vector<T> result;
   result.reserve(impl.size());
@@ -1805,6 +1810,14 @@
   return createVectorFromList<int64_t>(
       static_cast<const c10::detail::ListImpl*>(payload.u.as_intrusive_ptr));
 }
+inline at::DimVector IValue::toDimVector() const {
+  AT_ASSERT(isIntList(), "Expected IntList but got ", tagKind());
+  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
+      payload.u.as_intrusive_ptr != c10::UndefinedTensorImpl::singleton(),
+      "called toDimVector on null intrusive_ptr IValue");
+  return createVectorLikeFromList<at::DimVector>(
+      static_cast<const c10::detail::ListImpl*>(payload.u.as_intrusive_ptr));
+}
 inline c10::List<double> IValue::toDoubleList() && {
   AT_ASSERT(isDoubleList(), "Expected DoubleList but got ", tagKind());
   return c10::List<double>(moveToIntrusivePtr<c10::detail::ListImpl>());
diff --git a/aten/src/ATen/test/ivalue_test.cpp b/aten/src/ATen/test/ivalue_test.cpp
index 2de6108..f86bcec 100644
--- a/aten/src/ATen/test/ivalue_test.cpp
+++ b/aten/src/ATen/test/ivalue_test.cpp
@@ -32,6 +32,7 @@
   ASSERT_EQ(foo2.toDouble(), 4.0);
   ASSERT_EQ(foo.use_count(), 2);
   ASSERT_TRUE(baz.toIntVector() == std::vector<int64_t>({3, 4, 5}));
+  ASSERT_TRUE(baz.toDimVector() == at::DimVector({3, 4, 5}));
 
   auto move_it = std::move(baz).toIntList();
   ASSERT_EQ(foo.use_count(), 2);
diff --git a/torch/csrc/jit/passes/frozen_ops_to_mkldnn.cpp b/torch/csrc/jit/passes/frozen_ops_to_mkldnn.cpp
index e639801..22a13fb 100644
--- a/torch/csrc/jit/passes/frozen_ops_to_mkldnn.cpp
+++ b/torch/csrc/jit/passes/frozen_ops_to_mkldnn.cpp
@@ -292,7 +292,7 @@
   TORCH_INTERNAL_ASSERT(weight_ival.isTensor());
   weight = weight_ival.toTensor();
 
-  auto shape = pop(stack).toIntVector();
+  auto shape = pop(stack).toDimVector();
   auto input = pop(stack).toTensor();
 
   at::Tensor dst, mean, rstd;
diff --git a/torch/csrc/jit/runtime/register_prim_ops.cpp b/torch/csrc/jit/runtime/register_prim_ops.cpp
index 3022614..832bae8 100644
--- a/torch/csrc/jit/runtime/register_prim_ops.cpp
+++ b/torch/csrc/jit/runtime/register_prim_ops.cpp
@@ -2350,7 +2350,7 @@
           size.reserve(8);
           for (const auto i : c10::irange(num_inputs)) {
             size =
-                at::infer_size(size, peek(stack, i, num_inputs).toIntVector());
+                at::infer_size(size, peek(stack, i, num_inputs).toDimVector());
           }
           drop(stack, num_inputs);
           push(stack, IValue(size));
@@ -2474,12 +2474,12 @@
         [](Stack& stack) {
           IValue self_size, other_size;
           pop(stack, self_size, other_size);
-          auto s = self_size.toIntVector();
-          auto o = other_size.toIntVector();
+          auto s = self_size.toDimVector();
+          auto o = other_size.toDimVector();
           if (s == o) {
-            push(stack, IValue());
+            stack.emplace_back();
           } else {
-            push(stack, s);
+            stack.emplace_back(std::move(self_size));
           }
         },
         aliasAnalysisFromSchema()),
diff --git a/torch/csrc/jit/runtime/register_prim_ops_fulljit.cpp b/torch/csrc/jit/runtime/register_prim_ops_fulljit.cpp
index 76268ee..bd58ffc 100644
--- a/torch/csrc/jit/runtime/register_prim_ops_fulljit.cpp
+++ b/torch/csrc/jit/runtime/register_prim_ops_fulljit.cpp
@@ -166,7 +166,7 @@
            if (size.isNone()) {
              push(stack, std::move(self));
            } else {
-             push(stack, at::sum_to(self.toTensor(), size.toIntVector()));
+             push(stack, at::sum_to(self.toTensor(), size.toDimVector()));
            }
          },
          aliasAnalysisFromSchema()),
@@ -722,7 +722,7 @@
   if (int_ivalue.isInt()) {
     scale_factor_double = static_cast<double>(int_ivalue.toInt());
   } else if (int_ivalue.isIntList()) {
-    auto int_list = int_ivalue.toIntVector();
+    auto int_list = int_ivalue.toDimVector();
     std::vector<double> double_vec(int_list.begin(), int_list.end());
     scale_factor_double = double_vec;
   } else if (int_ivalue.isNone()) {
diff --git a/torch/csrc/jit/runtime/register_special_ops.cpp b/torch/csrc/jit/runtime/register_special_ops.cpp
index 6df7222..6d61a44 100644
--- a/torch/csrc/jit/runtime/register_special_ops.cpp
+++ b/torch/csrc/jit/runtime/register_special_ops.cpp
@@ -251,7 +251,7 @@
 
           auto result = at::split_with_sizes(
               (std::move(peek(stack, 0, 3))).toTensor(),
-              (std::move(peek(stack, 1, 3))).toIntVector(),
+              (std::move(peek(stack, 1, 3))).toDimVector(),
               (std::move(peek(stack, 2, 3))).toInt());
           drop(stack, 3);
           pack(stack, std::move(result));
@@ -322,7 +322,7 @@
         [](Stack& stack) {
           auto a = pop(stack);
           auto b = pop(stack);
-          push(stack, at::infer_size(a.toIntVector(), b.toIntVector()));
+          push(stack, at::infer_size(a.toDimVector(), b.toDimVector()));
         },
         aliasAnalysisFromSchema()),
     OperatorGenerator(
diff --git a/torch/csrc/jit/runtime/static/native_ops.cpp b/torch/csrc/jit/runtime/static/native_ops.cpp
index 85da072..09788c4 100644
--- a/torch/csrc/jit/runtime/static/native_ops.cpp
+++ b/torch/csrc/jit/runtime/static/native_ops.cpp
@@ -268,7 +268,7 @@
       }
       return [](ProcessedNode* p_node) {
         const auto& in0_t = p_node->Input(0).toTensor();
-        const auto in1_iv = p_node->Input(1).toIntVector();
+        const auto in1_iv = p_node->Input(1).toDimVector();
         p_node->Output(0) = at::native::permute(in0_t, in1_iv);
       };
     });
@@ -284,7 +284,7 @@
       }
       return [](ProcessedNode* p_node) {
         const auto& in0_t = p_node->Input(0).toTensor();
-        const auto in1_iv = p_node->Input(1).toIntVector();
+        const auto in1_iv = p_node->Input(1).toDimVector();
         p_node->Output(0) = at::native::reshape(in0_t, in1_iv);
       };
     });
diff --git a/torch/csrc/jit/runtime/static/ops.cpp b/torch/csrc/jit/runtime/static/ops.cpp
index 29f27e0..233fc30 100644
--- a/torch/csrc/jit/runtime/static/ops.cpp
+++ b/torch/csrc/jit/runtime/static/ops.cpp
@@ -91,10 +91,11 @@
 at::Tensor& reshape_copy_out(
     at::Tensor& out,
     const at::Tensor& self,
-    const std::vector<int64_t>& proposed_shape,
+    at::IntArrayRef proposed_shape,
     bool infer_size) {
-  auto shape = infer_size ? at::infer_size(proposed_shape, self.numel())
-                          : proposed_shape;
+  auto shape = infer_size
+      ? at::infer_size(proposed_shape, self.numel())
+      : std::vector<int64_t>(proposed_shape.begin(), proposed_shape.end());
   at::native::resize_(out, shape, c10::nullopt);
 
   auto self_contig = self.expect_contiguous();
@@ -1659,7 +1660,7 @@
       TORCH_CHECK(n->inputs().size() == 2);
       return [](ProcessedNode* p_node) {
         const auto& self = p_node->Input(0).toTensor(); // self
-        const auto proposed_shape = p_node->Input(1).toIntVector(); // shape
+        const auto proposed_shape = p_node->Input(1).toDimVector(); // shape
 
         if (p_node->Output(0).isNone()) {
           p_node->Output(0) = create_empty_from(self);
@@ -1836,7 +1837,7 @@
   }
   return [](ProcessedNode* p_node) {
     const auto& self = p_node->Input(0).toTensor();
-    const auto repeats = p_node->Input(1).toIntVector();
+    const auto repeats = p_node->Input(1).toDimVector();
 
     if (p_node->Output(0).isNone()) {
       p_node->Output(0) = at::native::repeat(self, repeats);
@@ -2030,7 +2031,7 @@
       return [](ProcessedNode* p_node) {
         // ignore Input(5): `bool cudnn_enable=True`
         const auto& input = p_node->Input(0).toTensor();
-        const auto normalized_shape = p_node->Input(1).toIntVector();
+        const auto normalized_shape = p_node->Input(1).toDimVector();
         auto weight_opt = p_node->Input(2).toOptional<at::Tensor>();
         auto bias_opt = p_node->Input(3).toOptional<at::Tensor>();
         float eps = p_node->Input(4).toDouble();
@@ -2124,7 +2125,7 @@
       at::cpu::norm_outf(
           in0_t,
           in1_s,
-          p_node->Input(2).toIntVector(), // dim
+          p_node->Input(2).toDimVector(), // dim
           p_node->Input(3).toBool(), // keepdim
           p_node->Input(4).toScalarType(), // dtype
           out_t);
@@ -2145,7 +2146,7 @@
       at::cpu::norm_outf(
           in0_t,
           in1_s,
-          p_node->Input(2).toIntVector(), // dim
+          p_node->Input(2).toDimVector(), // dim
           p_node->Input(3).toBool(), // keepdim
           out_t);
     };
@@ -2311,7 +2312,7 @@
     return nullptr;
   }
   return [](ProcessedNode* p_node) {
-    const auto& size = p_node->Input(0).toIntVector();
+    const auto& size = p_node->Input(0).toDimVector();
     const auto fill_value = p_node->Input(1).toScalar();
     if (p_node->Output(0).isNone()) {
       const auto dtype = p_node->Input(2).toOptional<c10::ScalarType>();
@@ -2380,7 +2381,7 @@
           "aten::linalg_norm(Tensor self, Scalar? ord=None, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor"))) {
     return [](ProcessedNode* p_node) {
       const auto& input = p_node->Input(0).toTensor();
-      const auto dim = p_node->Input(2).toIntVector();
+      const auto dim = p_node->Input(2).toDimVector();
       const auto keepdim = p_node->Input(3).toBool();
       const auto dtype = p_node->Input(4).toOptional<c10::ScalarType>();
       if (p_node->Output(0).isNone()) {
@@ -2407,7 +2408,7 @@
           "aten::linalg_norm.ord_str(Tensor self, str ord, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor"))) {
     return [](ProcessedNode* p_node) {
       const auto& input = p_node->Input(0).toTensor();
-      const auto dim = p_node->Input(2).toIntVector();
+      const auto dim = p_node->Input(2).toDimVector();
       const auto keepdim = p_node->Input(3).toBool();
       const auto dtype = p_node->Input(4).toOptional<c10::ScalarType>();
       if (p_node->Output(0).isNone()) {
diff --git a/torch/csrc/jit/runtime/static/ops.h b/torch/csrc/jit/runtime/static/ops.h
index 3b7fa62..81cf88f 100644
--- a/torch/csrc/jit/runtime/static/ops.h
+++ b/torch/csrc/jit/runtime/static/ops.h
@@ -9,7 +9,7 @@
 at::Tensor& reshape_copy_out(
     at::Tensor& out,
     const at::Tensor& self,
-    const std::vector<int64_t>& proposed_shape,
+    at::IntArrayRef proposed_shape,
     bool infer_size = true);
 at::Tensor& to_copy_out(
     Tensor& out,
diff --git a/torch/csrc/profiler/util.cpp b/torch/csrc/profiler/util.cpp
index 16528ac..95c1c7c 100644
--- a/torch/csrc/profiler/util.cpp
+++ b/torch/csrc/profiler/util.cpp
@@ -373,8 +373,8 @@
       return 0;
     }
 
-    const std::vector<int64_t> input_sizes = input_sizes_ref.toIntVector();
-    const std::vector<int64_t> kernel_sizes = kernel_sizes_ref.toIntVector();
+    const auto input_sizes = input_sizes_ref.toDimVector();
+    const auto kernel_sizes = kernel_sizes_ref.toDimVector();
     const uint64_t groups = groups_ref.toInt();
     const std::vector<int64_t> padding = padding_ref.toIntVector();
     const std::vector<int64_t> stride = stride_ref.toIntVector();
@@ -437,8 +437,8 @@
       return 0;
     }
 
-    std::vector<int64_t> mat1_size = mat1_sizes_ref.toIntVector();
-    std::vector<int64_t> mat2_size = mat2_sizes_ref.toIntVector();
+    const auto mat1_size = mat1_sizes_ref.toDimVector();
+    const auto mat2_size = mat2_sizes_ref.toDimVector();
     if (mat1_size.size() == 0) {
       return 0;
     }
@@ -478,8 +478,8 @@
       return 0;
     }
 
-    std::vector<int64_t> mat1_size = mat1_sizes_ref.toIntVector();
-    std::vector<int64_t> mat2_size = mat2_sizes_ref.toIntVector();
+    const auto mat1_size = mat1_sizes_ref.toDimVector();
+    const auto mat2_size = mat2_sizes_ref.toDimVector();
     if (mat1_size.size() == 0) {
       return 0;
     }
@@ -519,7 +519,7 @@
       return 0;
     }
 
-    std::vector<int64_t> mat_size = mat_sizes.toIntVector();
+    const auto mat_size = mat_sizes.toDimVector();
     uint64_t flops = 1;
     for (int64_t dim : mat_size) {
       flops *= dim;
@@ -538,7 +538,7 @@
       return 0;
     }
 
-    std::vector<int64_t> mat_size = mat_sizes.toIntVector();
+    const auto mat_size = mat_sizes.toDimVector();
     uint64_t flops = 1;
     for (int64_t dim : mat_size) {
       flops *= dim;