[PyTorch] AOTI: add CPU fast path in aoti_torch_empty_strided (#110877)

This seems to reduce benchmark time by 15-20%. Supersedes D49835545.

Differential Revision: [D49974460](https://our.internmc.facebook.com/intern/diff/D49974460/)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/110877
Approved by: https://github.com/chenyang78, https://github.com/jansel, https://github.com/desertfire
ghstack dependencies: #110876
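
For context (not part of the patch), below is a minimal sketch of how AOTI-generated code might call this shim and hit the new CPU branch. The exact argument order and the aoti_torch_dtype_float32() / aoti_torch_device_type_cpu() helpers are assumptions based on the parameter names visible in the hunk, not something introduced by this PR.

    // Sketch only: allocate a 2x3 contiguous float32 CPU tensor through the
    // C shim, which now takes the at::detail::empty_strided_cpu fast path.
    #include <torch/csrc/inductor/aoti_torch/c/shim.h>

    #include <array>
    #include <cstdint>

    int main() {
      std::array<int64_t, 2> sizes{2, 3};
      std::array<int64_t, 2> strides{3, 1};  // contiguous layout for {2, 3}

      AtenTensorHandle handle = nullptr;
      AOTITorchError err = aoti_torch_empty_strided(
          /*ndim=*/2,
          sizes.data(),
          strides.data(),
          /*dtype=*/aoti_torch_dtype_float32(),          // assumed helper
          /*device_type=*/aoti_torch_device_type_cpu(),  // assumed helper
          /*device_index=*/0,
          &handle);
      if (err != AOTI_TORCH_SUCCESS) {
        return 1;
      }

      // The shim returns an owning handle; release it when done.
      aoti_torch_delete_tensor_object(handle);
      return 0;
    }

On the CPU branch, at::detail::empty_strided_cpu allocates directly, skipping the TensorOptions construction and the dispatcher round trip that at::empty_strided incurs; non-CPU devices keep the original path.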
diff --git a/torch/csrc/inductor/aoti_torch/shim_common.cpp b/torch/csrc/inductor/aoti_torch/shim_common.cpp
index 46ebe94..96dd856 100644
--- a/torch/csrc/inductor/aoti_torch/shim_common.cpp
+++ b/torch/csrc/inductor/aoti_torch/shim_common.cpp
@@ -183,12 +183,18 @@
   AOTI_TORCH_CONVERT_EXCEPTION_TO_ERROR_CODE({
     c10::IntArrayRef sizes(sizes_ptr, ndim);
     c10::IntArrayRef strides(strides_ptr, ndim);
-    c10::Device device = c10_device(device_type, device_index);
-    c10::TensorOptions options = c10::TensorOptions().device(device).dtype(
-        static_cast<c10::ScalarType>(dtype));
-    at::Tensor* new_tensor =
-        new at::Tensor(at::empty_strided(sizes, strides, options));
-    *ret_new_tensor = tensor_pointer_to_tensor_handle(new_tensor);
+    if (c10::DeviceType(device_type) == c10::DeviceType::CPU) {
+      *ret_new_tensor = tensor_pointer_to_tensor_handle(
+          new at::Tensor(at::detail::empty_strided_cpu(
+              sizes, strides, static_cast<c10::ScalarType>(dtype))));
+    } else {
+      c10::Device device = c10_device(device_type, device_index);
+      c10::TensorOptions options = c10::TensorOptions().device(device).dtype(
+          static_cast<c10::ScalarType>(dtype));
+      at::Tensor* new_tensor =
+          new at::Tensor(at::empty_strided(sizes, strides, options));
+      *ret_new_tensor = tensor_pointer_to_tensor_handle(new_tensor);
+    }
   });
 }