#define TORCH_ASSERT_NO_OPERATORS
#include <ATen/EmptyTensor.h>
#include <ATen/detail/CUDAHooksInterface.h>
#include <ATen/Context.h>
#include <ATen/detail/PrivateUse1HooksInterface.h>
#include <c10/core/CPUAllocator.h>
#include <c10/util/safe_numerics.h>
#include <limits>

namespace at::detail {
namespace {

c10::Allocator* GetCPUAllocatorMaybePinned(bool pin_memory) {
  if (pin_memory) {
    // NB: This is not quite right: if you somehow had both CUDA and PrivateUse1
    // initialized in the same PyTorch build, you would ONLY ever get the CUDA
    // pinned memory allocator. To properly support this, see
    // https://github.com/pytorch/pytorch/issues/14560
    if (at::globalContext().hasCUDA()) {
      return at::detail::getCUDAHooks().getPinnedMemoryAllocator();
    } else if (at::isPrivateUse1HooksRegistered()) {
      return at::GetPrivateUse1HooksInterface()->getPinnedMemoryAllocator();
    } else {
      TORCH_CHECK(false, "Need to provide pin_memory allocator to use pin memory.");
    }
  }
  return c10::GetCPUAllocator();
}

constexpr uint64_t storage_max() {
  // int64_t and size_t are used somewhat inconsistently throughout ATen.
  // To be safe, storage size calculations must fit in both types.
  constexpr auto int64_max = static_cast<uint64_t>(
      std::numeric_limits<int64_t>::max());
  constexpr auto size_max = static_cast<uint64_t>(
      std::numeric_limits<size_t>::max());
  return std::min(int64_max, size_max);
}
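// Note (added for context, not in the original source): on typical 64-bit
// builds size_t's maximum exceeds int64_t's, so this evaluates to INT64_MAX;
// on 32-bit builds SIZE_MAX is the smaller of the two and becomes the limit.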

inline void raise_warning_for_complex_half(ScalarType dtype) {
  if (dtype == kComplexHalf) {
    TORCH_WARN_ONCE(
        "ComplexHalf support is experimental and many operators don't support it yet.");
  }
}

} // namespace (anonymous)

size_t computeStorageNbytesContiguous(
    IntArrayRef sizes,
    size_t itemsize_bytes,
    size_t storage_offset
) {
  // Ignore overflow checks on mobile
#ifndef C10_MOBILE
  uint64_t size = 1;
  bool overflowed = c10::safe_multiplies_u64(sizes, &size);
  overflowed |= c10::add_overflows(size, storage_offset, &size);
  overflowed |= c10::mul_overflows(size, itemsize_bytes, &size);
  overflowed |= size > storage_max();
  TORCH_CHECK(!overflowed,
              "Storage size calculation overflowed with sizes=", sizes);
  return static_cast<size_t>(size);
#else
  const auto numel = c10::multiply_integers(sizes);
  return itemsize_bytes * (storage_offset + numel);
#endif
}
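// Worked example for the contiguous case above (illustrative, not part of the
// original source): sizes = {2, 3}, itemsize_bytes = 4 (e.g. float) and
// storage_offset = 0 give (0 + 2*3) * 4 = 24 bytes, with every intermediate
// value checked for overflow and compared against storage_max().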

size_t computeStorageNbytes(
    IntArrayRef sizes,
    IntArrayRef strides,
    size_t itemsize_bytes,
    size_t storage_offset
) {
  TORCH_CHECK(
      sizes.size() == strides.size(),
      "dimensionality of sizes (",
      sizes.size(),
      ") must match dimensionality of strides (",
      strides.size(),
      ")");
  // Ignore overflow checks on mobile
#ifndef C10_MOBILE
  // size of the underlying storage is 1 bigger than the offset
  // of the last element according to stride
  uint64_t size = storage_offset + 1;
  bool overflowed = false;
  for (const auto i : c10::irange(sizes.size())) {
    if (sizes[i] == 0) {
      return 0;
    }
    uint64_t strided_size = 0;
    overflowed |= c10::mul_overflows(strides[i], sizes[i] - 1, &strided_size);
    overflowed |= c10::add_overflows(size, strided_size, &size);
  }
  overflowed |= c10::mul_overflows(size, itemsize_bytes, &size);
  overflowed |= size > storage_max();
  TORCH_CHECK(!overflowed,
              "Storage size calculation overflowed with sizes=",
              sizes, " and strides=", strides);
  return static_cast<size_t>(size);
#else
  // size of the underlying storage is 1 bigger than the offset
  // of the last element according to stride
  uint64_t size = 1;
  for (const auto i : c10::irange(sizes.size())) {
    if (sizes[i] == 0) {
      return 0;
    }
    size += strides[i] * (sizes[i] - 1);
  }
  return itemsize_bytes * (storage_offset + size);
#endif
}
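// Worked example for the strided calculation above (illustrative, not part of
// the original source): with sizes = {2, 3}, strides = {3, 1}, itemsize_bytes = 4
// and storage_offset = 0, size = 1 + 3*(2-1) + 1*(3-1) = 6 elements, i.e. 24
// bytes. A broadcast-style zero stride shrinks this: strides = {0, 1} gives
// 1 + 0*(2-1) + 1*(3-1) = 3 elements (12 bytes) because the two rows alias the
// same memory; this is why the stride-based bound is used rather than numel.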

SymInt computeStorageNbytesContiguous(
    SymIntArrayRef sizes,
    const SymInt& itemsize_bytes,
    const SymInt& storage_offset
) {
  const auto numel = c10::multiply_integers(sizes);
  return itemsize_bytes * (storage_offset + numel);
}

// not including mobile-only macros in this function,
// since mobile shouldn't be using symints.
SymInt computeStorageNbytes(
    SymIntArrayRef sizes,
    SymIntArrayRef strides,
    const SymInt& itemsize_bytes,
    const SymInt& storage_offset
) {
  TORCH_CHECK(
      sizes.size() == strides.size(),
      "dimensionality of sizes (",
      sizes.size(),
      ") must match dimensionality of strides (",
      strides.size(),
      ")");
  // size of the underlying storage is 1 bigger than the offset
  // of the last element according to stride
  SymInt size = 1;
  for (const auto i : c10::irange(sizes.size())) {
    if (TORCH_GUARD_SIZE_OBLIVIOUS(sizes[i].sym_eq(0))) {
      return 0;
    }
    size += strides[i] * (sizes[i] - 1);
  }
  return itemsize_bytes * (storage_offset + size);
}
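
// _empty_generic is the shared implementation behind empty_generic and
// empty_symint_meta below: it allocates an uninitialized, resizable byte
// storage of the required size through the given allocator and wraps it in a
// TensorImpl carrying the supplied dispatch keys, dtype and (optional) memory
// format. (Summary comment added for readability; not in the original source.)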
template <typename T>
TensorBase _empty_generic(
    ArrayRef<T> size,
    c10::Allocator* allocator,
    c10::DispatchKeySet ks,
    ScalarType scalar_type,
    c10::optional<c10::MemoryFormat> memory_format_opt) {
  at::detail::check_size_nonnegative(size);
  at::detail::raise_warning_for_complex_half(scalar_type);
  caffe2::TypeMeta dtype = scalarTypeToTypeMeta(scalar_type);
  auto size_bytes = computeStorageNbytesContiguous(size, dtype.itemsize());
  auto storage_impl = c10::make_intrusive<StorageImpl>(
      c10::StorageImpl::use_byte_size_t(),
      size_bytes,
      allocator,
      /*resizeable=*/true);
  auto tensor = detail::make_tensor_base<TensorImpl>(
      std::move(storage_impl), ks, dtype);
  // Default TensorImpl has size [0]
  // NB: test for meta dispatch key to avoid guarding on zero-ness
  if (ks.has(c10::DispatchKey::Meta) || size.size() != 1 || size[0] != 0) {
    tensor.unsafeGetTensorImpl()->generic_set_sizes_contiguous(size);
  }
  if (memory_format_opt.has_value()) {
    // Restriding a just-created empty contiguous tensor does nothing.
    if (*memory_format_opt != MemoryFormat::Contiguous) {
      tensor.unsafeGetTensorImpl()->empty_tensor_restride(*memory_format_opt);
    }
  }
  return tensor;
}

TensorBase empty_generic(
    IntArrayRef size,
    c10::Allocator* allocator,
    c10::DispatchKeySet ks,
    ScalarType scalar_type,
    c10::optional<c10::MemoryFormat> memory_format_opt) {
  return _empty_generic(size, allocator, ks, scalar_type, memory_format_opt);
}

template <typename T>
TensorBase _empty_strided_generic(
    T size,
    T stride,
    c10::Allocator* allocator,
    c10::DispatchKeySet ks,
    ScalarType scalar_type) {
  at::detail::check_size_nonnegative(size);
  at::detail::raise_warning_for_complex_half(scalar_type);
  caffe2::TypeMeta dtype = scalarTypeToTypeMeta(scalar_type);
  auto size_bytes = computeStorageNbytes(size, stride, dtype.itemsize());
  auto storage_impl = c10::make_intrusive<StorageImpl>(
      c10::StorageImpl::use_byte_size_t(),
      size_bytes,
      allocator,
      /*resizeable=*/true);
  auto tensor = detail::make_tensor_base<TensorImpl>(
      std::move(storage_impl), ks, dtype);
  tensor.unsafeGetTensorImpl()->set_sizes_and_strides(size, stride);
  return tensor;
}

TensorBase empty_strided_generic(
    IntArrayRef size,
    IntArrayRef stride,
    c10::Allocator* allocator,
    c10::DispatchKeySet ks,
    ScalarType scalar_type) {
  return _empty_strided_generic<IntArrayRef>(size, stride, allocator, ks, scalar_type);
}

TensorBase empty_strided_symint_generic(
    SymIntArrayRef size,
    SymIntArrayRef stride,
    c10::Allocator* allocator,
    c10::DispatchKeySet ks,
    ScalarType scalar_type) {
  return _empty_strided_generic<SymIntArrayRef>(size, stride, allocator, ks, scalar_type);
}

TensorBase empty_cpu(IntArrayRef size, ScalarType dtype, bool pin_memory,
                     c10::optional<c10::MemoryFormat> memory_format_opt) {
  auto allocator = GetCPUAllocatorMaybePinned(pin_memory);
  constexpr c10::DispatchKeySet cpu_ks(c10::DispatchKey::CPU);
  return empty_generic(size, allocator, cpu_ks, dtype, memory_format_opt);
}
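// Illustrative call, not taken from this file: at::detail::empty_cpu({2, 3},
// at::kFloat, /*pin_memory=*/false, c10::nullopt) returns an uninitialized
// 2x3 float tensor backed by the default CPU allocator; passing pin_memory=true
// would route the allocation through GetCPUAllocatorMaybePinned's pinned path.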

TensorBase empty_cpu(
    IntArrayRef size,
    c10::optional<ScalarType> dtype_opt,
    c10::optional<Layout> layout_opt,
    c10::optional<Device> device_opt,
    c10::optional<bool> pin_memory_opt,
    c10::optional<c10::MemoryFormat> memory_format_opt) {
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(device_or_default(device_opt).type() == DeviceType::CPU);
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(layout_or_default(layout_opt) == Layout::Strided);
  auto pin_memory = pinned_memory_or_default(pin_memory_opt);
  auto dtype = dtype_or_default(dtype_opt);
  return empty_cpu(size, dtype, pin_memory, memory_format_opt);
}

TensorBase empty_cpu(
    IntArrayRef size, const TensorOptions &options) {
  return at::detail::empty_cpu(
      size,
      optTypeMetaToScalarType(options.dtype_opt()),
      options.layout_opt(),
      options.device_opt(),
      options.pinned_memory_opt(),
      options.memory_format_opt());
}

TensorBase empty_strided_cpu(IntArrayRef size, IntArrayRef stride,
                             ScalarType dtype, bool pin_memory) {
  auto allocator = at::detail::GetCPUAllocatorMaybePinned(pin_memory);
  constexpr c10::DispatchKeySet cpu_ks(c10::DispatchKey::CPU);
  return at::detail::empty_strided_generic(
      size, stride, allocator, cpu_ks, dtype);
}

TensorBase empty_strided_cpu(
    IntArrayRef size,
    IntArrayRef stride,
    c10::optional<ScalarType> dtype_opt,
    c10::optional<Layout> layout_opt,
    c10::optional<Device> device_opt,
    c10::optional<bool> pin_memory_opt) {
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(device_or_default(device_opt).type() == DeviceType::CPU);
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(layout_or_default(layout_opt) == Layout::Strided);
  auto pin_memory = pinned_memory_or_default(pin_memory_opt);
  auto dtype = dtype_or_default(dtype_opt);
  return at::detail::empty_strided_cpu(size, stride, dtype, pin_memory);
}

TensorBase empty_strided_cpu(
    IntArrayRef size,
    IntArrayRef stride,
    const TensorOptions &options) {
  return at::detail::empty_strided_cpu(
      size,
      stride,
      optTypeMetaToScalarType(options.dtype_opt()),
      options.layout_opt(),
      options.device_opt(),
      options.pinned_memory_opt());
}

// The meta allocator ignores whatever allocation is requested and always
// gives you nullptr
struct MetaAllocator final : public at::Allocator {
  MetaAllocator() = default;
  ~MetaAllocator() override = default;
  static void deleter(void* const pointer) {
    TORCH_INTERNAL_ASSERT(!pointer);
  }
  DataPtr allocate(const size_t nbytes) const override {
    return {nullptr, nullptr, &deleter, at::Device(DeviceType::Meta)};
  }
  DeleterFnPtr raw_deleter() const override {
    return deleter;
  }
  void copy_data(void* dest, const void* src, std::size_t count) const final {}
};

static MetaAllocator g_meta_alloc;

REGISTER_ALLOCATOR(kMeta, &g_meta_alloc);
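// Note (added for context, not in the original source): REGISTER_ALLOCATOR
// records g_meta_alloc in c10's per-device-type allocator table, which is what
// the GetAllocator(kMeta) calls below retrieve.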

TensorBase empty_meta(IntArrayRef size, ScalarType dtype,
                      c10::optional<c10::MemoryFormat> memory_format_opt) {
  auto *allocator = GetAllocator(kMeta);
  constexpr c10::DispatchKeySet meta_dks(c10::DispatchKey::Meta);
  return at::detail::empty_generic(
      size, allocator, meta_dks, dtype, memory_format_opt);
}
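// Illustrative call, not taken from this file: at::detail::empty_meta({2, 3},
// at::kFloat, c10::nullopt) records shape, dtype and contiguous strides but,
// because MetaAllocator::allocate returns nullptr, owns no actual data; this
// lets shape and stride propagation run without allocating real memory.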

TensorBase empty_meta(
    IntArrayRef size,
    c10::optional<ScalarType> dtype_opt,
    c10::optional<Layout> layout_opt,
    c10::optional<Device> device_opt,
    c10::optional<bool> pin_memory_opt,
    c10::optional<c10::MemoryFormat> memory_format_opt
) {
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(device_or_default(device_opt).type() == DeviceType::Meta);
  // NB: because there is no SparseMeta (yet), a non-strided layout can
  // actually be requested here, so reject it with a user-facing error
  // rather than a debug assert.
  TORCH_CHECK_NOT_IMPLEMENTED(
      layout_or_default(layout_opt) == Layout::Strided,
      "non-strided meta tensors not supported yet"
  );
  auto dtype = dtype_or_default(dtype_opt);
  return empty_meta(size, dtype, memory_format_opt);
}

TensorBase empty_symint_meta(
    SymIntArrayRef size,
    c10::optional<ScalarType> dtype_opt,
    c10::optional<Layout> layout_opt,
    c10::optional<Device> device_opt,
    c10::optional<bool> pin_memory_opt,
    c10::optional<c10::MemoryFormat> memory_format_opt
) {
  auto *allocator = GetAllocator(kMeta);
  constexpr c10::DispatchKeySet ks(c10::DispatchKey::Meta);
  auto scalar_type = dtype_or_default(dtype_opt);
  return _empty_generic(size, allocator, ks, scalar_type, memory_format_opt);
}

TensorBase empty_meta(
    IntArrayRef size, const TensorOptions &options) {
  return at::detail::empty_meta(
      size,
      optTypeMetaToScalarType(options.dtype_opt()),
      options.layout_opt(),
      options.device_opt(),
      options.pinned_memory_opt(),
      options.memory_format_opt());
}

TensorBase empty_strided_meta(IntArrayRef size, IntArrayRef stride,
                              ScalarType dtype) {
  auto *allocator = GetAllocator(kMeta);
  constexpr c10::DispatchKeySet meta_dks(c10::DispatchKey::Meta);
  return at::detail::empty_strided_generic(
      size, stride, allocator, meta_dks, dtype);
}

TensorBase empty_strided_meta(
    IntArrayRef size,
    IntArrayRef stride,
    c10::optional<ScalarType> dtype_opt,
    c10::optional<Layout> layout_opt,
    c10::optional<Device> device_opt,
    c10::optional<bool> pin_memory_opt) {
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(device_or_default(device_opt).type() == DeviceType::Meta);
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(layout_or_default(layout_opt) == Layout::Strided);
  auto dtype = dtype_or_default(dtype_opt);
  return at::detail::empty_strided_meta(size, stride, dtype);
}

TensorBase empty_strided_meta(
    IntArrayRef size,
    IntArrayRef stride,
    const TensorOptions &options) {
  return at::detail::empty_strided_meta(
      size,
      stride,
      optTypeMetaToScalarType(options.dtype_opt()),
      options.layout_opt(),
      options.device_opt(),
      options.pinned_memory_opt());
}

TensorBase empty_strided_symint_meta(SymIntArrayRef size, SymIntArrayRef stride,
                                     ScalarType dtype) {
  auto *allocator = GetAllocator(kMeta);
  constexpr c10::DispatchKeySet meta_dks(c10::DispatchKey::Meta);
  return at::detail::empty_strided_symint_generic(
      size, stride, allocator, meta_dks, dtype);
}

TensorBase empty_strided_symint_meta(
    SymIntArrayRef size,
    SymIntArrayRef stride,
    c10::optional<ScalarType> dtype_opt,
    c10::optional<Layout> layout_opt,
    c10::optional<Device> device_opt,
    c10::optional<bool> pin_memory_opt) {
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(device_or_default(device_opt).type() == DeviceType::Meta);
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(layout_or_default(layout_opt) == Layout::Strided);
  auto dtype = dtype_or_default(dtype_opt);
  return at::detail::empty_strided_symint_meta(size, stride, dtype);
}

TensorBase empty_strided_symint_meta(
    SymIntArrayRef size,
    SymIntArrayRef stride,
    const TensorOptions &options) {
  return at::detail::empty_strided_symint_meta(
      size,
      stride,
      optTypeMetaToScalarType(options.dtype_opt()),
      options.layout_opt(),
      options.device_opt(),
      options.pinned_memory_opt());
}

} // namespace at::detail