Port `resize_as_` and `clone` from TH to ATen (#23027)

Summary:
The `resize_as_` and `clone` API operators are now routed to `at::native::resize_as_*_` and `at::native::clone`, respectively.
The internal `THTensor_(resizeAs)`, `THCTensor_(resizeAs)`, `THTensor_(newClone)`, and `THCTensor_(newClone)` functions remain to support older TH code.
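
For reference, a minimal sketch of the two new native paths (the `clone_sketch` and `resize_as_sketch` names are illustrative only; the actual entry points are `at::native::clone`, `at::native::resize_as_cpu_`, and `at::native::resize_as_cuda_`, as in the diff below): `clone` allocates a like-shaped tensor with `at::empty_like` and copies into it, while `resize_as_` forwards to an in-place resize with the template's sizes.

```cpp
#include <ATen/ATen.h>

// Illustrative stand-ins for the new native kernels (not the literal code in this change).
at::Tensor clone_sketch(const at::Tensor& src) {
  auto self = at::empty_like(src);  // allocate a tensor with src's sizes and options
  self.copy_(src);                  // deep-copy the data
  return self;
}

at::Tensor& resize_as_sketch(at::Tensor& self, const at::Tensor& the_template) {
  return self.resize_(the_template.sizes());  // resize in place to the template's shape
}
```

Call sites such as `x.clone()` and `x.resize_as_(y)` now dispatch to these native kernels through `native_functions.yaml` instead of the legacy `_th_clone` / `_th_resize_as_` bindings.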
Pull Request resolved: https://github.com/pytorch/pytorch/pull/23027

Differential Revision: D16362304

Pulled By: VitalyFedyunin

fbshipit-source-id: 4c1e8516da685f3fdea632ff791d143f27aeebeb
diff --git a/aten/src/ATen/Declarations.cwrap b/aten/src/ATen/Declarations.cwrap
index 57f3df7..f558390 100644
--- a/aten/src/ATen/Declarations.cwrap
+++ b/aten/src/ATen/Declarations.cwrap
@@ -182,19 +182,6 @@
     - THTensor* self
 ]]
 [[
-  name: _th_clone
-  cname: newClone
-  return: THTensor*
-  variants:
-    - function
-  cpu_half: True
-  cpu_bool: True
-  cuda_bool: True
-  cpu_bfloat16: True
-  arguments:
-    - THTensor* self
-]]
-[[
   name: _th_view
   cname: newView
   cpu_half: True
@@ -211,21 +198,6 @@
       long_args: True
 ]]
 [[
-  name: _th_resize_as_
-  cname: resizeAs
-  cpu_half: True
-  cpu_bool: True
-  cuda_bool: True
-  cpu_bfloat16: True
-  variants:
-    - function
-  return: self
-  scalar_check: the_template_->dim() == 0
-  arguments:
-    - THTensor* self
-    - THTensor* the_template
-]]
-[[
   name: _th_index_select
   cpu_bool: True
   cuda_bool: True
diff --git a/aten/src/ATen/native/Resize.cpp b/aten/src/ATen/native/Resize.cpp
index d04d695..e342aae 100644
--- a/aten/src/ATen/native/Resize.cpp
+++ b/aten/src/ATen/native/Resize.cpp
@@ -10,4 +10,8 @@
   return self;
 }
 
+Tensor& resize_as_cpu_(Tensor& self, const Tensor& the_template) {
+  return resize_cpu_(self, the_template.sizes());
+}
+
 }}
diff --git a/aten/src/ATen/native/TensorFactories.cpp b/aten/src/ATen/native/TensorFactories.cpp
index d9f5403..c08caca 100644
--- a/aten/src/ATen/native/TensorFactories.cpp
+++ b/aten/src/ATen/native/TensorFactories.cpp
@@ -826,5 +826,13 @@
     return tensor;
 }
 
+// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ clone ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Tensor clone(const Tensor& src) {
+  auto self = at::empty_like(src);
+  self.copy_(src);
+  return self;
+}
+
 } // namespace native
 } // namespace at
diff --git a/aten/src/ATen/native/cuda/Resize.cu b/aten/src/ATen/native/cuda/Resize.cu
index 6e00b4f..1fda687 100644
--- a/aten/src/ATen/native/cuda/Resize.cu
+++ b/aten/src/ATen/native/cuda/Resize.cu
@@ -12,4 +12,8 @@
   return self;
 }
 
+Tensor& resize_as_cuda_(Tensor& self, const Tensor& the_template) {
+  return resize_cuda_(self, the_template.sizes());
+}
+
 }}
diff --git a/aten/src/ATen/native/native_functions.yaml b/aten/src/ATen/native/native_functions.yaml
index 6d70850..d58ed6e 100644
--- a/aten/src/ATen/native/native_functions.yaml
+++ b/aten/src/ATen/native/native_functions.yaml
@@ -2381,8 +2381,8 @@
 - func: clone(Tensor self) -> Tensor
   variants: function, method
   dispatch:
-    CPU: legacy::cpu::_th_clone
-    CUDA: legacy::cuda::_th_clone
+    CPU: clone
+    CUDA: clone
     SparseCPU: clone_sparse
     SparseCUDA: clone_sparse
     MkldnnCPU: mkldnn_clone
@@ -2391,8 +2391,8 @@
 - func: resize_as_(Tensor(a!) self, Tensor the_template) -> Tensor(a!)
   variants: function, method
   dispatch:
-    CPU: legacy::cpu::_th_resize_as_
-    CUDA: legacy::cuda::_th_resize_as_
+    CPU: resize_as_cpu_
+    CUDA: resize_as_cuda_
     SparseCPU: resize_as_sparse_
     SparseCUDA: resize_as_sparse_
 
diff --git a/aten/src/TH/generic/THTensor.cpp b/aten/src/TH/generic/THTensor.cpp
index 9ca0c9e..4742f5c 100644
--- a/aten/src/TH/generic/THTensor.cpp
+++ b/aten/src/TH/generic/THTensor.cpp
@@ -153,10 +153,11 @@
 
 THTensor *THTensor_(newClone)(THTensor *self)
 {
+  // already available in ATen as at::clone()
   THTensor *tensor = THTensor_(new)();
-  THTensor_(resizeAs)(tensor, self);
   at::Tensor tensor_wrap = THTensor_wrap(tensor);
   at::Tensor self_wrap = THTensor_wrap(self);
+  tensor_wrap.resize_as_(self_wrap);
   at::native::copy_(tensor_wrap, self_wrap, false);
   return tensor;
 }
@@ -217,6 +218,7 @@
 
 void THTensor_(resizeAs)(THTensor *self, THTensor *src)
 {
+  // already available in ATen as at::resize_as_()
   if(!THTensor_(isSameSizeAs)(self, src))
     THTensor_(resizeNd)(self, src->dim(), THTensor_getSizePtr(src), NULL);
 }
diff --git a/aten/src/THC/generic/THCTensor.cpp b/aten/src/THC/generic/THCTensor.cpp
index 65aaf08..14aaeaf 100644
--- a/aten/src/THC/generic/THCTensor.cpp
+++ b/aten/src/THC/generic/THCTensor.cpp
@@ -160,8 +160,11 @@
 
 THCTensor *THCTensor_(newClone)(THCState *state, THCTensor *self)
 {
+  // already available in ATen as at::clone()
   THCTensor *tensor = THCTensor_(new)(state);
-  THCTensor_(resizeAs)(state, tensor, self);
+  at::Tensor tensor_wrap = THTensor_wrap(tensor);
+  at::Tensor self_wrap = THTensor_wrap(self);
+  tensor_wrap.resize_as_(self_wrap);
   THCTensor_(copy)(state, tensor, self);
   return tensor;
 }
@@ -249,6 +252,7 @@
 
 void THCTensor_(resizeAs)(THCState *state, THCTensor *self, THCTensor *src)
 {
+  // already available in ATen as at::resize_as_()
   THCTensor_resizeAs(state, self, src);
 }
 
diff --git a/test/test_torch.py b/test/test_torch.py
index 09f9378..04e23b4 100644
--- a/test/test_torch.py
+++ b/test/test_torch.py
@@ -3479,15 +3479,12 @@
         for device in torch.testing.get_all_device_types():
             for dt in torch.testing.get_all_dtypes():
                 x = torch.tensor([1, 2, 3, 4], dtype=dt, device=device)
-
-                if (device == 'cuda' and dt == torch.bfloat16):
-                    self.assertRaises(RuntimeError, lambda: x.clone())
-                    continue
-
                 x_clone = x.clone()
+                if (device == 'cuda' and dt == torch.bfloat16):
+                    self.assertRaises(RuntimeError, lambda: copy(x))
+                    continue
                 y = copy(x)
                 y.fill_(1)
-
                 # copy is a shallow copy, only copies the tensor view,
                 # not the data
                 self.assertEqual(x, y)
@@ -3505,9 +3502,6 @@
             for dt in torch.testing.get_all_dtypes():
                 x = torch.tensor([[1, 2], [3, 4], [5, 6]], dtype=dt, device=device)
                 y = torch.tensor([[1, 2, 3], [4, 5, 6]], dtype=dt, device=device)
-                if (device == 'cuda' and dt == torch.bfloat16):
-                    self.assertRaises(RuntimeError, lambda: x.resize_as_(y))
-                    continue
                 x.resize_as_(y)
                 self.assertEqual(y.shape, x.shape)
 
@@ -3536,11 +3530,11 @@
         for device in torch.testing.get_all_device_types():
             for dt in torch.testing.get_all_dtypes():
                 x = torch.tensor((1, 1), dtype=dt, device=device)
-                if (device == 'cuda' and dt == torch.bfloat16):
-                    self.assertRaises(RuntimeError, lambda: x.clone())
-                    continue
-
                 y = x.clone()
+                if (device == 'cuda' and dt == torch.bfloat16):
+                    # `x - y` is used inside of the assertEqual
+                    self.assertRaises(RuntimeError, lambda: x - y)
+                    continue
                 self.assertEqual(x, y)
 
     def test_cat_all_dtypes_and_devices(self):