Migrate `var` & `std` to ATen (#39967)

Summary:
This PR migrates the full-tensor `var` and `std` reductions from the legacy TH/THC kernels (`var_all`/`std_all`, exposed as `at::_var`/`at::_std`) to the existing ATen multi-dimensional reduction path. Not sure why there are so many open issues for `std` and `var`, but this PR should close them all:
std: Fix https://github.com/pytorch/pytorch/issues/24771, Fix https://github.com/pytorch/pytorch/issues/24676, Fix https://github.com/pytorch/pytorch/issues/24639, Fix https://github.com/pytorch/pytorch/issues/24529
var: Fix https://github.com/pytorch/pytorch/issues/24782, Fix https://github.com/pytorch/pytorch/issues/24677, Fix https://github.com/pytorch/pytorch/issues/24652, Fix https://github.com/pytorch/pytorch/issues/24530
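
As a quick sanity check (illustrative only, not part of the diff): the full reduction now routes through the ATen multi-dimensional reduction, so it should agree with an explicit reduction over every dimension.

```py
import torch

# Sanity check (not part of this PR): t.std() / t.var() now dispatch to the
# ATen multi-dim reduction, so they should match an explicit reduction over
# all dimensions (unbiased=True is the default in both forms).
t = torch.randn(1000, 100)
assert torch.allclose(t.std(), t.std(dim=[0, 1], unbiased=True))
assert torch.allclose(t.var(), t.var(dim=[0, 1], unbiased=True))
```

Benchmark of the full-tensor `std()` below (average wall-clock time of 10 runs per shape):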

```py
import time
import torch

def _time():
    # Wait for queued CUDA kernels to finish so wall-clock timing is accurate.
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    return time.time()

for device in (torch.device("cpu"), torch.device("cuda")):
    for size in (
        [100000000],
        [10000, 10000],
        [1000, 1000, 100],
        [100, 100, 100, 100],
    ):
        t = torch.randn(*size, device=device)
        total_time = 0
        # Average the wall-clock time of a full-tensor std() over 10 runs.
        for i in range(10):
            t1 = _time()
            t.std()
            t2 = _time()
            total_time += t2 - t1
        print(f"Tensor of size {size} on {device}: {total_time / 10}")
```

Before:
```
Tensor of size [100000000] on cpu: 0.36041643619537356
Tensor of size [10000, 10000] on cpu: 0.37235140800476074
Tensor of size [1000, 1000, 100] on cpu: 0.386572527885437
Tensor of size [100, 100, 100, 100] on cpu: 0.37404844760894773
Tensor of size [100000000] on cuda: 0.0021645784378051757
Tensor of size [10000, 10000] on cuda: 0.002090191841125488
Tensor of size [1000, 1000, 100] on cuda: 0.00208127498626709
Tensor of size [100, 100, 100, 100] on cuda: 0.0020844221115112306
```

After:
```
Tensor of size [100000000] on cpu: 0.1339871883392334
Tensor of size [10000, 10000] on cpu: 0.1343991994857788
Tensor of size [1000, 1000, 100] on cpu: 0.1346735954284668
Tensor of size [100, 100, 100, 100] on cpu: 0.11906447410583496
Tensor of size [100000000] on cuda: 0.0013531208038330077
Tensor of size [10000, 10000] on cuda: 0.0012922048568725585
Tensor of size [1000, 1000, 100] on cuda: 0.001285886764526367
Tensor of size [100, 100, 100, 100] on cuda: 0.0012899160385131836
```
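
The numerics are unchanged by the migration. Purely as an illustrative cross-check (not part of this PR; double precision is used to keep accumulation noise out of the comparison), the default unbiased estimators still agree with NumPy's `ddof=1`:

```py
import numpy as np
import torch

# Illustrative cross-check against NumPy's unbiased (ddof=1) estimators.
# Double precision avoids spurious mismatches from float32 accumulation order.
t = torch.randn(10000, dtype=torch.float64)
ref = t.numpy()
assert np.isclose(t.std().item(), ref.std(ddof=1))
assert np.isclose(t.var().item(), ref.var(ddof=1))
```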

cc: VitalyFedyunin
Pull Request resolved: https://github.com/pytorch/pytorch/pull/39967

Differential Revision: D22162469

Pulled By: VitalyFedyunin

fbshipit-source-id: 8d901c779767b00f81cd6231bc665e04f297b4c3
diff --git a/aten/src/ATen/Declarations.cwrap b/aten/src/ATen/Declarations.cwrap
index 7e71b0b..e4a6237 100644
--- a/aten/src/ATen/Declarations.cwrap
+++ b/aten/src/ATen/Declarations.cwrap
@@ -211,36 +211,6 @@
     - bool sorted
 ]]
 [[
-  name: _th_var
-  types:
-    - floating_point
-  backends:
-    - CPU
-    - CUDA
-  variants: function
-  options:
-    - cname: var_all
-      return: accreal
-      arguments:
-        - THTensor* self
-        - bool unbiased
-]]
-[[
-  name: _th_std
-  types:
-    - floating_point
-  backends:
-    - CPU
-    - CUDA
-  variants: function
-  options:
-    - cname: std_all
-      return: accreal
-      arguments:
-        - THTensor* self
-        - bool unbiased
-]]
-[[
   name: _th_renorm
   cname: renorm
   types:
diff --git a/aten/src/ATen/core/NamedRegistrations.cpp b/aten/src/ATen/core/NamedRegistrations.cpp
index f09bd2b..42f9445 100644
--- a/aten/src/ATen/core/NamedRegistrations.cpp
+++ b/aten/src/ATen/core/NamedRegistrations.cpp
@@ -18,8 +18,6 @@
   m.impl("_sparse_log_softmax.int", CppFunction::makeFallthrough());
   m.impl("_sparse_softmax.Dimname", CppFunction::makeFallthrough());
   m.impl("_sparse_softmax.int", CppFunction::makeFallthrough());
-  m.impl("_std", CppFunction::makeFallthrough());
-  m.impl("_var", CppFunction::makeFallthrough());
   m.impl("abs", CppFunction::makeFallthrough());
   m.impl("abs.out", CppFunction::makeFallthrough());
   m.impl("abs_", CppFunction::makeFallthrough());
diff --git a/aten/src/ATen/native/ReduceOps.cpp b/aten/src/ATen/native/ReduceOps.cpp
index d5f5ff7..ddbe5fe 100644
--- a/aten/src/ATen/native/ReduceOps.cpp
+++ b/aten/src/ATen/native/ReduceOps.cpp
@@ -835,7 +835,9 @@
   TORCH_CHECK(at::isFloatingType(self.scalar_type()) || at::isComplexType(self.scalar_type()),
               "var only supports floating-point dtypes");
   auto trivial_return = _allreduce_return_trivial(self, std::numeric_limits<double>::quiet_NaN());
-  return trivial_return.has_value() ? trivial_return.value() : at::_var(self, unbiased);
+  if (trivial_return.has_value())
+    return trivial_return.value();
+  return at::var(self, IntArrayRef{}, unbiased, false);
 }
 
 Tensor var(const Tensor& self, IntArrayRef dim, bool unbiased, bool keepdim) {
@@ -855,7 +857,9 @@
   TORCH_CHECK(at::isFloatingType(self.scalar_type()) || at::isComplexType(self.scalar_type()),
               "std only supports floating-point dtypes");
   auto trivial_return = _allreduce_return_trivial(self, std::numeric_limits<double>::quiet_NaN());
-  return trivial_return.has_value() ? trivial_return.value() : at::_std(self, unbiased);
+  if (trivial_return.has_value())
+    return trivial_return.value();
+  return at::std(self, IntArrayRef{}, unbiased, false);
 }
 
 Tensor std(const Tensor& self, IntArrayRef dim, bool unbiased, bool keepdim) {
@@ -868,7 +872,7 @@
 }
 
 Tensor std(const Tensor& self, DimnameList dim, bool unbiased, bool keepdim) {
-  return  at::std(self, dimnames_to_positions(self, dim), unbiased, keepdim);
+  return at::std(self, dimnames_to_positions(self, dim), unbiased, keepdim);
 }
 
 Tensor& std_out(Tensor& result, const Tensor& self, DimnameList dim, bool unbiased, bool keepdim) {
diff --git a/aten/src/ATen/native/native_functions.yaml b/aten/src/ATen/native/native_functions.yaml
index 99ef427..8c690fd 100644
--- a/aten/src/ATen/native/native_functions.yaml
+++ b/aten/src/ATen/native/native_functions.yaml
@@ -5216,18 +5216,6 @@
     CPU: _cumprod_out_cpu
     CUDA: _cumprod_out_cuda
 
-- func: _var(Tensor self, bool unbiased=True) -> Tensor
-  use_c10_dispatcher: full
-  dispatch:
-    CPU: legacy::cpu::_th_var
-    CUDA: legacy::cuda::_th_var
-
-- func: _std(Tensor self, bool unbiased=True) -> Tensor
-  use_c10_dispatcher: full
-  dispatch:
-    CPU: legacy::cpu::_th_std
-    CUDA: legacy::cuda::_th_std
-
 - func: _amp_non_finite_check_and_unscale_(Tensor(a!) self, Tensor(b!) found_inf, Tensor inv_scale) -> ()
   variants: function
   dispatch:
diff --git a/aten/src/TH/generic/THTensorMath.h b/aten/src/TH/generic/THTensorMath.h
index 394705f..dfc5e3e 100644
--- a/aten/src/TH/generic/THTensorMath.h
+++ b/aten/src/TH/generic/THTensorMath.h
@@ -49,9 +49,6 @@
 TH_API void THTensor_(renorm)(THTensor *r_, THTensor *t, scalar_t value, int dimension, scalar_t maxnorm);
 TH_API void THTensor_(histc)(THTensor *hist, THTensor *tensor, int64_t nbins, scalar_t minvalue, scalar_t maxvalue);
 
-TH_API accreal THTensor_(var_all)(THTensor *self, bool unbiased);
-TH_API accreal THTensor_(std_all)(THTensor *self, bool unbiased);
-
 #endif
 #endif
 #endif
diff --git a/aten/src/TH/generic/THTensorMoreMath.cpp b/aten/src/TH/generic/THTensorMoreMath.cpp
index 0a4e86a..4a81a26 100644
--- a/aten/src/TH/generic/THTensorMoreMath.cpp
+++ b/aten/src/TH/generic/THTensorMoreMath.cpp
@@ -710,20 +710,6 @@
   c10::raw::intrusive_ptr::decref(rowS);
 }
 
-accreal THTensor_(var_all)(THTensor *tensor, bool unbiased)
-{
-  accreal mean = THTensor_wrap(tensor).mean().item<accreal>();
-  accreal sum = 0;
-  TH_TENSOR_APPLY(scalar_t, tensor, sum += (*tensor_data - mean)*(*tensor_data - mean););
-  sum /= std::max<int64_t>(0, THTensor_(nElement)(tensor) - (unbiased ? 1 : 0));
-  return sum;
-}
-
-accreal THTensor_(std_all)(THTensor *tensor, bool unbiased)
-{
-  return sqrt(THTensor_(var_all)(tensor, unbiased));
-}
-
 void THTensor_(histc)(THTensor *hist, THTensor *tensor, int64_t nbins, scalar_t minvalue, scalar_t maxvalue)
 {
   if (nbins <= 0) {
diff --git a/aten/src/THC/generic/THCTensorMathReduce.cu b/aten/src/THC/generic/THCTensorMathReduce.cu
index cb315c8..76f470c 100644
--- a/aten/src/THC/generic/THCTensorMathReduce.cu
+++ b/aten/src/THC/generic/THCTensorMathReduce.cu
@@ -56,35 +56,6 @@
   THCTensor_(free)(state, data);
 }
 
-accreal THCTensor_(std_all)(THCState *state, THCTensor *self, bool unbiased)
-{
-  THCAssertSameGPU(THCTensor_(checkGPU)(state, 1, self));
-  return THCNumerics<accreal>::sqrt((THCTensor_(var_all)(state, self, unbiased)));
-}
-
-accreal THCTensor_(var_all)(THCState *state, THCTensor *self, bool unbiased)
-{
-  THCAssertSameGPU(THCTensor_(checkGPU)(state, 1, self));
-  accreal mean = THTensor_wrap(self).mean().item<accreal>();
-
-  accreal val;
-  if (!THC_reduceAll<scalar_t>(state, self,
-                           SquareFunctor<accreal>(mean),
-                           ReduceAdd<accreal>(),
-                           scalar_cast<accreal>(0),
-                           &val, 0)) {
-    THArgCheck(false, 1, CUTORCH_DIM_WARNING);
-  }
-
-  val = THCNumerics<accreal>::div(
-    val,
-    scalar_cast<accreal>(std::max<int64_t>(0, THCTensor_(nElement)(state, self) - (unbiased ? 1 : 0)))
-  );
-
-  THCudaCheck(cudaGetLastError());
-  return val;
-}
-
 #endif
 
 #endif
diff --git a/aten/src/THC/generic/THCTensorMathReduce.h b/aten/src/THC/generic/THCTensorMathReduce.h
index ebb62a6..334c9f4 100644
--- a/aten/src/THC/generic/THCTensorMathReduce.h
+++ b/aten/src/THC/generic/THCTensorMathReduce.h
@@ -9,9 +9,7 @@
 THC_API void THCTensor_(renorm)(THCState *state, THCTensor* self, THCTensor* src, scalar_t value, int dimension, scalar_t max_norm);
 THC_API void THCTensor_(norm)(THCState *state, THCTensor* self, THCTensor* src, scalar_t value, int dimension, int keepdim);
 
-THC_API accreal THCTensor_(std_all)(THCState *state, THCTensor *self, bool unbiased);
 THC_API accreal THCTensor_(normall)(THCState *state, THCTensor *self, scalar_t value);
-THC_API accreal THCTensor_(var_all)(THCState *state, THCTensor *self, bool unbiased);
 
 #endif
 
diff --git a/test/backward_compatibility/check_backward_compatibility.py b/test/backward_compatibility/check_backward_compatibility.py
index bf988b0..ad49e94 100644
--- a/test/backward_compatibility/check_backward_compatibility.py
+++ b/test/backward_compatibility/check_backward_compatibility.py
@@ -107,6 +107,8 @@
     ('aten::__or__', datetime.date(2020, 6, 30)),
     ('aten::__xor__', datetime.date(2020, 6, 30)),
     ('aten::split', datetime.date(2020, 6, 30)),
+    ('aten::_var', datetime.date(2020, 6, 30)),
+    ('aten::_std', datetime.date(2020, 6, 30)),
 ]