Don't copy on clamp, clamp_out (#10352)
Summary:
This makes clamp and relu faster (fixes #10276).
The extra copying was introduced when clamp moved to ATen and the
in-place _th_clamp_ wrapper was used to forward to TH/THC; we remove
that wrapper and add _th_clamp(_out) instead.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/10352
Reviewed By: ezyang
Differential Revision: D9233590
Pulled By: SsnL
fbshipit-source-id: 4f86a045498e5e577fb22656c71f171add7ed0ac
diff --git a/aten/src/ATen/Declarations.cwrap b/aten/src/ATen/Declarations.cwrap
index 33eb98d..983f496 100644
--- a/aten/src/ATen/Declarations.cwrap
+++ b/aten/src/ATen/Declarations.cwrap
@@ -2267,39 +2267,42 @@
     - THTensor* other
 ]]
 [[
-  name: _th_clamp_
+  name: _th_clamp
   cname: clamp
   variants:
     - method
     - function
   return: argument 0
   arguments:
-    - THTensor* self
+    - arg: THTensor* result
+      output: True
     - THTensor* self
     - real min
     - real max
 ]]
 [[
-  name: _th_clamp_min_
+  name: _th_clamp_min
   cname: cmaxValue
   variants:
     - method
     - function
   return: argument 0
   arguments:
-    - THTensor* self
+    - arg: THTensor* result
+      output: True
     - THTensor* self
     - real min
 ]]
 [[
-  name: _th_clamp_max_
+  name: _th_clamp_max
   cname: cminValue
   variants:
     - method
     - function
   return: argument 0
   arguments:
-    - THTensor* self
+    - arg: THTensor* result
+      output: True
     - THTensor* self
     - real max
 ]]
diff --git a/aten/src/ATen/native/UnaryOps.cpp b/aten/src/ATen/native/UnaryOps.cpp
index 2e349aa..2d8a3e2 100644
--- a/aten/src/ATen/native/UnaryOps.cpp
+++ b/aten/src/ATen/native/UnaryOps.cpp
@@ -47,11 +47,11 @@
 Tensor& _clamp__cpu(Tensor& self, Scalar min, Scalar max) {
   if (!std::isnan(min.toDouble()) && !std::isnan(max.toDouble())) {
-    return _th_clamp_(self, min, max);
+    return _th_clamp_out(self, self, min, max);
   } else if (std::isnan(min.toDouble())) {
-    return _th_clamp_max_(self, max);
+    return _th_clamp_max_out(self, self, max);
   } else if (std::isnan(max.toDouble())) {
-    return _th_clamp_min_(self, min);
+    return _th_clamp_min_out(self, self, min);
   } else {
     return self;
   }
 }
@@ -62,36 +62,30 @@
     const Tensor& self,
     Scalar min,
     Scalar max) {
-  result.resize_(self.sizes());
-  result.copy_(self);
   if (!std::isnan(min.toDouble()) && !std::isnan(max.toDouble())) {
-    _th_clamp_(result, min, max);
+    _th_clamp_out(result, self, min, max);
   } else if (std::isnan(min.toDouble())) {
-    _th_clamp_max_(result, max);
+    _th_clamp_max_out(result, self, max);
   } else if (std::isnan(max.toDouble())) {
-    _th_clamp_min_(result, min);
+    _th_clamp_min_out(result, self, min);
   }
   return result;
 }
 
 Tensor& _clamp_max__cpu(Tensor& self, Scalar max) {
-  return _th_clamp_max_(self, max);
+  return _th_clamp_max_out(self, self, max);
 }
 
 Tensor& _clamp_max_out_cpu(Tensor& result, const Tensor& self, Scalar max) {
-  result.resize_(self.sizes());
-  result.copy_(self);
-  return _th_clamp_max_(result, max);
+  return _th_clamp_max_out(result, self, max);
 }
 
 Tensor& _clamp_min__cpu(Tensor& self, Scalar min) {
-  return _th_clamp_min_(self, min);
+  return _th_clamp_min_out(self, self, min);
 }
 
 Tensor& _clamp_min_out_cpu(Tensor& result, const Tensor& self, Scalar min) {
-  result.resize_(self.sizes());
-  result.copy_(self);
-  return _th_clamp_min_(result, min);
+  return _th_clamp_min_out(result, self, min);
 }
 
 Tensor& fill_(Tensor& self, Scalar value) {
diff --git a/aten/src/ATen/native/cuda/CUDAUnaryOps.cpp b/aten/src/ATen/native/cuda/CUDAUnaryOps.cpp
index 1380039..b55d561 100644
--- a/aten/src/ATen/native/cuda/CUDAUnaryOps.cpp
+++ b/aten/src/ATen/native/cuda/CUDAUnaryOps.cpp
@@ -4,11 +4,11 @@
 Tensor& _clamp__cuda(Tensor& self, Scalar min, Scalar max) {
   if (!std::isnan(min.toDouble()) && !std::isnan(max.toDouble())) {
-    return _th_clamp_(self, min, max);
+    return _th_clamp_out(self, self, min, max);
   } else if (std::isnan(min.toDouble())) {
-    return _th_clamp_max_(self, max);
+    return _th_clamp_max_out(self, self, max);
   } else if (std::isnan(max.toDouble())) {
-    return _th_clamp_min_(self, min);
+    return _th_clamp_min_out(self, self, min);
   } else {
     return self;
   }
 }
@@ -19,36 +19,30 @@
     const Tensor& self,
     Scalar min,
     Scalar max) {
-  result.resize_(self.sizes());
-  result.copy_(self);
   if (!std::isnan(min.toDouble()) && !std::isnan(max.toDouble())) {
-    _th_clamp_(result, min, max);
+    _th_clamp_out(result, self, min, max);
   } else if (std::isnan(min.toDouble())) {
-    _th_clamp_max_(result, max);
+    _th_clamp_max_out(result, self, max);
   } else if (std::isnan(max.toDouble())) {
-    _th_clamp_min_(result, min);
+    _th_clamp_min_out(result, self, min);
   }
   return result;
 }
 
 Tensor& _clamp_max__cuda(Tensor& self, Scalar max) {
-  return _th_clamp_max_(self, max);
+  return _th_clamp_max_out(self, self, max);
 }
 
 Tensor& _clamp_max_out_cuda(Tensor& result, const Tensor& self, Scalar max) {
-  result.resize_(self.sizes());
-  result.copy_(self);
-  return _th_clamp_max_(result, max);
+  return _th_clamp_max_out(result, self, max);
 }
 
 Tensor& _clamp_min__cuda(Tensor& self, Scalar min) {
-  return _th_clamp_min_(self, min);
+  return _th_clamp_min_out(self, self, min);
 }
 
 Tensor& _clamp_min_out_cuda(Tensor& result, const Tensor& self, Scalar min) {
-  result.resize_(self.sizes());
-  result.copy_(self);
-  return _th_clamp_min_(result, min);
+  return _th_clamp_min_out(result, self, min);
 }
 
 // These are just forwarding stubs