Some fixes to smooth_l1_loss (#45532)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/45532
- updated documentation
- explicitly rejecting negative values for beta (previously the
  result was incorrect); see the first sketch after this list
- removing the default value for beta in the backward function, since it is
  only used internally by autograd (as per convention); see the second
  sketch below
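
A sketch of the user-visible forward behavior, assuming the standard
torch.nn.functional.smooth_l1_loss entry point (which forwards beta to this
kernel); tensor values are arbitrary:

    import torch
    import torch.nn.functional as F

    x = torch.randn(4)
    y = torch.randn(4)

    # beta == 0 now takes the explicit l1_loss fallback path.
    assert torch.allclose(F.smooth_l1_loss(x, y, beta=0.0), F.l1_loss(x, y))

    # A negative beta is rejected instead of producing a wrong result.
    try:
        F.smooth_l1_loss(x, y, beta=-1.0)
    except RuntimeError as err:
        print(err)  # "smooth_l1_loss does not support negative values for beta."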
Test Plan: Imported from OSS
Reviewed By: gchanan
Differential Revision: D24002415
Pulled By: bdhirsh
fbshipit-source-id: 980c141019ec2d437b771ee11fc1cec4b1fcfb48
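
For the backward-schema change above, no user code needs to change: autograd
supplies beta explicitly when it invokes smooth_l1_loss_backward, so the
dropped default is invisible from Python. A minimal sketch (values arbitrary):

    import torch
    import torch.nn.functional as F

    inp = torch.randn(6, requires_grad=True)
    tgt = torch.randn(6)

    # The forward call records beta; autograd passes it through to
    # smooth_l1_loss_backward without relying on any schema default.
    F.smooth_l1_loss(inp, tgt, beta=0.5).backward()
    print(inp.grad)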
diff --git a/aten/src/ATen/native/Loss.cpp b/aten/src/ATen/native/Loss.cpp
index 3563a74..2a3e97c 100644
--- a/aten/src/ATen/native/Loss.cpp
+++ b/aten/src/ATen/native/Loss.cpp
@@ -296,8 +296,10 @@
}
Tensor smooth_l1_loss(const Tensor& input, const Tensor& target, const int64_t reduction, double beta) {
- if (beta <= 0)
+ TORCH_CHECK(beta >= 0, "smooth_l1_loss does not support negative values for beta.")
+ if (beta == 0) {
return at::native::l1_loss(input, target, reduction);
+ }
Tensor loss;
auto iter = TensorIterator::binary_op(loss, input, target);
smooth_l1_stub(iter.device_type(), iter, beta);
@@ -305,8 +307,10 @@
}
Tensor& smooth_l1_loss_out(Tensor& result, const Tensor& input, const Tensor& target, int64_t reduction, double beta) {
- if (beta <= 0)
+ TORCH_CHECK(beta >= 0, "smooth_l1_loss does not support negative values for beta.")
+ if (beta == 0) {
return at::native::l1_loss_out(result, input, target, reduction);
+ }
if (reduction != Reduction::None) {
Tensor loss;
auto iter = TensorIterator::binary_op(loss, input, target);
diff --git a/aten/src/ATen/native/native_functions.yaml b/aten/src/ATen/native/native_functions.yaml
index 43df603..2aa5cbf 100644
--- a/aten/src/ATen/native/native_functions.yaml
+++ b/aten/src/ATen/native/native_functions.yaml
@@ -6794,13 +6794,13 @@
dispatch:
CPU, CUDA: smooth_l1_loss
-- func: smooth_l1_loss_backward.grad_input(Tensor grad_output, Tensor self, Tensor target, int reduction, float beta=1.0, *, Tensor(a!) grad_input) -> Tensor(a!)
+- func: smooth_l1_loss_backward.grad_input(Tensor grad_output, Tensor self, Tensor target, int reduction, float beta, *, Tensor(a!) grad_input) -> Tensor(a!)
python_module: nn
dispatch:
CPU: smooth_l1_loss_backward_out
CUDA: smooth_l1_loss_backward_out
-- func: smooth_l1_loss_backward(Tensor grad_output, Tensor self, Tensor target, int reduction, float beta=1.0) -> Tensor
+- func: smooth_l1_loss_backward(Tensor grad_output, Tensor self, Tensor target, int reduction, float beta) -> Tensor
use_c10_dispatcher: full
python_module: nn
diff --git a/tools/autograd/derivatives.yaml b/tools/autograd/derivatives.yaml
index 707baa3..92ee277 100644
--- a/tools/autograd/derivatives.yaml
+++ b/tools/autograd/derivatives.yaml
@@ -1589,7 +1589,7 @@
grad_output: replication_pad3d(grad, padding)
self: zeros_like(self)
-- name: smooth_l1_loss_backward(Tensor grad_output, Tensor self, Tensor target, int reduction, float beta=1.0) -> Tensor
+- name: smooth_l1_loss_backward(Tensor grad_output, Tensor self, Tensor target, int reduction, float beta) -> Tensor
grad_output: smooth_l1_loss_double_backward_grad_output(grad, grad_output, self, target, reduction, beta)
self: smooth_l1_loss_double_backward(grad * grad_output, self, target, reduction, beta)
target: -smooth_l1_loss_double_backward(grad * grad_output, self, target, reduction, beta)
diff --git a/torch/nn/modules/loss.py b/torch/nn/modules/loss.py
index bd3be4e..e408bde 100644
--- a/torch/nn/modules/loss.py
+++ b/torch/nn/modules/loss.py
@@ -758,7 +758,7 @@
class SmoothL1Loss(_Loss):
r"""Creates a criterion that uses a squared term if the absolute
- element-wise error falls below 1 and an L1 term otherwise.
+ element-wise error falls below beta and an L1 term otherwise.
It is less sensitive to outliers than the `MSELoss` and in some cases
prevents exploding gradients (e.g. see `Fast R-CNN` paper by Ross Girshick).
Also known as the Huber loss:
@@ -780,6 +780,9 @@
beta is an optional parameter that defaults to 1.
+ Note: When beta is set to 0, this is equivalent to :class:`L1Loss`.
+ Passing a negative value in for beta will result in an exception.
+
The division by :math:`n` can be avoided if one sets ``reduction = 'sum'``.
Args:
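
As background for the beta == 0 fallback in Loss.cpp above, a reference sketch
of the piecewise definition (smooth_l1_reference is an illustrative helper, not
PyTorch API) shows why that case is special: the quadratic branch divides by
beta, so it cannot be evaluated at beta == 0 and the kernel dispatches straight
to l1_loss instead.

    import torch
    import torch.nn.functional as F

    def smooth_l1_reference(input, target, beta):
        # |d| < beta: quadratic term 0.5 * d^2 / beta; otherwise linear term |d| - 0.5 * beta.
        d = (input - target).abs()
        return torch.where(d < beta, 0.5 * d * d / beta, d - 0.5 * beta).mean()

    x, y = torch.randn(8), torch.randn(8)
    # Expected to match the built-in kernel for positive beta (default 'mean' reduction).
    print(torch.allclose(smooth_l1_reference(x, y, 1.0), F.smooth_l1_loss(x, y, beta=1.0)))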