blob: d78418d64190d3b42861e54d53046c951704c845 [file] [log] [blame]
#include "caffe2/operators/elu_op.h"

#include <algorithm>
#include <functional>
#include <numeric>
#include <string>

#include "caffe2/utils/eigen_utils.h"
namespace caffe2 {
// CPU forward pass of ELU over a flat buffer of N elements of type T:
//   Y[i] = alpha * (exp(X[i]) - 1)   if X[i] < 0
//   Y[i] = X[i]                      otherwise
// The context argument is unused. Always returns true.
template <>
template <typename T>
bool EluFunctor<CPUContext>::operator()(
    const int N,
    const T* X,
    T* Y,
    CPUContext* /* context */) const {
  ConstEigenVectorArrayMap<T> input(X, N);
  EigenVectorMap<T> output(Y, N);
  // Coefficient-wise select: negative inputs take the exponential branch,
  // every other input is copied through unchanged.
  output = (input < 0).select(alpha * (input.exp() - T(1)), input);
  return true;
}
// CPU backward pass of ELU, expressed in terms of the forward output Y:
//   dX[i] = dY[i] * (Y[i] + alpha)   if Y[i] < 0
//   dX[i] = dY[i]                    otherwise
// For a negative input the forward pass yields Y = alpha * (exp(X) - 1), so
// dY/dX = alpha * exp(X) = Y + alpha, which is why X itself is not needed.
// NOTE(review): testing the sign of Y instead of X is only equivalent when
// alpha > 0 -- confirm that callers never pass a non-positive alpha.
//
// Y_dims gives the shape of Y; the dY shape and the context are unused, and
// dY is read with the same element count as Y. Always returns true.
template <>
template <typename T>
bool EluGradientFunctor<CPUContext>::Forward(
    const std::vector<int>& Y_dims,
    const std::vector<int>& /* dY_dims */,
    const T* Y,
    const T* dY,
    T* dX,
    CPUContext* /* context */) const {
  // Element count = product of all dimensions (1 for an empty dims vector).
  // std::accumulate is declared in <numeric>.
  const int size = std::accumulate(
      Y_dims.cbegin(), Y_dims.cend(), 1, std::multiplies<int>());
  ConstEigenVectorArrayMap<T> Y_arr(Y, size);
  ConstEigenVectorArrayMap<T> dY_arr(dY, size);
  EigenVectorArrayMap<T> dX_arr(dX, size);
  dX_arr = (Y_arr < 0).select(dY_arr * (Y_arr + alpha), dY_arr);
  return true;
}
// Register the CPU implementation of the "Elu" operator. The operator class
// is UnaryElementwiseWithArgsOp instantiated for float tensors on CPUContext,
// with EluFunctor<CPUContext> supplying the per-element computation.
REGISTER_CPU_OPERATOR(
Elu,
UnaryElementwiseWithArgsOp<
TensorTypes<float>,
CPUContext,
EluFunctor<CPUContext>>);
// Register the CPU implementation of the "EluGradient" operator. The operator
// class is BinaryElementwiseWithArgsOp instantiated for float tensors on
// CPUContext, with EluGradientFunctor<CPUContext> supplying the computation
// over its two inputs.
REGISTER_CPU_GRADIENT_OPERATOR(
EluGradient,
BinaryElementwiseWithArgsOp<
TensorTypes<float>,
CPUContext,
EluGradientFunctor<CPUContext>>);
// Schema for the Elu operator. Input: X, output: Y.
// Exactly one input and one output; the output may be written in place over
// the input, and is declared to have the same type and shape as the input.
OPERATOR_SCHEMA(Elu)
    .NumInputs(1)
    .NumOutputs(1)
    .AllowInplace({{0, 0}})
    .IdenticalTypeAndShape()
    .SetDoc(R"DOC(
This op implements the exponential linear unit (ELU) activation function as described in [Fast and Accurate Deep Network Learning by Exponential Linear Units (ELUs)](https://arxiv.org/abs/1511.07289). The op takes an input tensor $X$ of arbitrary shape, computes the elementwise elu operation, and returns a vector $Y$ of the same shape as output. The alpha parameter may be passed as an argument, but defaults to 1. The elu operation is defined as
$$y=f(x) =\begin{cases}\alpha(e^x-1) & x < 0 \\ x & otherwise\end{cases}$$
Github Links:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/elu_op.h
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/elu_op.cc
<details>
<summary> <b>Example</b> </summary>
**Code**
```
workspace.ResetWorkspace()
op = core.CreateOperator(
"Elu",
["X"],
["Y"],
alpha=1.1
)
workspace.FeedBlob("X", np.random.randn(3, 3).astype(np.float32))
print("X:\n", workspace.FetchBlob("X"), "\n")
workspace.RunOperatorOnce(op)
print("Y:\n", workspace.FetchBlob("Y"))
```
**Result**
```
X:
[[ 0.35339102 1.1860217 -0.10710736]
[-3.1173866 -0.1889988 -0.20330353]
[ 1.8525308 -0.368949 0.506277 ]]
Y:
[[ 0.35339102 1.1860217 -0.11172786]
[-1.0513 -0.18943374 -0.20236646]
[ 1.8525308 -0.33939326 0.506277 ]]
```
</details>
)DOC")
    .Input(0, "X", "1D input tensor of data to be operated on.")
    // Y is the operator's output; the description previously mislabeled it
    // as an input tensor.
    .Output(0, "Y", "1D output tensor, calculated as described above.")
    .Arg(
        "alpha",
        "*(type: float; default: 1.0)* Defines alpha parameter used in calculation.")
    .InheritOnnxSchema();
// Schema for the EluGradient operator. Input: Y, dY, output: dX.
// Exactly two inputs and one output; dX may be written in place over dY
// (input index 1 -> output index 0).
GRADIENT_OPERATOR_SCHEMA(EluGradient)
    .NumInputs(2)
    .NumOutputs(1)
    .AllowInplace({{1, 0}})
    .SetDoc(R"DOC(
EluGradient takes both Y and dY and uses this to update dX according to the
chain rule and derivatives of the exponential linear unit (ELU) function.
)DOC");
namespace {

// Gradient maker for Elu. It emits a single gradient operator whose type is
// the forward operator's type with "Gradient" appended and whose name is
// empty. The gradient operator reads the forward output and the gradient of
// that output, and writes the gradient of the forward input.
class GetEluGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;

  std::vector<OperatorDef> GetGradientDefs() override {
    using BlobNames = std::vector<std::string>;
    return SingleGradientDef(
        def_.type() + "Gradient",
        "",
        BlobNames{O(0), GO(0)}, // inputs: Y, dY
        BlobNames{GI(0)}); // output: dX
  }
};

} // namespace
// Register GetEluGradient as the gradient maker for the Elu operator.
REGISTER_GRADIENT(Elu, GetEluGradient);
} // namespace caffe2