#include "caffe2/operators/selu_op.h"
#include "caffe2/utils/math.h"
namespace caffe2 {
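// Forward pass: Y = lambda * X for X > 0, and
// Y = lambda * (alpha * e^X - alpha) otherwise.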
template <>
bool SeluOp<float, CPUContext>::RunOnDevice() {
auto& X = Input(0);
auto* Y = Output(0);
Y->ResizeLike(X);
ConstEigenVectorArrayMap<float> Xvec(X.data<float>(), X.size());
EigenVectorArrayMap<float> Yvec(Y->mutable_data<float>(), Y->size());
Yvec = lambda_ * (Xvec > 0).select(Xvec, (alpha_ * Xvec.exp() - alpha_));
return true;
}
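// Backward pass. For X > 0, Y = lambda * X, so dX = lambda * dY.
// For X <= 0, Y = lambda * alpha * (e^X - 1), so
// dY/dX = lambda * alpha * e^X = Y + lambda * alpha,
// hence dX = dY * (Y + lambda * alpha). The gradient can therefore be
// computed from Y alone (Y > 0 exactly when X > 0 for positive lambda
// and alpha).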
template <>
bool SeluGradientOp<float, CPUContext>::RunOnDevice() {
auto& Y = Input(0);
auto& dY = Input(1);
auto* dX = Output(0);
CAFFE_ENFORCE_EQ(dY.size(), Y.size());
dX->ResizeLike(Y);
ConstEigenVectorArrayMap<float> Yvec(Y.data<float>(), Y.size());
ConstEigenVectorArrayMap<float> dYvec(dY.data<float>(), dY.size());
EigenVectorArrayMap<float> dXvec(dX->mutable_data<float>(), dX->size());
const float la = lambda_ * alpha_;
dXvec = (Yvec > 0).select(lambda_ * dYvec, dYvec * (Yvec + la));
return true;
}
REGISTER_CPU_OPERATOR(Selu, SeluOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(SeluGradient, SeluGradientOp<float, CPUContext>);
// Input: X; output: Y
OPERATOR_SCHEMA(Selu)
.NumInputs(1)
.NumOutputs(1)
.AllowInplace({{0, 0}})
.IdenticalTypeAndShape()
.SetDoc(R"DOC(
Selu takes one input data (Tensor<T>) and produces one output data
(Tensor<T>) where the function y = scale * (alpha * e^x - alpha) for x < 0,
and y = scale * x for x >= 0, is applied to the tensor elementwise.
)DOC")
.Arg(
"alpha",
"(float) defaults to approximately 1.6732; the alpha value used in the "
"SELU formula. It should be used together with the weight initialization "
"described in the paper; see https://arxiv.org/abs/1706.02515")
.Arg(
"scale",
"(float) defaults to approximately 1.0507; the scale (lambda) value used "
"in the SELU formula.")
.Input(0, "X", "input tensor")
.Output(0, "Y", "input tensor")
.InheritOnnxSchema("Selu");
// Input: Y, dY; output: dX
OPERATOR_SCHEMA(SeluGradient)
.NumInputs(2)
.NumOutputs(1)
.AllowInplace({{1, 0}})
.SetDoc(R"DOC(
SeluGradient takes both Y and dY and uses them to compute dX according to
the chain rule and the derivative of the SELU function.
)DOC")
.Arg(
"alpha",
"(float) defaults to approximately 1.6732; the alpha value used in the "
"SELU formula. It should be used together with the weight initialization "
"described in the paper; see https://arxiv.org/abs/1706.02515")
.Arg(
"scale",
"(float) defaults to approximately 1.0507; the scale (lambda) value used "
"in the SELU formula.")
.Input(0, "Y", "input tensor")
.Input(1, "dY", "input tensor");
class GetSeluGradient : public GradientMakerBase {
using GradientMakerBase::GradientMakerBase;
vector<OperatorDef> GetGradientDefs() override {
return SingleGradientDef(
def_.type() + "Gradient",
"",
vector<string>{O(0), GO(0)},
vector<string>{GI(0)});
}
};
REGISTER_GRADIENT(Selu, GetSeluGradient);
} // namespace caffe2