|  | #include "caffe2/operators/selu_op.h" | 
|  |  | 
|  | #include "caffe2/utils/eigen_utils.h" | 
|  | #include "caffe2/utils/math.h" | 
|  |  | 
|  | namespace caffe2 { | 
|  |  | 
|  | template <> | 
|  | bool SeluOp<float, CPUContext>::RunOnDevice() { | 
|  | auto& X = Input(0); | 
|  |  | 
|  | auto* Y = Output(0, X.sizes(), at::dtype<float>()); | 
|  |  | 
|  | ConstEigenVectorArrayMap<float> Xvec(X.data<float>(), X.numel()); | 
|  | EigenVectorArrayMap<float> Yvec( | 
|  | Y->template mutable_data<float>(), Y->numel()); | 
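  // Elementwise SELU: y = lambda_ * x for x > 0, and
  // y = lambda_ * alpha_ * (exp(x) - 1) otherwise.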
  Yvec = lambda_ * (Xvec > 0).select(Xvec, (alpha_ * Xvec.exp() - alpha_));
  return true;
}

template <>
bool SeluGradientOp<float, CPUContext>::RunOnDevice() {
  auto& Y = Input(0);
  auto& dY = Input(1);

  CAFFE_ENFORCE_EQ(dY.numel(), Y.numel());
  auto* dX = Output(0, Y.sizes(), at::dtype<float>());

  ConstEigenVectorArrayMap<float> Yvec(Y.data<float>(), Y.numel());
  ConstEigenVectorArrayMap<float> dYvec(dY.data<float>(), dY.numel());
  EigenVectorArrayMap<float> dXvec(
      dX->template mutable_data<float>(), dX->numel());

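  // Derivative expressed through the output: dy/dx = lambda_ for x > 0, and
  // dy/dx = lambda_ * alpha_ * exp(x) = Y + lambda_ * alpha_ otherwise.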
  const float la = lambda_ * alpha_;
  dXvec = (Yvec > 0).select(lambda_ * dYvec, dYvec * (Yvec + la));
  return true;
}

REGISTER_CPU_OPERATOR(Selu, SeluOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(SeluGradient, SeluGradientOp<float, CPUContext>);

// Input: X; output: Y
OPERATOR_SCHEMA(Selu)
    .NumInputs(1)
    .NumOutputs(1)
    .AllowInplace({{0, 0}})
    .IdenticalTypeAndShape()
|  | .SetDoc(R"DOC( | 
|  |  | 
|  | The *Selu* op takes one input tensor $X$, an argument $alpha$, an argument $scale$, and produces one output tensor $Y$ of the same shape as $X.$ The op performs the element wise *Selu* operation, defined as | 
|  |  | 
|  | $$y=selu(x) =\begin{cases}scale (\alpha e^{x} - \alpha) & x < 0\\scale * x & otherwise\end{cases}$$ | 
|  |  | 
|  | The default value of *alpha* is 1.6732632423543772848170429916717 and the default value of *scale* is 1.0507009873554804934193349852946. See [Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515) for more information. | 

Github Links:

- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/selu_op.h
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/selu_op.cc


<details>

<summary> <b>Example</b> </summary>

**Code**

```

from caffe2.python import core, workspace
import numpy as np

workspace.ResetWorkspace()

op = core.CreateOperator(
    "Selu",
    ["X"],
    ["Y"],
)

workspace.FeedBlob("X", np.random.randn(3, 3).astype(np.float32))
print("X:\n", workspace.FetchBlob("X"), "\n")

workspace.RunOperatorOnce(op)
print("Y:\n", workspace.FetchBlob("Y"))

```

**Result**

```

X:
 [[ 1.1613879  -0.27111396 -1.2076733 ]
 [ 1.3442237  -1.0701777   1.2070968 ]
 [ 0.23810555  0.9740916  -1.7872391 ]]

Y:
 [[ 1.2202715  -0.4174965  -1.2326177 ]
 [ 1.4123772  -1.1551634   1.2682979 ]
 [ 0.25017774  1.023479   -1.4637551 ]]

```

</details>

|  | )DOC") | 
|  | .Arg( | 
|  | "alpha", | 
|  | "*(type: float; default: 1.673263~)* Alpha constant in equation.") | 
|  | .Arg( | 
|  | "scale", | 
|  | "*(type: float; default: 1.050700~; must be > 1.0)* Scale constant in equation.") | 
|  | .Input(0, "X", "Input tensor of data to be operated on.") | 
|  | .Output(0, "Y", "Output tensor with same shape as input.") | 
|  | .InheritOnnxSchema(); | 

// Input: Y, dY; output: dX
OPERATOR_SCHEMA(SeluGradient)
    .NumInputs(2)
    .NumOutputs(1)
    .AllowInplace({{1, 0}})
    .SetDoc(R"DOC(
SeluGradient takes both Y and dY and computes dX from them according to the
chain rule and the derivative of the Selu function.
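
Since $y = scale * x$ for $x > 0$ and $y = scale * (\alpha e^{x} - \alpha)$ otherwise, the derivative can be expressed in terms of the output $y$ itself:

$$\frac{dy}{dx} =\begin{cases}y + scale * \alpha & x < 0\\scale & otherwise\end{cases}$$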
|  | )DOC") | 
|  | .Arg( | 
|  | "alpha", | 
|  | "(float) default to 1.6732~; affects the activation function itself." | 
|  | "This should go with the weight initialization in the paper. " | 
|  | " See https://arxiv.org/abs/1706.02515 ") | 
|  | .Arg( | 
|  | "scale", | 
|  | "(float) default to 1.0507~; affects the activation function itself.") | 
|  | .Input(0, "Y", "input tensor") | 
|  | .Input(1, "dY", "input tensor"); | 

class GetSeluGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
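    // The gradient op consumes the forward output Y and the output gradient
    // dY, and produces the input gradient dX.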
    return SingleGradientDef(
        def_.type() + "Gradient",
        "",
        vector<string>{O(0), GO(0)},
        vector<string>{GI(0)});
  }
};
REGISTER_GRADIENT(Selu, GetSeluGradient);

} // namespace caffe2