// caffe2/sgd/decay_adagrad_op.h - platform/external/pytorch - Git at Google

 #pragma once

 #include "caffe2/core/operator.h"
 #include "caffe2/utils/eigen_utils.h"

 namespace caffe2 {

 template <typename Context>
 void decay_adagrad_compute(
     int N,
     const float* w,
     const float* g,
     const float* m,
     const float* v,
     float* nw,
     float* nm,
     float* nv,
     float beta1,
     float beta2,
     float eps_hat,
     float weight_decay,
     float c,
     const float* lr,
     Context* /*context*/) {
     ConstEigenVectorArrayMap<float> w_arr(w, N);
     ConstEigenVectorArrayMap<float> g_arr(g, N);
     ConstEigenVectorArrayMap<float> m_arr(m, N);
     ConstEigenVectorArrayMap<float> v_arr(v, N);
     EigenVectorArrayMap<float> nw_arr(nw, N);
     EigenVectorArrayMap<float> nm_arr(nm, N);
     EigenVectorArrayMap<float> nv_arr(nv, N);
     nm_arr = m_arr * beta1 + g_arr * (1.0f - beta1);
     nv_arr = v_arr + g_arr.square();
     nw_arr = w_arr + *lr * (nm_arr / c / (nv_arr.sqrt() + eps_hat) + weight_decay * w_arr);
 }

 // Operator that runs decay_adagrad_compute over whole tensors.
 //
 // Inputs:  PARAM, MOMENT_1, MOMENT_2, GRAD, LR, ITER
 // Outputs: OUTPUT_PARAM, OUTPUT_MOMENT_1, OUTPUT_MOMENT_2
 //
 // Operator arguments and the defaults used when they are absent:
 //   beta1 (0.9), beta2 (0.999), epsilon (1e-5), weight_decay (0),
 //   bias_correction_first (true).
 template <typename T, class Context>
 class DecayAdagradOp final : public Operator<Context> {
  public:
   USE_OPERATOR_CONTEXT_FUNCTIONS;

   // Reads every hyper-parameter from the operator definition.
   DecayAdagradOp(const OperatorDef& operator_def, Workspace* ws)
       : Operator<Context>(operator_def, ws),
         beta1_(this->template GetSingleArgument<float>("beta1", 0.9f)),
         beta2_(this->template GetSingleArgument<float>("beta2", 0.999f)),
         epsilon_(this->template GetSingleArgument<float>("epsilon", 1e-5f)),
         weight_decay_(
             this->template GetSingleArgument<float>("weight_decay", 0.0f)),
         bias_correction_first_(this->template GetSingleArgument<bool>(
             "bias_correction_first", true)) {}

   // Validates the inputs, sizes the outputs like their matching inputs, and
   // performs one update step. Returns true once the step has been applied.
   bool RunOnDevice() override {
     // The iteration counter is required to be a CPU tensor.
     CAFFE_ENFORCE(OperatorBase::InputIsTensorType(ITER, CPU));

     // The learning rate must be a single element.
     const auto& lr_tensor = Input(LR);
     CAFFE_ENFORCE(lr_tensor.numel() == 1);

     // Parameters and both moments must have as many elements as the gradient.
     const auto& grad_tensor = Input(GRAD);
     const auto& param_tensor = Input(PARAM);
     CAFFE_ENFORCE(grad_tensor.numel() == param_tensor.numel());
     const auto& moment1_tensor = Input(MOMENT_1);
     CAFFE_ENFORCE(grad_tensor.numel() == moment1_tensor.numel());
     const auto& moment2_tensor = Input(MOMENT_2);
     CAFFE_ENFORCE(grad_tensor.numel() == moment2_tensor.numel());

     auto* param_out = Output(OUTPUT_PARAM);
     param_out->ResizeLike(param_tensor);
     auto* moment1_out = Output(OUTPUT_MOMENT_1);
     moment1_out->ResizeLike(moment1_tensor);
     auto* moment2_out = Output(OUTPUT_MOMENT_2);
     moment2_out->ResizeLike(moment2_tensor);

     const auto iter =
         OperatorBase::Input<Tensor>(ITER, CPU).template data<int64_t>()[0];

     // The exponent is the stored iteration plus one. The divisor is
     // 1 - beta1^step when bias correction is on, and 1 otherwise.
     const auto step = iter + 1;
     const auto correction = bias_correction_first_
         ? (T(1.) - std::pow(beta1_, step))
         : 1.0;

     decay_adagrad_compute<Context>(
         grad_tensor.numel(),
         param_tensor.template data<T>(),
         grad_tensor.template data<T>(),
         moment1_tensor.template data<T>(),
         moment2_tensor.template data<T>(),
         param_out->template mutable_data<T>(),
         moment1_out->template mutable_data<T>(),
         moment2_out->template mutable_data<T>(),
         beta1_,
         beta2_,
         epsilon_,
         weight_decay_,
         correction,
         lr_tensor.template data<T>(),
         &context_);

     return true;
   }

  protected:
   // In-class initializers; the constructor above overwrites all of them with
   // the operator arguments (note its epsilon fallback is 1e-5, not 1e-8).
   T beta1_{0.9};
   T beta2_{0.999};
   T epsilon_{1e-8};
   T weight_decay_{0.0};
   bool bias_correction_first_{true};

   INPUT_TAGS(PARAM, MOMENT_1, MOMENT_2, GRAD, LR, ITER);
   OUTPUT_TAGS(OUTPUT_PARAM, OUTPUT_MOMENT_1, OUTPUT_MOMENT_2);
 };

 } // namespace caffe2
	#pragma once

	#include "caffe2/core/operator.h"
	#include "caffe2/utils/eigen_utils.h"

	namespace caffe2 {

	// Performs one element-wise Decay-Adagrad update over N floats.
	//
	// Reads the current parameters `w`, gradient `g`, first moment `m` and
	// accumulated squared gradient `v`, and writes the results to `nw`, `nm`
	// and `nv`:
	//
	//   nm = m * beta1 + g * (1 - beta1)
	//   nv = v + g^2
	//   nw = w + lr * (nm / c / (sqrt(nv) + eps_hat) + weight_decay * w)
	//
	// `lr` points at a single scalar; `c` divides the updated first moment.
	// `beta2` and the context pointer are accepted but not used here.
	//
	// NOTE(review): a definition with the same signature appears earlier in this
	// file -- confirm whether this second copy is intended.
	template <typename Context>
	void decay_adagrad_compute(
	    int N,
	    const float* w,
	    const float* g,
	    const float* m,
	    const float* v,
	    float* nw,
	    float* nm,
	    float* nv,
	    float beta1,
	    float beta2,
	    float eps_hat,
	    float weight_decay,
	    float c,
	    const float* lr,
	    Context* /*context*/) {
	  ConstEigenVectorArrayMap<float> w_arr(w, N);
	  ConstEigenVectorArrayMap<float> g_arr(g, N);
	  ConstEigenVectorArrayMap<float> m_arr(m, N);
	  ConstEigenVectorArrayMap<float> v_arr(v, N);
	  EigenVectorArrayMap<float> nw_arr(nw, N);
	  EigenVectorArrayMap<float> nm_arr(nm, N);
	  EigenVectorArrayMap<float> nv_arr(nv, N);
	  // First moment: weighted blend of the previous moment and the gradient.
	  nm_arr = m_arr * beta1 + g_arr * (1.0f - beta1);
	  // Second moment: running sum of squared gradients.
	  nv_arr = v_arr + g_arr.square();
	  // `lr` is a pointer to a scalar: dereference it and multiply.
	  nw_arr = w_arr +
	      *lr * (nm_arr / c / (nv_arr.sqrt() + eps_hat) + weight_decay * w_arr);
	}

	// Operator that runs decay_adagrad_compute over whole tensors.
	//
	// Inputs:  PARAM, MOMENT_1, MOMENT_2, GRAD, LR, ITER
	// Outputs: OUTPUT_PARAM, OUTPUT_MOMENT_1, OUTPUT_MOMENT_2
	//
	// Operator arguments and the defaults used when they are absent:
	//   beta1 (0.9), beta2 (0.999), epsilon (1e-5), weight_decay (0),
	//   bias_correction_first (true).
	template <typename T, class Context>
	class DecayAdagradOp final : public Operator<Context> {
	 public:
	  USE_OPERATOR_CONTEXT_FUNCTIONS;

	  // Reads every hyper-parameter from the operator definition.
	  DecayAdagradOp(const OperatorDef& operator_def, Workspace* ws)
	      : Operator<Context>(operator_def, ws),
	        beta1_(this->template GetSingleArgument<float>("beta1", 0.9f)),
	        beta2_(this->template GetSingleArgument<float>("beta2", 0.999f)),
	        epsilon_(this->template GetSingleArgument<float>("epsilon", 1e-5f)),
	        weight_decay_(
	            this->template GetSingleArgument<float>("weight_decay", 0.0f)),
	        bias_correction_first_(this->template GetSingleArgument<bool>(
	            "bias_correction_first", true)) {}

	  // Validates the inputs, sizes the outputs like their matching inputs, and
	  // performs one update step. Returns true once the step has been applied.
	  bool RunOnDevice() override {
	    // The iteration counter is required to be a CPU tensor.
	    CAFFE_ENFORCE(OperatorBase::InputIsTensorType(ITER, CPU));

	    // The learning rate must be a single element.
	    const auto& lr_tensor = Input(LR);
	    CAFFE_ENFORCE(lr_tensor.numel() == 1);

	    // Parameters and both moments must have as many elements as the gradient.
	    const auto& grad_tensor = Input(GRAD);
	    const auto& param_tensor = Input(PARAM);
	    CAFFE_ENFORCE(grad_tensor.numel() == param_tensor.numel());
	    const auto& moment1_tensor = Input(MOMENT_1);
	    CAFFE_ENFORCE(grad_tensor.numel() == moment1_tensor.numel());
	    const auto& moment2_tensor = Input(MOMENT_2);
	    CAFFE_ENFORCE(grad_tensor.numel() == moment2_tensor.numel());

	    auto* param_out = Output(OUTPUT_PARAM);
	    param_out->ResizeLike(param_tensor);
	    auto* moment1_out = Output(OUTPUT_MOMENT_1);
	    moment1_out->ResizeLike(moment1_tensor);
	    auto* moment2_out = Output(OUTPUT_MOMENT_2);
	    moment2_out->ResizeLike(moment2_tensor);

	    const auto iter =
	        OperatorBase::Input<Tensor>(ITER, CPU).template data<int64_t>()[0];

	    // The exponent is the stored iteration plus one. The divisor is
	    // 1 - beta1^step when bias correction is on, and 1 otherwise.
	    const auto step = iter + 1;
	    const auto correction = bias_correction_first_
	        ? (T(1.) - std::pow(beta1_, step))
	        : 1.0;

	    decay_adagrad_compute<Context>(
	        grad_tensor.numel(),
	        param_tensor.template data<T>(),
	        grad_tensor.template data<T>(),
	        moment1_tensor.template data<T>(),
	        moment2_tensor.template data<T>(),
	        param_out->template mutable_data<T>(),
	        moment1_out->template mutable_data<T>(),
	        moment2_out->template mutable_data<T>(),
	        beta1_,
	        beta2_,
	        epsilon_,
	        weight_decay_,
	        correction,
	        lr_tensor.template data<T>(),
	        &context_);

	    return true;
	  }

	 protected:
	  // In-class initializers; the constructor above overwrites all of them with
	  // the operator arguments (note its epsilon fallback is 1e-5, not 1e-8).
	  T beta1_{0.9};
	  T beta2_{0.999};
	  T epsilon_{1e-8};
	  T weight_decay_{0.0};
	  bool bias_correction_first_{true};

	  INPUT_TAGS(PARAM, MOMENT_1, MOMENT_2, GRAD, LR, ITER);
	  OUTPUT_TAGS(OUTPUT_PARAM, OUTPUT_MOMENT_1, OUTPUT_MOMENT_2);
	};

	} // namespace caffe2