Implementation of cyclical learning rate (#23914)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/23914
Implementation of cyclical learning rate, see https://arxiv.org/pdf/1506.01186.pdf
Test Plan: canary: https://fburl.com/fblearner/siqb34md
Reviewed By: chenshouyuan
Differential Revision: D16632831
fbshipit-source-id: 20bd9d7fb61af5a8b594b039c5d434a0cc96fadc
diff --git a/caffe2/sgd/learning_rate_functors.h b/caffe2/sgd/learning_rate_functors.h
index 538c544..aec2435 100644
--- a/caffe2/sgd/learning_rate_functors.h
+++ b/caffe2/sgd/learning_rate_functors.h
@@ -250,6 +250,24 @@
std::map<int64_t, std::unique_ptr<LearningRateFunctor<T>>> sub_policies_;
};
+// Cyclical: return a learning rate scale with period 2 * stepsize and
+// lower bound base_lr, upper bound max_lr (triangular policy).
+// See https://arxiv.org/pdf/1506.01186.pdf
+template <typename T>
+class CyclicalLearningRate : public LearningRateFunctor<T> {
+ public:
+  CyclicalLearningRate(const T base_lr, const T max_lr, const int stepsize)
+      : base_lr_(base_lr), max_lr_(max_lr), stepsize_(stepsize) {}
+  T operator()(const int64_t iter) const override {
+    int cycle = static_cast<int>((iter / (2 * stepsize_)) + 1);
+    T x = std::abs(static_cast<T>(iter) / stepsize_ - 2 * cycle + 1); // std::abs: unqualified abs picks int abs(int) and truncates
+    return (1 + (T(max_lr_) / T(base_lr_) - 1) * std::max(T(0.0), (1 - x)));
+  }
+  T base_lr_;
+  T max_lr_;
+  int stepsize_;
+};
+
} // namespace caffe2
#endif // CAFFE2_SGD_LEARNING_RATE_FUNCTORS_H_
diff --git a/caffe2/sgd/learning_rate_op.cc b/caffe2/sgd/learning_rate_op.cc
index 2072b80..aa49715 100644
--- a/caffe2/sgd/learning_rate_op.cc
+++ b/caffe2/sgd/learning_rate_op.cc
@@ -32,6 +32,7 @@
`alter`: uses `active_first`, `active_period`, `inactive_period`
`hill`: uses those in both `linearWarmup` and `inv`, plus `end_multiplier`
`composite`: uses `sub_policy_num_iters` and additional args with format
+    `cyclical`: uses `max_lr`, `stepsize`
sub_policy_{sub_policy_index}_{sub_policy_arg}, for example:
sub_policy_0_policy: "exp", sub_policy_0_gamma: 0.99,
sub_policy_0_lr_scale: 1.2
@@ -40,6 +41,7 @@
Optional:
`stepsize`: defaults to 0
+ `max_lr`: defaults to 0.005
`gamma`: defaults to 0
`power`: defaults to 0
`num_iter`: defaults to 0
@@ -68,6 +70,7 @@
.Arg("power", "(float, default 1.0) used only for inv policy type")
.Arg("gamma", "(float, default 1.0) momentum of change")
.Arg("stepsize", "(float, default 1.0) sampling rate on iterations")
+ .Arg("max_lr", "(float, default 0.005) max learning rate")
.Arg("active_first", "(boolean, default True) in alter policy")
.Arg("active_period", "(int64_t, required) in alter policy")
.Arg("inactive_period", "(int64_t, required) in alter policy")
diff --git a/caffe2/sgd/learning_rate_op.h b/caffe2/sgd/learning_rate_op.h
index b355ed9..8a57a31 100644
--- a/caffe2/sgd/learning_rate_op.h
+++ b/caffe2/sgd/learning_rate_op.h
@@ -171,6 +171,14 @@
createLearningRateFunctor(sub_policy, sub_policy_arg_prefix_str)));
}
return new CompositeLearningRate<T>(sub_policies);
+ } else if (policy == "cyclical") {
+ T max_lr =
+ this->template GetSingleArgument<float>(arg_prefix + "max_lr", 0.005);
+ int stepsize =
+ this->template GetSingleArgument<int>(arg_prefix + "stepsize", 0);
+ DCHECK_GT(stepsize, 0);
+ DCHECK_GE(max_lr, base_lr_);
+ return new CyclicalLearningRate<T>(base_lr_, max_lr, stepsize);
} else {
CAFFE_THROW("Unknown learning rate policy: ", policy);
return NULL;