| #include "caffe2/sgd/learning_rate_op.h" | 
 |  | 
 | namespace caffe2 { | 
 | REGISTER_CPU_OPERATOR(LearningRate, LearningRateOp<float, CPUContext>); | 
 |  | 
 | OPERATOR_SCHEMA(LearningRate) | 
 |     .NumInputs(1) | 
 |     .NumOutputs(1) | 
 |     .TensorInferenceFunction([](const OperatorDef&, | 
 |                                 const vector<TensorShape>& in) { | 
 |       vector<TensorShape> out(1); | 
 |       out[0] = in[0]; | 
 |       return out; | 
 |     }) | 
 |     .SetDoc(R"DOC( | 
 | Learning rate is a decreasing function of time. With low learning rates the | 
 | improvements will be linear. With high learning rates they will start to look | 
 | more exponential. Learning rate is controlled by the following arguments: | 
 |  | 
 |  | 
 | Required: | 
 |  `iterations` | 
 |  `base_lr`: base learning rate | 
 |  `policy`: this controls how the learning rate is applied, options are: | 
 |    `fixed` | 
 |    `step`: uses `stepsize`, `gamma` | 
 |    `exp`: uses `gamma` | 
 |    `gate`: uses 'multiplier_1', 'multiplier_2', `num_iter`` | 
 |    `inv`: uses `gamma`, `power` | 
 |    `linearWarmup`: uses `start_multiplier`, `num_iter` | 
 |    `constantWarmup`: uses `multiplier`, `num_iter` | 
 |    `alter`: uses  `active_first`, `active_period`, `inactive_period` | 
 |    `hill`: uses those in both `linearWarmup` and `inv`, plus `end_multiplier` | 
 |    `composite`: uses `sub_policy_num_iters` and additional args with format | 
 |    `cyclic`: uses `max_lr`, `stepsize` | 
 |    `cosine`: uses `min_lr`, `max_lr`, `period`, `t_mult`, `lr_shrink` | 
 |    `constantThenLinearWarmup`: uses `start_warmup_multiplier`, `constant_warmup_num_iter`, `linear_warmup_num_iter` | 
 |    `compositeCyclical`: uses `start_warmup_multiplier`, `constant_warmup_num_iter`, `linear_warmup_num_iter`, `cyclical_max_lr`, `cyclical_step_size`, `cyclical_decay` | 
 |    `compositeCosine`: uses `start_warmup_multiplier`, `constant_warmup_num_iter`, `linear_warmup_num_iter`, `cosine_max_lr`, `cosine_period`, `cosine_t_mult`, `cosine_lr_shrink` | 
 |    sub_policy_{sub_policy_index}_{sub_policy_arg}, for example: | 
 |    sub_policy_0_policy: "exp", sub_policy_0_gamma: 0.99, | 
 |    sub_policy_0_lr_scale: 1.2 | 
 |    sub_policy_0_policy: "fixed", sub_policy_0_lr_scale: 1.0 | 
 |    sub_policy_num_iters: [1000, 1000] | 
 |  | 
 | Optional: | 
 |   `stepsize`: defaults to 0 | 
 |   `max_lr`: defaults to 0.005 | 
 |   `gamma`: defaults to 0 | 
 |   `power`: defaults to 0 | 
 |   `num_iter`: defaults to 0 | 
 |   `start_multiplier`: defaults to 0 | 
 |   `multiplier`: defaults to 0.5 | 
 |   `multiplier_1`: defaults to 1 | 
 |   `multiplier_2`: defaults to 1 | 
 |   `m1`: defaults to 0.5, the first piece lr of piece warmup | 
 |   `n1`: defaults to 0, iter threshold of the first piece lr | 
 |   `m2`: defaults to 0.5, the second piece lr of piece warmup | 
 |   `n2`: defaults to 0, iter threshold of the second piece lr | 
 |   `m3`: defaults to 0.5, the third piece lr of piece warmup | 
 |   `start_warmup_multiplier`: defaults to 0.1, part of constantThenLinearWarmup | 
 |   `constant_warmup_num_iter`: defaults to 10000000, part of constantThenLinearWarmup and constantThenLinearWarmup | 
 |   `linear_warmup_num_iter`: defaults to 10000000, part of constantThenLinearWarmup, CompositeCyclicalLRPolicy, CompositeCosineLRPolicy | 
 |   `cyclical_max_lr`: defaults to 0.05, part of CompositeCyclicalLRPolicy | 
 |   `cyclical_step_size`: defaults to 1000000, part of CompositeCyclicalLRPolicy | 
 |   `cyclical_decay`: defaults to 1.0, part of CompositeCyclicalLRPolicy | 
 |   `cosine_min_lr`:defaults to 0.01, part of CompositeCosineLRPolicy | 
 |   `cosine_max_lr`:defaults to 0.05, part of CompositeCosineLRPolicy | 
 |   `cosine_period`:defaults to 50, part of CompositeCosineLRPolicy | 
 |   `cosine_t_mult`:defaults to 1.0, part of CompositeCosineLRPolicy | 
 |   `cosine_lr_shrink`:defaults to 0.99, part of CompositeCosineLRPolicy | 
 |  | 
 | Usage: | 
 |   train_net.LearningRate(*iterations*, "*label*", base_lr=*float*, | 
 |                          policy="policy_name", stepsize=*int*, gamma=*float*) | 
 |  | 
 |  | 
 | Example usage: | 
 |   train_net.LearningRate(200, "LR", base_lr=-0.1, | 
 |                          policy="step", stepsize=20, gamma=0.9) | 
 | )DOC") | 
 |     .Arg("base_lr", "(float, required) base learning rate") | 
 |     .Arg("policy", "(float, default 1.0) strategy for gamma enforcement") | 
 |     .Arg("power", "(float, default 1.0) used only for inv policy type") | 
 |     .Arg("gamma", "(float, default 1.0) momentum of change") | 
 |     .Arg("stepsize", "(float, default 1.0) sampling rate on iterations") | 
 |     .Arg("max_lr", "(float, default 0.005) max learning rate") | 
 |     .Arg("active_first", "(boolean, default True) in alter policy") | 
 |     .Arg("active_period", "(int64_t, required) in alter policy") | 
 |     .Arg("inactive_period", "(int64_t, required) in alter policy") | 
 |     .Arg( | 
 |         "max_iter", | 
 |         "(int, default -1) maximum iterations in this training run") | 
 |     .Arg( | 
 |         "num_iter", | 
 |         "(int, default 0) number of iterations over which to warmup lr") | 
 |     .Arg( | 
 |         "start_multiplier", | 
 |         "(float, default 0) starting multiplier for learning rate") | 
 |     .Arg( | 
 |         "end_multiplier", | 
 |         "(float, default 0) end multiplier for learning rate") | 
 |     .Arg( | 
 |         "multiplier", | 
 |         "(float, default 0.5) constant multiplier for learning rate") | 
 |     .Arg( | 
 |         "multiplier_1", | 
 |         "(float, default 1) start multiplier for learning rate") | 
 |     .Arg("multiplier_2", "(float, default 1) end multiplier for learning rate") | 
 |     .Arg( | 
 |         "sub_policy_num_iters", | 
 |         "(int array, default empty) number of iterations for each sub learning rate policy in composite policy") | 
 |     .Arg("m1", "") | 
 |     .Arg("n1", "") | 
 |     .Arg("m2", "") | 
 |     .Arg("n2", "") | 
 |     .Arg("m3", "") | 
 |     .Arg("start_warmup_multiplier", "defaults to 0.1") | 
 |     .Arg("constant_warmup_num_iter", "defaults to 10000000") | 
 |     .Arg("linear_warmup_num_iter", "defaults to 10000000") | 
 |     .Arg( | 
 |         "cyclical_max_lr", | 
 |         "defaults to 0.05, part of CompositeCyclicalLRPolicy") | 
 |     .Arg( | 
 |         "cyclical_step_size", | 
 |         "defaults to 1000000, part of CompositeCyclicalLRPolicy") | 
 |     .Arg( | 
 |         "cyclical_decay", | 
 |         "defaults to 0.999, part of CompositeCyclicalLRPolicy") | 
 |     .Arg("cosine_min_lr", "defaults to 0.01, part of CompositeCosineLRPolicy") | 
 |     .Arg("cosine_max_lr", "defaults to 0.05, part of CompositeCosineLRPolicy") | 
 |     .Arg("cosine_period", "defaults to 50, part of CompositeCosineLRPolicy") | 
 |     .Arg("cosine_t_mult", "defaults to 1,0, part of CompositeCosineLRPolicy") | 
 |     .Arg( | 
 |         "cosine_lr_shrink", | 
 |         "defaults to 0.99, part of CompositeCosineLRPolicy") | 
 |     .Arg( | 
 |          "num_iter_1", | 
 |         "(int, default 0) number of iterations over which to warmup for slope policy") | 
 |     .Arg( | 
 |          "num_iter_2", | 
 |         "(int, default 0) number of iterations over which to gradually gate for slope policy") | 
 |     .Input(0, "input", "description needed") | 
 |     .Output(0, "output", "description needed") | 
 |     .DeviceInferenceFunction([](const OperatorDef& def) { | 
 |       return std::make_pair( | 
 |           std::vector<DeviceOption>{DeviceOption()}, | 
 |           std::vector<DeviceOption>{def.device_option()}); | 
 |     }); | 
 |  | 
 | NO_GRADIENT(LearningRate); | 
 | } // namespace caffe2 | 
 |  | 
// Alias for the concrete CPU instantiation exported to c10 below.
using LearningRateOpFloatCPU =
    caffe2::LearningRateOp<float, caffe2::CPUContext>;

// Expose the caffe2 operator to PyTorch under "_caffe2::LearningRate".
// The string is a torch function schema parsed at registration time; every
// optional argument mirrors one of the schema Args registered above.
// NOTE(review): some defaults here disagree with the OPERATOR_SCHEMA doc
// (e.g. `stepsize` = 1 here vs 0 above, `gamma`/`power` = 1.0 here vs 0
// above) — confirm against the GetSingleArgument defaults in
// learning_rate_op.h.
C10_EXPORT_CAFFE2_OP_TO_C10_CPU(
    LearningRate,
    "_caffe2::LearningRate("
    "Tensor iterations, "
    "float base_lr,"
    "str policy, "
    "float? power = 1.0, "
    "float? gamma = 1.0, "
    "int? stepsize = 1, "
    "float? max_lr = 0.005, "
    "bool? active_first = True, "
    "int? active_period = -1, "
    "int? inactive_period = -1, "
    "int? max_iter = -1, "
    "int? num_iter = 0, "
    "float? start_multiplier = 0, "
    "float? end_multiplier = 0, "
    "float? multiplier = 0.5, "
    "float? multiplier_1 = 1.0, "
    "float? multiplier_2 = 1.0, "
    "int[]? sub_policy_num_iters = None, "
    "float? m1 = 0.5, "
    "float? n1 = 0, "
    "float? m2 = 0.5, "
    "float? n2 = 0, "
    "float? m3 = 0.5, "
    "float? start_warmup_multiplier = 0.1, "
    "int? constant_warmup_num_iter = 10000000, "
    "int? linear_warmup_num_iter = 10000000, "
    "float? cyclical_max_lr = 0.05, "
    "int? cyclical_step_size = 1000000, "
    "float? cyclical_decay = 0.999, "
    "float? cosine_min_lr = 0.01, "
    "float? cosine_max_lr = 0.05, "
    "int? cosine_period = 50, "
    "float? cosine_t_mult = 1.0, "
    "float? cosine_lr_shrink = 0.99, "
    "float? decay = 1.0, "
    "int? num_iter_1 = 0, "
    "int? num_iter_2 = 0) -> Tensor output",
    LearningRateOpFloatCPU);