| from .optimizer import Optimizer |
| |
class Adadelta(Optimizer):
    """Adadelta optimizer (Zeiler, "ADADELTA: An Adaptive Learning Rate Method").

    For every parameter two running averages are kept in ``self.state``:
    ``square_avg`` (squared gradients) and ``acc_delta`` (squared updates).
    Each update is the gradient scaled element-wise by
    ``sqrt(acc_delta + eps) / sqrt(square_avg + eps)``.

    Args:
        params: parameters (or parameter groups) to optimize; forwarded
            unchanged to the ``Optimizer`` base class.
        rho: decay coefficient applied to both running averages.
        eps: constant added to each running average before the square root.
        weight_decay: when non-zero, ``weight_decay * p.data`` is added to
            the gradient before the update is computed.
    """

    def __init__(self, params, rho=0.9, eps=1e-6, weight_decay=0):
        defaults = dict(rho=rho, eps=eps, weight_decay=weight_decay)
        super(Adadelta, self).__init__(params, defaults)

    def step(self, closure=None):
        """Apply one Adadelta update to every parameter that has a gradient.

        Args:
            closure: optional callable; when given it is invoked once before
                the update and its return value is returned from ``step``.

        Returns:
            The value returned by ``closure``, or ``None`` without a closure.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            # Hyper-parameters are per group, so read them once per group.
            rho, eps = group['rho'], group['eps']
            weight_decay = group['weight_decay']

            for p in group['params']:
                grad = p.grad
                # A parameter without a gradient has nothing to apply; skip
                # it instead of failing on ``None.new()`` below.
                if grad is None:
                    continue

                state = self.state[id(p)]

                # Lazy state initialization on the first update of ``p``.
                if not state:
                    state['step'] = 0
                    state['square_avg'] = grad.new().resize_as_(grad).zero_()
                    state['acc_delta'] = grad.new().resize_as_(grad).zero_()

                square_avg, acc_delta = state['square_avg'], state['acc_delta']

                state['step'] += 1

                if weight_decay != 0:
                    # Out-of-place add: ``p.grad`` itself must stay untouched.
                    grad = grad.add(weight_decay, p.data)

                # square_avg <- rho * square_avg + (1 - rho) * grad^2
                square_avg.mul_(rho).addcmul_(1 - rho, grad, grad)
                # ``add`` allocates fresh tensors, so the in-place ops that
                # follow never modify the stored running averages.
                std = square_avg.add(eps).sqrt_()
                delta = acc_delta.add(eps).sqrt_().div_(std).mul_(grad)
                p.data.sub_(delta)
                # acc_delta <- rho * acc_delta + (1 - rho) * delta^2
                acc_delta.mul_(rho).addcmul_(1 - rho, delta, delta)

        return loss
| |