import torch
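
# RPROP (resilient backpropagation) adapts a per-component step size using only
# the sign of the gradient: when a gradient component keeps its sign between two
# iterations, its step size is multiplied by `etaplus`; when the sign flips, the
# step size is multiplied by `etaminus` and that component is not moved on this
# iteration. The update applied to `x` is -sign(df/dx) * stepsize.
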
def rprop(opfunc, x, config, state=None):
""" A plain implementation of RPROP
ARGS:
- `opfunc` : a function that takes a single input (X), the point of
evaluation, and returns f(X) and df/dX
- `x` : the initial point
- `state` : a table describing the state of the optimizer; after each
call the state is modified
- `state['stepsize']` : initial step size, common to all components
- `state['etaplus']` : multiplicative increase factor, > 1 (default 1.2)
- `state['etaminus']` : multiplicative decrease factor, < 1 (default 0.5)
- `state['stepsizemax']` : maximum stepsize allowed (default 50)
- `state['stepsizemin']` : minimum stepsize allowed (default 1e-6)
- `state['niter']` : number of iterations (default 1)
RETURN:
- `x` : the new x vector
- `f(x)` : the function, evaluated before the update
(Martin Riedmiller, Koray Kavukcuoglu 2013)
"""
    if config is None and state is None:
        raise ValueError("rprop requires a dictionary to retain state between iterations")

    # (0) get/update state
    state = state if state is not None else config
    config = config if config is not None else state
    stepsize = config.get('stepsize', 0.1)
    etaplus = config.get('etaplus', 1.2)
    etaminus = config.get('etaminus', 0.5)
    stepsizemax = config.get('stepsizemax', 50.0)
    stepsizemin = config.get('stepsizemin', 1e-6)
    niter = config.get('niter', 1)
    hfx = []

    for i in range(niter):
        # (1) evaluate f(x) and df/dx
        fx, dfdx = opfunc(x)

        # init temp storage (lazily, on the first iteration)
        if 'delta' not in state:
            state['delta'] = torch.zeros_like(dfdx)
            state['stepsize'] = torch.full_like(dfdx, stepsize)
            state['sign'] = torch.empty_like(dfdx)
            # boolean index masks, allocated on the same device as the gradient
            # so no separate CPU/GPU code path is needed
            state['psign'] = torch.empty(dfdx.size(), dtype=torch.bool, device=dfdx.device)
            state['nsign'] = torch.empty(dfdx.size(), dtype=torch.bool, device=dfdx.device)
            state['zsign'] = torch.empty(dfdx.size(), dtype=torch.bool, device=dfdx.device)
            state['dminmax'] = torch.empty(dfdx.size(), dtype=torch.bool, device=dfdx.device)
        # sign of the product of the previous and current gradient:
        # > 0 where the gradient kept its sign, < 0 where it flipped
        torch.mul(dfdx, state['delta'], out=state['sign'])
        state['sign'].sign_()

        # get indices of > 0, < 0 and == 0 entries
        torch.gt(state['sign'], 0, out=state['psign'])
        torch.lt(state['sign'], 0, out=state['nsign'])
        torch.eq(state['sign'], 0, out=state['zsign'])

        # get step size updates
        state['sign'][state['psign']] = etaplus
        state['sign'][state['nsign']] = etaminus
        state['sign'][state['zsign']] = 1

        # update step sizes with step size updates
        state['stepsize'].mul_(state['sign'])

        # threshold step sizes
        # > stepsizemax => stepsizemax
        torch.gt(state['stepsize'], stepsizemax, out=state['dminmax'])
        state['stepsize'][state['dminmax']] = stepsizemax
        # < stepsizemin => stepsizemin
        torch.lt(state['stepsize'], stepsizemin, out=state['dminmax'])
        state['stepsize'][state['dminmax']] = stepsizemin

        # where the gradient sign flipped, zero the gradient so that no step
        # is taken along that component this iteration
        dfdx[state['nsign']] = 0
        torch.sign(dfdx, out=state['sign'])

        # update weights: x <- x - sign(df/dx) * stepsize
        x.addcmul_(state['sign'], state['stepsize'], value=-1)

        # remember the current gradient for the next iteration
        state['delta'].copy_(dfdx)

        hfx.append(fx)

    # return x*, list of f(x) values from each step
    return x, hfx
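

# Minimal usage sketch (illustrative only): minimize f(x) = ||x - target||^2
# with RPROP. The names `quadratic`, `target`, `x0` and `cfg` are placeholders
# introduced for this example; they are not part of the optimizer above.
if __name__ == '__main__':
    target = torch.tensor([1.0, -2.0, 3.0])
    x0 = torch.zeros(3)

    def quadratic(x):
        # returns the objective f(x) and its analytic gradient df/dx
        diff = x - target
        return diff.pow(2).sum(), 2.0 * diff

    cfg = {'stepsize': 0.1, 'niter': 1}   # `cfg` also serves as the state dict
    for _ in range(100):
        x0, fs = rprop(quadratic, x0, cfg)
    print(x0)   # expected to end up close to `target`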