torch/legacy/optim/cg.py - platform/external/pytorch - Git at Google

 import math

 INFINITY = float('inf')


 def sqrt_nothrow(x):
     return math.sqrt(x) if x >= 0 else float('nan')


 def cg(opfunc, x, config, state=None):
     """

     This cg implementation is a rewrite of minimize.m written by Carl
     E. Rasmussen. It is supposed to produce exactly same results (give
     or take numerical accuracy due to some changed order of
     operations). You can compare the result on rosenbrock with minimize.m.
     http://www.gatsby.ucl.ac.uk/~edward/code/minimize/example.html

         [x fx c] = minimize([0 0]', 'rosenbrock', -25)

     Note that we limit the number of function evaluations only, it seems much
     more important in practical use.

     ARGS:

     - `opfunc` : a function that takes a single input, the point of evaluation.
     - `x`      : the initial point
     - `state` : a table of parameters and temporary allocations.
     - `state['maxEval']`     : max number of function evaluations
     - `state['maxIter']`     : max number of iterations
     - `state['df0']` : if you pass torch.Tensor they will be used for temp storage
     - `state['df1']` : if you pass torch.Tensor they will be used for temp storage
     - `state['df2']` : if you pass torch.Tensor they will be used for temp storage
     - `state['df3']` : if you pass torch.Tensor they will be used for temp storage
     - `state['s']`   : if you pass torch.Tensor they will be used for temp storage
     - `state['x0']`  : if you pass torch.Tensor they will be used for temp storage

     RETURN:
     - `x*` : the new x vector, at the optimal point
     - `f`  : a table of all function values where
         `f[1]` is the value of the function before any optimization and
         `f[#f]` is the final fully optimized value, at x*

     (Koray Kavukcuoglu, 2012)
     """
     # parameters
     if config is None and state is None:
         raise ValueError("cg requires a dictionary to retain state between iterations")
     state = state if state is not None else config
     rho = config.get('rho', 0.01)
     sig = config.get('sig', 0.5)
     _int = config.get('int', 0.1)
     ext = config.get('ext', 3.0)
     maxIter = config.get('maxIter', 20)
     ratio = config.get('ratio', 100)
     maxEval = config.get('maxEval', maxIter * 1.25)
     red = 1

     i = 0
     ls_failed = 0
     fx = []

     # we need three points for the interpolation/extrapolation stuff
     z1, z2, z3 = 0, 0, 0
     d1, d2, d3 = 0, 0, 0
     f1, f2, f3 = 0, 0, 0

     df1 = state.get('df1', x.new())
     df2 = state.get('df2', x.new())
     df3 = state.get('df3', x.new())

     df1.resize_as_(x)
     df2.resize_as_(x)
     df3.resize_as_(x)

     # search direction
     s = state.get('s', x.new())
     s.resize_as_(x)

     # we need a temp storage for X
     x0 = state.get('x0', x.new())
     f0 = 0
     df0 = state.get('df0', x.new())
     x0.resize_as_(x)
     df0.resize_as_(x)

     # evaluate at initial point
     f1, tdf = opfunc(x)
     fx.append(f1)
     df1.copy_(tdf)
     i = i + 1

     # initial search direction
     s.copy_(df1).mul_(-1)

     d1 = -s.dot(s)         # slope
     z1 = red / (1 - d1)         # initial step

     while i < abs(maxEval):
         x0.copy_(x)
         f0 = f1
         df0.copy_(df1)

         x.add_(z1, s)
         f2, tdf = opfunc(x)
         df2.copy_(tdf)
         i = i + 1
         d2 = df2.dot(s)
         f3, d3, z3 = f1, d1, -z1   # init point 3 equal to point 1
         m = min(maxIter, maxEval - i)
         success = 0
         limit = -1

         while True:
             while (f2 > f1 + z1 * rho * d1 or d2 > -sig * d1) and m > 0:
                 limit = z1
                 if f2 > f1:
                     z2 = z3 - (0.5 * d3 * z3 * z3) / (d3 * z3 + f2 - f3)
                 else:
                     A = 6 * (f2 - f3) / z3 + 3 * (d2 + d3)
                     B = 3 * (f3 - f2) - z3 * (d3 + 2 * d2)
                     z2 = (sqrt_nothrow(B * B - A * d2 * z3 * z3) - B) / A

                 if z2 != z2 or z2 == INFINITY or z2 == -INFINITY:
                     z2 = z3 / 2

                 z2 = max(min(z2, _int * z3), (1 - _int) * z3)
                 z1 = z1 + z2
                 x.add_(z2, s)
                 f2, tdf = opfunc(x)
                 df2.copy_(tdf)
                 i = i + 1
                 m = m - 1
                 d2 = df2.dot(s)
                 z3 = z3 - z2

             if f2 > f1 + z1 * rho * d1 or d2 > -sig * d1:
                 break
             elif d2 > sig * d1:
                 success = 1
                 break
             elif m == 0:
                 break

             A = 6 * (f2 - f3) / z3 + 3 * (d2 + d3)
             B = 3 * (f3 - f2) - z3 * (d3 + 2 * d2)
             _denom = (B + sqrt_nothrow(B * B - A * d2 * z3 * z3))
             z2 = -d2 * z3 * z3 / _denom if _denom != 0 else float('nan')

             if z2 != z2 or z2 == INFINITY or z2 == -INFINITY or z2 < 0:
                 if limit < -0.5:
                     z2 = z1 * (ext - 1)
                 else:
                     z2 = (limit - z1) / 2
             elif (limit > -0.5) and (z2 + z1) > limit:
                 z2 = (limit - z1) / 2
             elif limit < -0.5 and (z2 + z1) > z1 * ext:
                 z2 = z1 * (ext - 1)
             elif z2 < -z3 * _int:
                 z2 = -z3 * _int
             elif limit > -0.5 and z2 < (limit - z1) * (1 - _int):
                 z2 = (limit - z1) * (1 - _int)

             f3 = f2
             d3 = d2
             z3 = -z2
             z1 = z1 + z2
             x.add_(z2, s)

             f2, tdf = opfunc(x)
             df2.copy_(tdf)
             i = i + 1
             m = m - 1
             d2 = df2.dot(s)

         if success == 1:
             f1 = f2
             fx.append(f1)
             ss = (df2.dot(df2) - df2.dot(df1)) / df1.dot(df1)
             s.mul_(ss)
             s.add_(-1, df2)
             tmp = df1.clone()
             df1.copy_(df2)
             df2.copy_(tmp)
             d2 = df1.dot(s)
             if d2 > 0:
                 s.copy_(df1)
                 s.mul_(-1)
                 d2 = -s.dot(s)

             z1 = z1 * min(ratio, d1 / (d2 - 1e-320))
             d1 = d2
             ls_failed = 0
         else:
             x.copy_(x0)
             f1 = f0
             df1.copy_(df0)
             if ls_failed or i > maxEval:
                 break

             tmp = df1.clone()
             df1.copy_(df2)
             df2.copy_(tmp)
             s.copy_(df1)
             s.mul_(-1)
             d1 = -s.dot(s)
             z1 = 1 / (1 - d1)
             ls_failed = 1

     state['df0'] = df0
     state['df1'] = df1
     state['df2'] = df2
     state['df3'] = df3
     state['x0'] = x0
     state['s'] = s
     return x, fx, i
	import math

	INFINITY = float('inf')


	def sqrt_nothrow(x):
	return math.sqrt(x) if x >= 0 else float('nan')


	def cg(opfunc, x, config, state=None):
	"""

	This cg implementation is a rewrite of minimize.m written by Carl
	E. Rasmussen. It is supposed to produce exactly same results (give
	or take numerical accuracy due to some changed order of
	operations). You can compare the result on rosenbrock with minimize.m.
	http://www.gatsby.ucl.ac.uk/~edward/code/minimize/example.html

	[x fx c] = minimize([0 0]', 'rosenbrock', -25)

	Note that we limit the number of function evaluations only, it seems much
	more important in practical use.

	ARGS:

	- `opfunc` : a function that takes a single input, the point of evaluation.
	- `x` : the initial point
	- `state` : a table of parameters and temporary allocations.
	- `state['maxEval']` : max number of function evaluations
	- `state['maxIter']` : max number of iterations
	- `state['df0']` : if you pass torch.Tensor they will be used for temp storage
	- `state['df1']` : if you pass torch.Tensor they will be used for temp storage
	- `state['df2']` : if you pass torch.Tensor they will be used for temp storage
	- `state['df3']` : if you pass torch.Tensor they will be used for temp storage
	- `state['s']` : if you pass torch.Tensor they will be used for temp storage
	- `state['x0']` : if you pass torch.Tensor they will be used for temp storage

	RETURN:
	- `x*` : the new x vector, at the optimal point
	- `f` : a table of all function values where
	`f[1]` is the value of the function before any optimization and
	`f[#f]` is the final fully optimized value, at x*

	(Koray Kavukcuoglu, 2012)
	"""
	# parameters
	if config is None and state is None:
	raise ValueError("cg requires a dictionary to retain state between iterations")
	state = state if state is not None else config
	rho = config.get('rho', 0.01)
	sig = config.get('sig', 0.5)
	_int = config.get('int', 0.1)
	ext = config.get('ext', 3.0)
	maxIter = config.get('maxIter', 20)
	ratio = config.get('ratio', 100)
	maxEval = config.get('maxEval', maxIter * 1.25)
	red = 1

	i = 0
	ls_failed = 0
	fx = []

	# we need three points for the interpolation/extrapolation stuff
	z1, z2, z3 = 0, 0, 0
	d1, d2, d3 = 0, 0, 0
	f1, f2, f3 = 0, 0, 0

	df1 = state.get('df1', x.new())
	df2 = state.get('df2', x.new())
	df3 = state.get('df3', x.new())

	df1.resize_as_(x)
	df2.resize_as_(x)
	df3.resize_as_(x)

	# search direction
	s = state.get('s', x.new())
	s.resize_as_(x)

	# we need a temp storage for X
	x0 = state.get('x0', x.new())
	f0 = 0
	df0 = state.get('df0', x.new())
	x0.resize_as_(x)
	df0.resize_as_(x)

	# evaluate at initial point
	f1, tdf = opfunc(x)
	fx.append(f1)
	df1.copy_(tdf)
	i = i + 1

	# initial search direction
	s.copy_(df1).mul_(-1)

	d1 = -s.dot(s) # slope
	z1 = red / (1 - d1) # initial step

	while i < abs(maxEval):
	x0.copy_(x)
	f0 = f1
	df0.copy_(df1)

	x.add_(z1, s)
	f2, tdf = opfunc(x)
	df2.copy_(tdf)
	i = i + 1
	d2 = df2.dot(s)
	f3, d3, z3 = f1, d1, -z1 # init point 3 equal to point 1
	m = min(maxIter, maxEval - i)
	success = 0
	limit = -1

	while True:
	while (f2 > f1 + z1 * rho * d1 or d2 > -sig * d1) and m > 0:
	limit = z1
	if f2 > f1:
	z2 = z3 - (0.5 * d3 * z3 * z3) / (d3 * z3 + f2 - f3)
	else:
	A = 6 * (f2 - f3) / z3 + 3 * (d2 + d3)
	B = 3 * (f3 - f2) - z3 * (d3 + 2 * d2)
	z2 = (sqrt_nothrow(B * B - A * d2 * z3 * z3) - B) / A

	if z2 != z2 or z2 == INFINITY or z2 == -INFINITY:
	z2 = z3 / 2

	z2 = max(min(z2, _int * z3), (1 - _int) * z3)
	z1 = z1 + z2
	x.add_(z2, s)
	f2, tdf = opfunc(x)
	df2.copy_(tdf)
	i = i + 1
	m = m - 1
	d2 = df2.dot(s)
	z3 = z3 - z2

	if f2 > f1 + z1 * rho * d1 or d2 > -sig * d1:
	break
	elif d2 > sig * d1:
	success = 1
	break
	elif m == 0:
	break

	A = 6 * (f2 - f3) / z3 + 3 * (d2 + d3)
	B = 3 * (f3 - f2) - z3 * (d3 + 2 * d2)
	_denom = (B + sqrt_nothrow(B * B - A * d2 * z3 * z3))
	z2 = -d2 * z3 * z3 / _denom if _denom != 0 else float('nan')

	if z2 != z2 or z2 == INFINITY or z2 == -INFINITY or z2 < 0:
	if limit < -0.5:
	z2 = z1 * (ext - 1)
	else:
	z2 = (limit - z1) / 2
	elif (limit > -0.5) and (z2 + z1) > limit:
	z2 = (limit - z1) / 2
	elif limit < -0.5 and (z2 + z1) > z1 * ext:
	z2 = z1 * (ext - 1)
	elif z2 < -z3 * _int:
	z2 = -z3 * _int
	elif limit > -0.5 and z2 < (limit - z1) * (1 - _int):
	z2 = (limit - z1) * (1 - _int)

	f3 = f2
	d3 = d2
	z3 = -z2
	z1 = z1 + z2
	x.add_(z2, s)

	f2, tdf = opfunc(x)
	df2.copy_(tdf)
	i = i + 1
	m = m - 1
	d2 = df2.dot(s)

	if success == 1:
	f1 = f2
	fx.append(f1)
	ss = (df2.dot(df2) - df2.dot(df1)) / df1.dot(df1)
	s.mul_(ss)
	s.add_(-1, df2)
	tmp = df1.clone()
	df1.copy_(df2)
	df2.copy_(tmp)
	d2 = df1.dot(s)
	if d2 > 0:
	s.copy_(df1)
	s.mul_(-1)
	d2 = -s.dot(s)

	z1 = z1 * min(ratio, d1 / (d2 - 1e-320))
	d1 = d2
	ls_failed = 0
	else:
	x.copy_(x0)
	f1 = f0
	df1.copy_(df0)
	if ls_failed or i > maxEval:
	break

	tmp = df1.clone()
	df1.copy_(df2)
	df2.copy_(tmp)
	s.copy_(df1)
	s.mul_(-1)
	d1 = -s.dot(s)
	z1 = 1 / (1 - d1)
	ls_failed = 1

	state['df0'] = df0
	state['df1'] = df1
	state['df2'] = df2
	state['df3'] = df3
	state['x0'] = x0
	state['s'] = s
	return x, fx, i