blob: 118de3bd96aac8a88598d310eafdf562b7587807 [file] [log] [blame]
import math
INFINITY = float('inf')
def sqrt_nothrow(x):
return math.sqrt(x) if x >= 0 else float('nan')
def cg(opfunc, x, config, state=None):
"""
This cg implementation is a rewrite of minimize.m written by Carl
E. Rasmussen. It is supposed to produce exactly same results (give
or take numerical accuracy due to some changed order of
operations). You can compare the result on rosenbrock with minimize.m.
http://www.gatsby.ucl.ac.uk/~edward/code/minimize/example.html
[x fx c] = minimize([0 0]', 'rosenbrock', -25)
Note that we limit the number of function evaluations only, it seems much
more important in practical use.
ARGS:
- `opfunc` : a function that takes a single input, the point of evaluation.
- `x` : the initial point
- `state` : a table of parameters and temporary allocations.
- `state['maxEval']` : max number of function evaluations
- `state['maxIter']` : max number of iterations
- `state['df0']` : if you pass torch.Tensor they will be used for temp storage
- `state['df1']` : if you pass torch.Tensor they will be used for temp storage
- `state['df2']` : if you pass torch.Tensor they will be used for temp storage
- `state['df3']` : if you pass torch.Tensor they will be used for temp storage
- `state['s']` : if you pass torch.Tensor they will be used for temp storage
- `state['x0']` : if you pass torch.Tensor they will be used for temp storage
RETURN:
- `x*` : the new x vector, at the optimal point
- `f` : a table of all function values where
`f[1]` is the value of the function before any optimization and
`f[#f]` is the final fully optimized value, at x*
(Koray Kavukcuoglu, 2012)
"""
# parameters
if config is None and state is None:
raise ValueError("cg requires a dictionary to retain state between iterations")
state = state if state is not None else config
rho = config.get('rho', 0.01)
sig = config.get('sig', 0.5)
_int = config.get('int', 0.1)
ext = config.get('ext', 3.0)
maxIter = config.get('maxIter', 20)
ratio = config.get('ratio', 100)
maxEval = config.get('maxEval', maxIter * 1.25)
red = 1
i = 0
ls_failed = 0
fx = []
# we need three points for the interpolation/extrapolation stuff
z1, z2, z3 = 0, 0, 0
d1, d2, d3 = 0, 0, 0
f1, f2, f3 = 0, 0, 0
df1 = state.get('df1', x.new())
df2 = state.get('df2', x.new())
df3 = state.get('df3', x.new())
df1.resize_as_(x)
df2.resize_as_(x)
df3.resize_as_(x)
# search direction
s = state.get('s', x.new())
s.resize_as_(x)
# we need a temp storage for X
x0 = state.get('x0', x.new())
f0 = 0
df0 = state.get('df0', x.new())
x0.resize_as_(x)
df0.resize_as_(x)
# evaluate at initial point
f1, tdf = opfunc(x)
fx.append(f1)
df1.copy_(tdf)
i = i + 1
# initial search direction
s.copy_(df1).mul_(-1)
d1 = -s.dot(s) # slope
z1 = red / (1 - d1) # initial step
while i < abs(maxEval):
x0.copy_(x)
f0 = f1
df0.copy_(df1)
x.add_(z1, s)
f2, tdf = opfunc(x)
df2.copy_(tdf)
i = i + 1
d2 = df2.dot(s)
f3, d3, z3 = f1, d1, -z1 # init point 3 equal to point 1
m = min(maxIter, maxEval - i)
success = 0
limit = -1
while True:
while (f2 > f1 + z1 * rho * d1 or d2 > -sig * d1) and m > 0:
limit = z1
if f2 > f1:
z2 = z3 - (0.5 * d3 * z3 * z3) / (d3 * z3 + f2 - f3)
else:
A = 6 * (f2 - f3) / z3 + 3 * (d2 + d3)
B = 3 * (f3 - f2) - z3 * (d3 + 2 * d2)
z2 = (sqrt_nothrow(B * B - A * d2 * z3 * z3) - B) / A
if z2 != z2 or z2 == INFINITY or z2 == -INFINITY:
z2 = z3 / 2
z2 = max(min(z2, _int * z3), (1 - _int) * z3)
z1 = z1 + z2
x.add_(z2, s)
f2, tdf = opfunc(x)
df2.copy_(tdf)
i = i + 1
m = m - 1
d2 = df2.dot(s)
z3 = z3 - z2
if f2 > f1 + z1 * rho * d1 or d2 > -sig * d1:
break
elif d2 > sig * d1:
success = 1
break
elif m == 0:
break
A = 6 * (f2 - f3) / z3 + 3 * (d2 + d3)
B = 3 * (f3 - f2) - z3 * (d3 + 2 * d2)
_denom = (B + sqrt_nothrow(B * B - A * d2 * z3 * z3))
z2 = -d2 * z3 * z3 / _denom if _denom != 0 else float('nan')
if z2 != z2 or z2 == INFINITY or z2 == -INFINITY or z2 < 0:
if limit < -0.5:
z2 = z1 * (ext - 1)
else:
z2 = (limit - z1) / 2
elif (limit > -0.5) and (z2 + z1) > limit:
z2 = (limit - z1) / 2
elif limit < -0.5 and (z2 + z1) > z1 * ext:
z2 = z1 * (ext - 1)
elif z2 < -z3 * _int:
z2 = -z3 * _int
elif limit > -0.5 and z2 < (limit - z1) * (1 - _int):
z2 = (limit - z1) * (1 - _int)
f3 = f2
d3 = d2
z3 = -z2
z1 = z1 + z2
x.add_(z2, s)
f2, tdf = opfunc(x)
df2.copy_(tdf)
i = i + 1
m = m - 1
d2 = df2.dot(s)
if success == 1:
f1 = f2
fx.append(f1)
ss = (df2.dot(df2) - df2.dot(df1)) / df1.dot(df1)
s.mul_(ss)
s.add_(-1, df2)
tmp = df1.clone()
df1.copy_(df2)
df2.copy_(tmp)
d2 = df1.dot(s)
if d2 > 0:
s.copy_(df1)
s.mul_(-1)
d2 = -s.dot(s)
z1 = z1 * min(ratio, d1 / (d2 - 1e-320))
d1 = d2
ls_failed = 0
else:
x.copy_(x0)
f1 = f0
df1.copy_(df0)
if ls_failed or i > maxEval:
break
tmp = df1.clone()
df1.copy_(df2)
df2.copy_(tmp)
s.copy_(df1)
s.mul_(-1)
d1 = -s.dot(s)
z1 = 1 / (1 - d1)
ls_failed = 1
state['df0'] = df0
state['df1'] = df1
state['df2'] = df2
state['df3'] = df3
state['x0'] = x0
state['s'] = s
return x, fx, i