""" Generic optimization solvers """
# Author: Remi Flamary <remi.flamary@unice.fr>
#
# License: MIT License
import numpy as np
from .utils import armijo, norm, line_search_armijo
# format string for printing in verbose mode
prnt_str_name = "|{it:>5}|{loss:>13}|{dloss:>13}|{step:>13}|\n" \
"|-----|-------------|-------------|-------------|"
prnt_str_loop = "|{it:5d}|{loss: 10e}|{dloss: 10e}|{step: 10e}|"
[docs]def fmin_prox(f, df, g, prox_g, x0, lambd=1., backtrack=True, nbitermax=1000,
stopvarx=1e-9, stopvarj=1e-9, t0=10., verbose=False, m_back=1,
sigma=1e-9, eta=2, nbitermax_back=100, bbrule=True, log=False,
**kwargs):
r""" Minimize a sum of smooth and nonsmooth function using proximal splitting
The algorithm is the classical Forward Backward Splitting [1]_
with BB rule for step estimation [2]_.
Solve the optimization problem:
.. math::
min_x \quad f(x)+\lambda g(x)
where:
- f is differentiable (df) and Lipshictz gradient
- g is non-differentiable but has a proximal operator (prox_g)
prox_g is a function providing the solution to problem
.. math::
min_x \quad \frac{1}{2}\|x_0-x\|^2+\lambda*g(x)
Several proximal operators are available at optim.prox
Parameters
----------
f : function
Smooth function f: R^d -> R
df : function
Gradient of f, df:R^d -> R^d
g : function
Nonsmooth function g: R^d -> R
prox_g : function
Proximal of g, df:R^d -> R^d
x_0 : (d,) numpy.array
Initial point
lambda : float
Regularization parameter >0
backtrack : boolean, optional
Perform backtracking if true (default: True).
bbrule : boolean, optional
update step with bb rule.
nbitermax : int, optional
Max number of iterations in algorithm
stopvarx : float, optional
Stopping criterion for relative variation of the
norm of x
stopvarj : float, optional
Stopping criterion for relative variation of the cost
t0 : float, optional
initial descent step
verbose : boolean, optional
prinrt optimization information
m_back : int, optional
Window size for backtrack (if <1 then non decreasing)
sigma : float, optional
descent parameter for backtrack
eta : float, optional
value multiplying t during backtrack
nbitermax_back : int, optional
Max number of backtrack iterations
Returns
-------
x: (d,) ndarray
Optimal solution x
val: float
optimal value of the objective (None if optimization error)
log: dict
Optional log output
References
----------
.. [1] Combettes, P. L., & Wajs, V. R. (2005). Signal recovery by proximal
forward-backward splitting. Multiscale Modeling & Simulation, 4(4),
1168-1200.
.. [2] Barzilai, J., & Borwein, J. M. (1988). Two-point step size
gradient methods. IMA journal of numerical analysis, 8(1), 141-148.
See Also
--------
optim.prox : Module containing proximal operators
optim.fmin_proj : Projected gradient (special case of proximal gradient)
"""
x = x0.copy()
grad = df(x, **kwargs)
# grad[:]=0
loss = list()
loss.append(f(x, **kwargs) + g(x, lambd, **kwargs))
if log:
log = dict()
log['loss'] = loss
t = t0
if verbose:
print((prnt_str_name.format(it="It. ", loss='Loss ',
dloss="Delta Loss ", step="Step ")))
print((prnt_str_loop.format(it=0, loss=loss[-1], dloss=0, step=1 / t)))
loop = True
it = 1
while loop:
x_1 = x.copy()
grad_1 = grad.copy()
# gradient
grad = df(x, **kwargs)
# prox operator
x = prox_g(x_1 - grad / t, lambd / t, **kwargs)
# cost computation
loss.append(f(x, **kwargs) + g(x, lambd, **kwargs))
# line search backtrack
it2 = 0
thr_back = np.max([loss[-2 - k] - sigma / 2 * t * norm(x - x_1)
** 2 for k in range(min(m_back, len(loss) - 1))])
while loss[-1] > thr_back and it2 < nbitermax_back and backtrack:
t = t * eta
x = prox_g(x_1 - grad / t, lambd / t, **kwargs)
loss[-1] = f(x, **kwargs) + g(x, lambd, **kwargs)
thr_back = np.max([loss[-2 - k] - sigma / 2 * t * norm(x - x_1)
** 2 for k in range(min(m_back, len(loss) - 1))
])
# print '\t',loss[-1],thr_back
it2 += 1
if it2 == nbitermax_back:
print("Warning: backtrack failed")
# print loss[-1],t
# print information
if verbose:
if not (it) % 20:
print((prnt_str_name.format(it="It. ", loss='Loss ',
dloss="Delta Loss ",
step="Step ")))
print(prnt_str_loop.format(it=it,
loss=loss[-1],
dloss=(loss[-1] - loss[-2]
) / abs(loss[-2]),
step=1 / t))
# BB rule
xbb = x - x_1
ybb = grad - grad_1
if it >= 1 and norm(xbb) > 1e-12 and norm(ybb) > 1e-12 and bbrule:
t = abs(np.sum(xbb * ybb) / np.sum(xbb * xbb))
# else:
# t=t0
# test convergence
if norm(x - x_1) / norm(x) < stopvarx:
loop = False
if verbose:
print("delta x convergence")
#
if abs(loss[-1] - loss[-2]) / abs(loss[-2]) < stopvarj:
loop = False
if verbose:
print("delta loss convergence")
if it >= nbitermax:
loop = False
if verbose:
print("Max number of iteration reached")
# increment iteration
it += 1
if log:
log['loss'] = loss
return x, loss[-1], log
else:
return x, loss[-1]
[docs]def fmin_proj(f, df, proj, x0, nbitermax=1000, stopvarx=1e-9, stopvarj=1e-9,
t0=1., verbose=False, bbrule=True, log=False, **kwargs):
r""" Minimize a constrained optimization problem with projected gradient
The algorithm is the classical Spectral Projected Gradient [3]_
with BB rule for step estimation [4]_.
Solve the optimization problem:
.. math::
min_x \quad f(x)
\text{s.t. }\quad s\in P
where:
- `f` is differentiable (df) and Lipshictz gradient
- proj is a projection onto P
`proj` is a projection function providing the solution to problem
.. math::
min_x \quad \frac{1}{2}\|x_0-x\|^2
\text{s.t. }\quad s\in P
Several projection functions are available at optim.proj
Parameters
----------
f : function
Smooth function f: R^d -> R
df : function
Gradient of f, df:R^d -> R^d
proj : function
Projection unto P, proj:R^d -> R^d
x_0 : (d,) numpy.array
Initial point
bbrule : boolean, optional
update step with bb rule.
nbitermax : int, optional
Max number of iterations in algorithm
stopvarx : float, optional
Stopping criterion for relative variation of the
norm of x
stopvarj : float, optional
Stopping criterion for relative variation of the cost
t0 : float, optional
initial descent step
verbose : boolean, optional
prinrt optimization information
Returns
-------
x: (d,) ndarray
Optimal solution x
val: float
optimal value of the objective (None if optimization error)
log: dict
Optional log output
References
----------
.. [3] Birgin, E. G., Martinez, J. M., & Raydan, M. (2000). Nonmonotone
spectral projected gradient methods on convex sets. SIAM Journal
on Optimization, 10(4), 1196-1211.
.. [4] Barzilai, J., & Borwein, J. M. (1988). Two-point step size
gradient methods. IMA journal of numerical analysis, 8(1), 141-148.
See Also
--------
optim.proj : Module containing projection operators
optim.fmin_prox : Proximal splitting (generalization of projected gradient)
"""
x = x0.copy()
grad = df(x, **kwargs)
# grad[:]=0
loss = list()
deltax = list()
loss.append(f(x, **kwargs))
grad = df(x, **kwargs)
deltax.append(
np.linalg.norm(x - proj(x - grad, **kwargs), np.inf) /
np.linalg.norm(x, np.inf))
if log:
log = dict()
log['loss'] = loss
t = t0
if verbose:
print(
prnt_str_name.format(
it="It. ",
loss='Loss ',
dloss="Delta Loss ",
step="Step "))
print(prnt_str_loop.format(it=0, loss=loss[-1], dloss=0, step=1 / t))
def fproj(x):
return f(proj(x, **kwargs), **kwargs)
loop = True
it = 1
while loop:
x_1 = x.copy()
grad_1 = grad.copy()
# gradient
grad = df(x, **kwargs)
# prox operator
d = -grad / t
x, tau, fnew = armijo(x_1, d, fproj, loss[-1], grad, tau=1, gamma=1e-6)
x = proj(x)
# cost computation
loss.append(fnew)
# line search backtrack
# print loss[-1],t
# print information
if verbose:
if not (it) % 20:
print(
prnt_str_name.format(
it="It. ",
loss='Loss ',
dloss="Delta Loss ",
step="Step "))
print(prnt_str_loop.format(it=it,
loss=loss[-1],
dloss=(loss[-1] - loss[-2]
) / abs(loss[-2]),
step=tau / t))
# BB rule
xbb = x - x_1
ybb = grad - grad_1
if it >= 1 and norm(xbb) > 1e-12 and norm(ybb) > 1e-12 and bbrule:
t = abs(np.sum(xbb * ybb) / np.sum(xbb * xbb))
else:
t = t0
# test convergence
deltax.append(
np.linalg.norm(
x -
proj(
x -
grad,
**kwargs),
np.inf) /
np.linalg.norm(
x,
np.inf))
if deltax[-1] < stopvarx:
loop = False
if verbose:
print("delta x convergence")
#
if abs(loss[-1] - loss[-2]) / abs(loss[-2]) < stopvarj:
loop = False
if verbose:
print("delta loss convergence")
if it >= nbitermax:
loop = False
if verbose:
print("Max number of iteration reached")
# increment iteration
it += 1
if log:
log['loss'] = loss
log['deltax'] = deltax
return x, loss[-1], log
else:
return x, loss[-1]
[docs]def fmin_cond(f, df, solve_c, x0, nbitermax=200,
stopvarj=1e-9, verbose=False, log=False):
r""" Solve constrained optimization with conditional gradient
The function solves the following optimization problem:
.. math::
\min_x \quad f(x)
\text{s.t.} \quad x\in P
where :
- f is differentiable (df) and Lipshictz gradient
- solve_c is the solver for the linearized problem of the form
.. math::
\min_x \quad x^T v
\text{s.t.} \quad x\in P
Parameters
----------
f : function
Smooth function f: R^d -> R
df : function
Gradient of f, df:R^d -> R^d
solve_c : function
Solver for linearized problem, solve_c:R^d -> R^d
x_0 : (d,) numpy.array
Initial point
nbitermax : int, optional
Max number of iterations
stopThr : float, optional
Stop threshol on error (>0)
verbose : bool, optional
Print information along iterations
log : bool, optional
record log if True
Returns
-------
x : ndarray
solution
val : float
Optimal value at solution
log : dict
log dictionary return only if log==True in parameters
References
----------
"""
loop = 1
if log:
log = {'loss': []}
x = x0
f_val = f(x0)
if log:
log['loss'].append(f_val)
it = 0
if verbose:
print(('{:5s}|{:12s}|{:8s}'.format(
'It.', 'Loss', 'Delta loss') + '\n' + '-' * 32))
print(('{:5d}|{:8e}|{:8e}'.format(it, f_val, 0)))
while loop:
it += 1
old_fval = f_val
# problem linearization
g = df(x)
# solve linearization
xc = solve_c(x, g)
deltax = xc - x
# line search
alpha, fc, f_val = line_search_armijo(f, x, deltax, g, f_val)
x = x + alpha * deltax
# test convergence
if it >= nbitermax:
loop = 0
delta_fval = (f_val - old_fval) / abs(f_val)
if abs(delta_fval) < stopvarj:
loop = 0
if log:
log['loss'].append(f_val)
if verbose:
if it % 20 == 0:
print(('{:5s}|{:12s}|{:8s}'.format(
'It.', 'Loss', 'Delta loss') + '\n' + '-' * 32))
print(('{:5d}|{:8e}|{:8e}'.format(it, f_val, delta_fval)))
if log:
return x, f_val, log
else:
return x, f_val