Source code for optim.solvers

""" Generic optimization solvers """

# Author: Remi Flamary <remi.flamary@unice.fr>
#
# License: MIT License


import numpy as np

from .utils import armijo, norm, line_search_armijo


# format string for printing in verbose mode
prnt_str_name = "|{it:>5}|{loss:>13}|{dloss:>13}|{step:>13}|\n" \
                "|-----|-------------|-------------|-------------|"
prnt_str_loop = "|{it:5d}|{loss: 10e}|{dloss: 10e}|{step: 10e}|"


[docs]def fmin_prox(f, df, g, prox_g, x0, lambd=1., backtrack=True, nbitermax=1000, stopvarx=1e-9, stopvarj=1e-9, t0=10., verbose=False, m_back=1, sigma=1e-9, eta=2, nbitermax_back=100, bbrule=True, log=False, **kwargs): r""" Minimize a sum of smooth and nonsmooth function using proximal splitting The algorithm is the classical Forward Backward Splitting [1]_ with BB rule for step estimation [2]_. Solve the optimization problem: .. math:: min_x \quad f(x)+\lambda g(x) where: - f is differentiable (df) and Lipshictz gradient - g is non-differentiable but has a proximal operator (prox_g) prox_g is a function providing the solution to problem .. math:: min_x \quad \frac{1}{2}\|x_0-x\|^2+\lambda*g(x) Several proximal operators are available at optim.prox Parameters ---------- f : function Smooth function f: R^d -> R df : function Gradient of f, df:R^d -> R^d g : function Nonsmooth function g: R^d -> R prox_g : function Proximal of g, df:R^d -> R^d x_0 : (d,) numpy.array Initial point lambda : float Regularization parameter >0 backtrack : boolean, optional Perform backtracking if true (default: True). bbrule : boolean, optional update step with bb rule. nbitermax : int, optional Max number of iterations in algorithm stopvarx : float, optional Stopping criterion for relative variation of the norm of x stopvarj : float, optional Stopping criterion for relative variation of the cost t0 : float, optional initial descent step verbose : boolean, optional prinrt optimization information m_back : int, optional Window size for backtrack (if <1 then non decreasing) sigma : float, optional descent parameter for backtrack eta : float, optional value multiplying t during backtrack nbitermax_back : int, optional Max number of backtrack iterations Returns ------- x: (d,) ndarray Optimal solution x val: float optimal value of the objective (None if optimization error) log: dict Optional log output References ---------- .. [1] Combettes, P. L., & Wajs, V. R. (2005). Signal recovery by proximal forward-backward splitting. Multiscale Modeling & Simulation, 4(4), 1168-1200. .. [2] Barzilai, J., & Borwein, J. M. (1988). Two-point step size gradient methods. IMA journal of numerical analysis, 8(1), 141-148. See Also -------- optim.prox : Module containing proximal operators optim.fmin_proj : Projected gradient (special case of proximal gradient) """ x = x0.copy() grad = df(x, **kwargs) # grad[:]=0 loss = list() loss.append(f(x, **kwargs) + g(x, lambd, **kwargs)) if log: log = dict() log['loss'] = loss t = t0 if verbose: print((prnt_str_name.format(it="It. ", loss='Loss ', dloss="Delta Loss ", step="Step "))) print((prnt_str_loop.format(it=0, loss=loss[-1], dloss=0, step=1 / t))) loop = True it = 1 while loop: x_1 = x.copy() grad_1 = grad.copy() # gradient grad = df(x, **kwargs) # prox operator x = prox_g(x_1 - grad / t, lambd / t, **kwargs) # cost computation loss.append(f(x, **kwargs) + g(x, lambd, **kwargs)) # line search backtrack it2 = 0 thr_back = np.max([loss[-2 - k] - sigma / 2 * t * norm(x - x_1) ** 2 for k in range(min(m_back, len(loss) - 1))]) while loss[-1] > thr_back and it2 < nbitermax_back and backtrack: t = t * eta x = prox_g(x_1 - grad / t, lambd / t, **kwargs) loss[-1] = f(x, **kwargs) + g(x, lambd, **kwargs) thr_back = np.max([loss[-2 - k] - sigma / 2 * t * norm(x - x_1) ** 2 for k in range(min(m_back, len(loss) - 1)) ]) # print '\t',loss[-1],thr_back it2 += 1 if it2 == nbitermax_back: print("Warning: backtrack failed") # print loss[-1],t # print information if verbose: if not (it) % 20: print((prnt_str_name.format(it="It. ", loss='Loss ', dloss="Delta Loss ", step="Step "))) print(prnt_str_loop.format(it=it, loss=loss[-1], dloss=(loss[-1] - loss[-2] ) / abs(loss[-2]), step=1 / t)) # BB rule xbb = x - x_1 ybb = grad - grad_1 if it >= 1 and norm(xbb) > 1e-12 and norm(ybb) > 1e-12 and bbrule: t = abs(np.sum(xbb * ybb) / np.sum(xbb * xbb)) # else: # t=t0 # test convergence if norm(x - x_1) / norm(x) < stopvarx: loop = False if verbose: print("delta x convergence") # if abs(loss[-1] - loss[-2]) / abs(loss[-2]) < stopvarj: loop = False if verbose: print("delta loss convergence") if it >= nbitermax: loop = False if verbose: print("Max number of iteration reached") # increment iteration it += 1 if log: log['loss'] = loss return x, loss[-1], log else: return x, loss[-1]
[docs]def fmin_proj(f, df, proj, x0, nbitermax=1000, stopvarx=1e-9, stopvarj=1e-9, t0=1., verbose=False, bbrule=True, log=False, **kwargs): r""" Minimize a constrained optimization problem with projected gradient The algorithm is the classical Spectral Projected Gradient [3]_ with BB rule for step estimation [4]_. Solve the optimization problem: .. math:: min_x \quad f(x) \text{s.t. }\quad s\in P where: - `f` is differentiable (df) and Lipshictz gradient - proj is a projection onto P `proj` is a projection function providing the solution to problem .. math:: min_x \quad \frac{1}{2}\|x_0-x\|^2 \text{s.t. }\quad s\in P Several projection functions are available at optim.proj Parameters ---------- f : function Smooth function f: R^d -> R df : function Gradient of f, df:R^d -> R^d proj : function Projection unto P, proj:R^d -> R^d x_0 : (d,) numpy.array Initial point bbrule : boolean, optional update step with bb rule. nbitermax : int, optional Max number of iterations in algorithm stopvarx : float, optional Stopping criterion for relative variation of the norm of x stopvarj : float, optional Stopping criterion for relative variation of the cost t0 : float, optional initial descent step verbose : boolean, optional prinrt optimization information Returns ------- x: (d,) ndarray Optimal solution x val: float optimal value of the objective (None if optimization error) log: dict Optional log output References ---------- .. [3] Birgin, E. G., Martinez, J. M., & Raydan, M. (2000). Nonmonotone spectral projected gradient methods on convex sets. SIAM Journal on Optimization, 10(4), 1196-1211. .. [4] Barzilai, J., & Borwein, J. M. (1988). Two-point step size gradient methods. IMA journal of numerical analysis, 8(1), 141-148. See Also -------- optim.proj : Module containing projection operators optim.fmin_prox : Proximal splitting (generalization of projected gradient) """ x = x0.copy() grad = df(x, **kwargs) # grad[:]=0 loss = list() deltax = list() loss.append(f(x, **kwargs)) grad = df(x, **kwargs) deltax.append( np.linalg.norm(x - proj(x - grad, **kwargs), np.inf) / np.linalg.norm(x, np.inf)) if log: log = dict() log['loss'] = loss t = t0 if verbose: print( prnt_str_name.format( it="It. ", loss='Loss ', dloss="Delta Loss ", step="Step ")) print(prnt_str_loop.format(it=0, loss=loss[-1], dloss=0, step=1 / t)) def fproj(x): return f(proj(x, **kwargs), **kwargs) loop = True it = 1 while loop: x_1 = x.copy() grad_1 = grad.copy() # gradient grad = df(x, **kwargs) # prox operator d = -grad / t x, tau, fnew = armijo(x_1, d, fproj, loss[-1], grad, tau=1, gamma=1e-6) x = proj(x) # cost computation loss.append(fnew) # line search backtrack # print loss[-1],t # print information if verbose: if not (it) % 20: print( prnt_str_name.format( it="It. ", loss='Loss ', dloss="Delta Loss ", step="Step ")) print(prnt_str_loop.format(it=it, loss=loss[-1], dloss=(loss[-1] - loss[-2] ) / abs(loss[-2]), step=tau / t)) # BB rule xbb = x - x_1 ybb = grad - grad_1 if it >= 1 and norm(xbb) > 1e-12 and norm(ybb) > 1e-12 and bbrule: t = abs(np.sum(xbb * ybb) / np.sum(xbb * xbb)) else: t = t0 # test convergence deltax.append( np.linalg.norm( x - proj( x - grad, **kwargs), np.inf) / np.linalg.norm( x, np.inf)) if deltax[-1] < stopvarx: loop = False if verbose: print("delta x convergence") # if abs(loss[-1] - loss[-2]) / abs(loss[-2]) < stopvarj: loop = False if verbose: print("delta loss convergence") if it >= nbitermax: loop = False if verbose: print("Max number of iteration reached") # increment iteration it += 1 if log: log['loss'] = loss log['deltax'] = deltax return x, loss[-1], log else: return x, loss[-1]
[docs]def fmin_cond(f, df, solve_c, x0, nbitermax=200, stopvarj=1e-9, verbose=False, log=False): r""" Solve constrained optimization with conditional gradient The function solves the following optimization problem: .. math:: \min_x \quad f(x) \text{s.t.} \quad x\in P where : - f is differentiable (df) and Lipshictz gradient - solve_c is the solver for the linearized problem of the form .. math:: \min_x \quad x^T v \text{s.t.} \quad x\in P Parameters ---------- f : function Smooth function f: R^d -> R df : function Gradient of f, df:R^d -> R^d solve_c : function Solver for linearized problem, solve_c:R^d -> R^d x_0 : (d,) numpy.array Initial point nbitermax : int, optional Max number of iterations stopThr : float, optional Stop threshol on error (>0) verbose : bool, optional Print information along iterations log : bool, optional record log if True Returns ------- x : ndarray solution val : float Optimal value at solution log : dict log dictionary return only if log==True in parameters References ---------- """ loop = 1 if log: log = {'loss': []} x = x0 f_val = f(x0) if log: log['loss'].append(f_val) it = 0 if verbose: print(('{:5s}|{:12s}|{:8s}'.format( 'It.', 'Loss', 'Delta loss') + '\n' + '-' * 32)) print(('{:5d}|{:8e}|{:8e}'.format(it, f_val, 0))) while loop: it += 1 old_fval = f_val # problem linearization g = df(x) # solve linearization xc = solve_c(x, g) deltax = xc - x # line search alpha, fc, f_val = line_search_armijo(f, x, deltax, g, f_val) x = x + alpha * deltax # test convergence if it >= nbitermax: loop = 0 delta_fval = (f_val - old_fval) / abs(f_val) if abs(delta_fval) < stopvarj: loop = 0 if log: log['loss'].append(f_val) if verbose: if it % 20 == 0: print(('{:5s}|{:12s}|{:8s}'.format( 'It.', 'Loss', 'Delta loss') + '\n' + '-' * 32)) print(('{:5d}|{:8e}|{:8e}'.format(it, f_val, delta_fval))) if log: return x, f_val, log else: return x, f_val