Source code for pycif.plugins.minimizers.m1qn3.opt

import copy

import numpy as np

from logging import info
from .aux import descentdir


[docs] def m1qn3(self, f, g, chi, **kwargs): """Execute the M1QN3 L-BFGS main loop. Runs the limited-memory quasi-Newton iterations until the gradient convergence criterion or the iteration/simulation limit is reached. At each step, the descent direction is computed from the L-BFGS two-loop recursion and a step length is found by the ``mlis0`` Wolfe line search. Args: self (Plugin): M1QN3 minimizer plugin instance with all parameters already resolved by ``check_options`` (``niter``, ``nsim``, ``epsg``, ``dxmin``, ``df1``, ``nupdates``, ``mode``, ``iz``). f (float): initial cost function value. g (np.ndarray): initial gradient, shape ``(n,)``. chi (np.ndarray): initial iterate, shape ``(n,)``. **kwargs: forwarded to the simulator. Returns: tuple: ``(xopt, fopt, gradopt, niter, nsim, epsg, mode)`` * ``xopt`` (np.ndarray): optimal iterate. * ``fopt`` (float): cost at ``xopt``. * ``gradopt`` (np.ndarray): gradient at ``xopt``. * ``niter`` (int): number of iterations performed. * ``nsim`` (int): number of simulator calls performed. * ``epsg`` (float): achieved relative gradient precision. * ``mode`` (int): exit mode (0 = converged on gradient, 1 = converged on *x*, 4 = iteration limit, 5 = simulation limit). Note: Wolfe parameter ``rm2`` is set to 0.99 (since version 2.0d, July 1996) to accelerate step acceptance in the line search. """ # M1QN3 Options: mode = self.mode m = self.nupdates niter = self.niter nsim = self.nsim jmin = self.jmin jmax = self.jmax epsg = self.epsg dxmin = self.dxmin df1 = self.df1 rm1 = getattr(self, "rm1", 1e-4) rm2 = getattr(self, "rm2", 0.99) rmin = getattr(self, "rmin", 1e-20) nverbose = kwargs.get("verbose", 2) logfile = kwargs.get("logfile", None) # M1QN3 main inputs: x = chi n = len(x) # Initialization d = np.zeros(n) gg = np.zeros(n) diag = np.ones(n) aux = np.zeros(n) alpha = np.zeros(m) ybar = np.zeros((10, n)) sbar = np.zeros((10, n)) sscale = 1 if mode - int(mode / 2.0) * 2.0 == 0: sscale = 0 warm = 0 if mode // 2 == 1: warm = 1 cold = not warm itmax = niter niter = 0 isim = 1 eps1 = 1.0 ps = np.dot(g, g) dk = ps gnorm = ps gnorm = np.sqrt(gnorm) if nverbose >= 1: info(" f = " + str(f)) info(" norm of g = " + str(gnorm)) if gnorm < rmin: mode = 2 if nverbose >= 1: info(" >>> m1qn3a: initial gradient is too small") nsim = isim epsg = eps1 return x, f, g, niter, nsim, epsg, mode # # --- initializing descent direction # if cold: jmin = 0 jmax = -1 jcour = jmax # # --- mise a l'echelle de la premiere direction de descente # if cold: # # --- use Fletcher's scaling and initialize diag to 1. # precos = 2.0 * df1 / (gnorm * gnorm) d = -g * precos if nverbose >= 5: info(" m1qn3a: descent direction -g: precon = " + str(precos)) else: # # --- use the matrix stored in [diag and] the (y,s) pairs # if sscale: ps = np.dot(ybar[jcour, :], ybar[jcour, :]) precos = 1.0 / ps ctonb = None ctcab = None d = descentdir( n, sscale, m, -g, jmin, jmax, precos, diag, alpha, ybar, sbar ) # # --- initialisation pour mlis0 # tmax = 1.0e20 hp0 = np.dot(d, g) if hp0 >= 0.0: mode = 7 if nverbose >= 1: info(" >>> m1qn3 (iteration " + str(niter) + "):") info( " the search direction d is not a descent direction: (g," "d) = " + str(hp0) ) nsim = isim epsg = eps1 return x, f, g, niter, nsim, epsg, mode # # --- compute the angle (-g,d) # if warm and nverbose >= 5: ps = np.sqrt(np.dot(g, g)) ps2 = np.sqrt(np.dot(d, d)) ps = hp0 / ps / ps2 ps = min(-ps, 1.0) r1 = np.degrees(np.arccos(ps)) info( " m1qn3: descent direction d: angle(-g,d) = " + str(r1) + "degrees", ) # # ---- Debut de l'iteration. on cherche x(k+1) de la forme x(k) + t*d, # avec t > 0. On connait d. # # Debut de la boucle: etiquette 100, # Sortie de la boucle: goto 1000. # while 1: niter += 1 # if verbose < 0: # if niter - int(niter/(-verbose))*(-verbose) == 0 : # indic=1 # f, g = simul.simul (x,niter+999,auxil,io) # continue if nverbose >= 3: info( " m1qn3: iter " + str(niter) + " simul " + str(isim) + ", f=" + str(f) + ", h'(0)=" + str(hp0) ) gg = copy.copy(g) ff = f # # --- recherche lineaire et nouveau point x(k+1) # if nverbose >= 5: info(" m1qn3: line search") # # --- calcul de tmin # tmin = 0.0 for i in range(n): tmin = max(tmin, abs(d[i])) tmin = dxmin / tmin t = 1.0 r1 = hp0 x, f, g, isim, moderl = self.mlis0( x, f, r1, t, tmin, tmax, d, g, rm2, rm1, nverbose, logfile, isim, nsim, niter, **kwargs ) # # --- mlis0 renvoie les nouvelles valeurs de x, f et g # if moderl != 0: if moderl < 0: # # --- calcul impossible # t, g: ou les calculs sont impossibles # x, f: ceux du t_gauche (donc f <= ff) # mode = moderl elif moderl == 1: # # --- descente bloquee sur tmax # [sortie rare (!!) d'apres le code de mlis0] # mode = 3 if nverbose >= 1: info( " >>> m1qn3 (iteration " "): line search blocked on tmax: decrease " "the scaling".format(niter) ) elif moderl == 4: # # --- nsim atteint # x, f: ceux du t_gauche (donc f <= ff) # mode = 5 elif moderl == 5: # # --- arret demande par l'utilisateur (indic = 0) # x, f: ceux en sortie du simulateur # mode = 0 elif moderl == 6: # # --- arret sur dxmin ou appel incoherent # x, f: ceux du t_gauche (donc f <= ff) # mode = 6 nsim = isim epsg = eps1 return x, f, g, niter, nsim, epsg, mode # # NOTE: stopping tests are now done after having updated the matrix, so # that update information can be stored in case of a later warm restart # # --- mise a jour de la matrice # if m > 0: # # --- mise a jour des pointeurs # jmax += 1 if jmax >= m: jmax = jmax - m if (cold and niter > m) or (warm and jmin == jmax): jmin += 1 if jmin >= m: jmin -= m jcour = jmax # # --- y, s et (y,s) # sbar[jcour, :] = t * d ybar[jcour, :] = g - gg if nverbose >= 5: ps = np.dot(sbar[jcour, :], sbar[jcour, :]) dk1 = np.sqrt(ps) if niter > 1: info( " m1qn3: convergence rate, s(k)/s(k-1) = " + str(dk1 / dk) ) dk = dk1 ps = np.dot(ybar[jcour, :], sbar[jcour, :]) ys = ps if ys <= 0.0: mode = 7 if nverbose >= 1: info( " >>> m1qn3 (iteration " + str(niter) + "): the scalar product (y,s) = " + str(ys) + "is not positive" ) nsim = isim epsg = eps1 return x, f, g, niter, nsim, epsg, mode # # --- ybar et sbar # r1 = np.sqrt(1.0 / ys) sbar[jcour, :] = r1 * sbar[jcour, :] ybar[jcour, :] = r1 * ybar[jcour, :] # # --- compute the scalar or diagonal preconditioner # if nverbose >= 5: info(" m1qn3: matrix update:") # # --- Here is the Oren-Spedicato factor, for scalar # scaling # if sscale: ps = np.dot(ybar[jcour, :], ybar[jcour, :]) precos = 1.0 / ps if nverbose >= 5: info("Oren-Spedicato factor = " + str(precos)) # # --- Scale the diagonal to Rayleigh's ellipsoid. # Initially (niter.eq.1) and for a cold start, # this is # equivalent to an Oren-Spedicato scaling of the # identity matrix. # else: aux = ybar[jcour, :] ps = 0.0 for i in range(n): ps += diag[i] * aux[i] * aux[i] r1 = 1.0 / ps if nverbose >= 5: info(" fitting the ellipsoid: factor = " + str(r1)) diag = diag * r1 # # --- update the diagonal # (gg is used as an auxiliary vector) # gg = sbar[jcour, :] ps = 0.0 for i in range(n): ps += gg[i] * gg[i] / diag[i] den = ps iprint = 0 for i in range(n): diag[i] = 1.0 / ( 1.0 / diag[i] + aux[i] * aux[i] - (gg[i] / diag[i]) * (gg[i] / diag[i]) / den ) if diag[i] <= 0.0: diag[i] = rmin iprint = i if iprint != 0 and nverbose >= 5: info( " >>> m1qn3-WARNING: diagonal element " + str(iprint) + " is negative (" + str(diag[iprint]) + "), reset to " + str(rmin) ) if nverbose >= 5: ps = 0.0 for i in range(n): ps += diag[i] ps = ps / n preco = ps ps2 = 0.0 for i in range(n): ps2 += (diag[i] - ps) * (diag[i] - ps) ps2 = np.sqrt(ps2 / n) info( " updated diagonal: average value = " + str(preco) + ", sqrt(variance) = " + str(ps2) ) # # --- tests d'arret # ps = np.dot(g, g) eps1 = ps eps1 = np.sqrt(eps1) / gnorm # Some verbose toprint = """ M1QN3: Iteration number: {} Simulation number: {} over {} Cost function: {} Gradient norm ratio: {} """.format( niter, isim, nsim, f, eps1 ) info(toprint) if nverbose >= 5: info(" m1qn3: stopping criterion on g: " + str(eps1)) if eps1 < epsg: mode = 1 nsim = isim epsg = eps1 return x, f, g, niter, nsim, epsg, mode if niter == itmax: mode = 4 if nverbose >= 1: info( " >>> m1qn3 (iteration " + str(niter) + "): maximal number of iterations" ) nsim = isim epsg = eps1 return x, f, g, niter, nsim, epsg, mode if isim > nsim: mode = 5 if nverbose >= 1: info( " >>> m1qn3 (iteration " + str(niter) + "): " + str(isim) + " simulations (maximal number reached)" ) nsim = isim epsg = eps1 return x, f, g, niter, nsim, epsg, mode # # --- calcul de la nouvelle direction de descente d = - H.g # if m == 0: preco = 2.0 * (ff - f) / ((eps1 * gnorm) * (eps1 * gnorm)) d[:] = -g[:] * preco else: info("Computing new descent direction") d = descentdir( n, sscale, m, -g, jmin, jmax, precos, diag, alpha, ybar, sbar ) # # --- test: la direction d est-elle de descente ? # hp0 sera utilise par mlis0 # hp0 = np.dot(d, g) if hp0 >= 0.0: mode = 7 if nverbose >= 1: info(" >>> m1qn3 (iteration " + str(niter) + "):") info( " the search direction d is not a descent direction: " "(g,d) = " + str(hp0) ) nsim = isim epsg = eps1 return x, f, g, niter, nsim, epsg, mode if nverbose >= 5: ps = np.dot(g, g) ps = np.sqrt(ps) ps2 = np.dot(d, d) ps2 = np.sqrt(ps2) ps = hp0 / ps / ps2 ps = min(-ps, 1.0) r1 = np.degrees(np.arccos(ps)) info( " m1qn3: descent direction d: angle(-g,d) = " + str(r1) + "degrees" )