Source code for pycif.plugins.minimizers.m1qn3.aux


import numpy as np


from logging import info


[docs] def mlis0( self, xn, fn, fpn, t, tmin, tmax, d, g, amd, amf, imp, io, nap, napmax, niter, **kwargs ): """Wolfe line search for M1QN3 (cubic interpolation with safeguards). Finds a step ``t`` satisfying the Wolfe conditions starting from ``xn`` along descent direction ``d``. Uses systematic cubic interpolation and a progressively shrinking barrier (``barr``) to prevent overshoots. Args: self (Plugin): M1QN3 plugin instance (provides the simulator). xn (np.ndarray): current iterate, shape ``(n,)``. fn (float): function value at ``xn``. fpn (float): directional derivative at ``xn`` (must be < 0). t (float): initial step-length trial. tmin (float): minimum acceptable step length. tmax (float): maximum acceptable step length. d (np.ndarray): descent direction, shape ``(n,)``. g (np.ndarray): gradient at ``xn``, shape ``(n,)``; updated on exit. amd (float): sufficient-curvature (Wolfe) parameter (0 < amf < amd < 1). amf (float): sufficient-decrease (Armijo) parameter. imp (int): verbosity level. io (int): log-file unit (unused in Python version). nap (int): simulator call counter on entry; incremented on exit. napmax (int): maximum number of simulator calls allowed. niter (int): current iteration count (for logging only). **kwargs: forwarded to the simulator. Returns: tuple: ``(xn, fn, g, nap, logic)`` * ``xn`` (np.ndarray): accepted point. * ``fn`` (float): function value at the accepted point. * ``g`` (np.ndarray): gradient at the accepted point. * ``nap`` (int): updated simulator call counter. * ``logic`` (int): exit code — 0 = serious step accepted, 1 = step blocked, 4 = simulator call limit reached, 5 = user-requested return, 6 = function/gradient inconsistency on entry. < 0 = implicit constraint activated """ # Simulator simulator = self.simulator n = len(xn) if not ( n > 0 and fpn < 0.0 and t > 0.0 and tmax > 0.0 and amf > 0.0 and amd > amf and amd < 1.0 ): toprint = """ MODE == 6!!!! n = {} fpn = {} t = {} tmax = {} amf = {} amd = {} """.format( n, fpn, t, tmax, amf, amd ) info(toprint) logic = 6 return xn, fn, g, nap, logic tesf = amf * fpn tesd = amd * fpn barmin = 0.01 barmul = 5.0 barmax = 0.3 barr = barmin td = 0.0 tg = 0.0 fg = fn fpg = fpn ta = 0.0 fa = fn fpa = fpn ps = np.dot(d, d) d2 = ps # # elimination d'un t initial ridiculement petit # t = max(t, tmin) if t > tmax: tmin = tmax info(" mlis0 tmin forced to tmax") while (fn + t * fpn) >= (fn + 0.9 * t * fpn): t = 2.0 * t indica = 1 logic = 0 if t > tmax: t = tmax logic = 1 if imp >= 4: info( " mlis0 fpn=" + str(fpn) + " d2=" + str(d2) + " tmin=" + str(tmin) + " tmax=" + str(tmax) + "degrees" ) # # --- nouveau x # x = xn + t * d # # --- boucle # while 1: nap += 1 if nap > napmax: logic = 4 fn = fg xn = xn + tg * d return x, fn, g, nap, logic indic = 4 # # --- appel simulateur # f, g = simulator.simul(x, run_id=nap, **kwargs) # , 1999 + nap - 1, io indic = 1 if indic == 0: # # --- arret demande par l'utilisateur # logic = 5 fn = f xn = x return x, fn, g, nap, logic if indic < 0: # # --- les calculs n'ont pas pu etre effectues par le # simulateur # td = t indicd = indic logic = 0 if imp >= 4: info(" mlis0 t=" + str(t) + " indic=" + str(indic)) t = tg + 0.1 * (td - tg) else: # # --- les tests elementaires sont faits, on y va # ps = np.dot(d, g) fp = ps # # --- premier test de Wolfe # ffn = f - fn goto900 = 0 if ffn > t * tesf: td = t fd = f fpd = fp indicd = indic logic = 0 if imp >= 4: info( " mlis0 t=" + str(t) + " ffn=" + str(ffn) + " fp=" + str(fp) ) else: # # --- test 1 ok, donc deuxieme test de Wolfe # if imp >= 4: info( " mlis0 t=" + str(t) + " ffn=" + str(ffn) + " fp=" + str(fp) ) if fp > tesd: logic = 0 fn = f xn = x return x, fn, g, nap, logic if logic != 0: # # --- test 2 ok, donc pas serieux, on sort # fn = f xn = x return x, fn, g, nap, logic tg = t fg = f fpg = fp if not td: # # extrapolation # taa = t gauche = (1.0 + barmin) * t droite = 10.0 * t t = ecube(t, f, fp, ta, fa, fpa, gauche, droite) ta = taa goto900 = 1 if t >= tmax: logic = 1 t = tmax # # interpolation # if not goto900 and indica <= 0: ta = t t = 0.9 * tg + 0.1 * td goto900 = 1 if not goto900: test = barr * (td - tg) gauche = tg + test droite = td - test taa = t t = ecube(t, f, fp, ta, fa, fpa, gauche, droite) ta = taa if gauche < t < droite: barr = max(barmin, barr / barmul) else: barr = min(barmul * barr, barmax) # # --- fin de boucle # - t peut etre bloque sur tmax # (venant de l'extrapolation avec logic=1) # fa = f fpa = fp indica = indic # # --- faut-il continuer ? # if td != 0.0: goto950 = 0 if (td - tg) >= tmin: # # --- limite de precision machine (arret de secours) ? # for i in range(n): z = xn[i] + t * d[i] if z != xn[i] and z != x[i]: goto950 = 1 break if not goto950: # # --- arret sur dxmin ou de secours # logic = 6 # # si indicd<0, derniers calculs non faits par simul # if indicd < 0: logic = indicd # # si tg=0, xn = xn_depart, # sinon on prend xn=x_gauche qui fait decroitre f # if tg != 0.0: fn = fg xn = xn + tg * d if imp <= 0: return x, fn, g, nap, logic info( " mlis0 stop_on_tmin step functions derivatives" ) info( " mlis0 " + str(tg) + " " + str(fg) + " " + str(fpg) ) if logic == 6: info( " mlis0 " + str(td) + " " + str(fg) + " " + str(fpd) ) if logic == 7: info( " mlis0 " + str(td) + " indic=" + str(indicd) ) return x, fn, g, nap, logic # # recopiage de x et boucle # x = xn + t * d return x, fn, g, nap, logic
# # ----------------------------------------------------------------------- #
[docs] def descentdir( n, sscale, nm, depl, jmin, jmax, precos, diag, alpha, ybar, sbar ): """Apply the inverse L-BFGS two-loop recursion (H·g product). Computes the product of the approximate inverse Hessian ``H`` with a vector ``depl`` (typically the gradient) using the L-BFGS two-loop recursion (Nocedal, 1980). ``H`` is constructed from ``nm`` stored curvature pairs (``ybar``, ``sbar``) and a diagonal preconditioner ``diag``. Reference: Nocedal (1980), *Updating Quasi-Newton Matrices with Limited Storage*, Mathematics of Computation 35/151, 773–782. Args: n (int): dimension of the problem. sscale (bool): ``True`` for scalar initial scaling (SIS), ``False`` for diagonal initial scaling (DIS). nm (int): number of stored L-BFGS memory pairs. depl (np.ndarray): input vector ``g``; overwritten with ``H·g`` on exit, shape ``(n,)``. jmin (int): index of oldest stored pair. jmax (int): index of most recent stored pair. precos (float): scalar preconditioner (used in SIS mode). diag (np.ndarray): diagonal preconditioner, shape ``(n,)``. alpha (np.ndarray): work array, shape ``(nm,)``. ybar (np.ndarray): stored gradient differences, shape ``(n, 10)``. sbar (np.ndarray): stored iterate differences, shape ``(n, 10)``. Returns: np.ndarray: ``depl`` updated in-place to ``H·g``. """ jfin = jmax if jfin < jmin: jfin = jmax + nm # # phase de descente # for j in range(jfin, jmin - 1, -1): jp = j if jp >= nm: jp -= nm ps = np.dot(depl, sbar[jp, :]) alpha[jp] = ps depl = depl - ps * ybar[jp, :] # # preconditionnement # if sscale: depl = depl * precos else: depl = depl * diag # # remontee # for j in range(jmin, jfin + 1): jp = j if jp >= nm: jp = jp - nm ps = np.dot(depl, ybar[jp, :]) r = alpha[jp] - ps depl = depl + r * sbar[jp, :] return depl
[docs] def ecube(t, f, fp, ta, fa, fpa, tlower, tupper): """Compute a safeguarded cubic interpolation step. Given function values and derivatives at two points ``t`` and ``ta``, fits a cubic polynomial and returns its minimiser clamped to ``[tlower, tupper]``. Args: t (float): current trial step. f (float): function value at ``t``. fp (float): derivative at ``t``. ta (float): previous trial step. fa (float): function value at ``ta``. fpa (float): derivative at ``ta``. tlower (float): lower bound for the new step. tupper (float): upper bound for the new step. Returns: float: new safeguarded cubic-interpolation step in ``[tlower, tupper]``. """ z1 = fp + fpa - 3.0 * (fa - f) / (ta - t) b = z1 + fp # # first compute the discriminant (without overflow) # if abs(z1) <= 1.0: discri = z1 * z1 - fp * fpa else: discri = fp / z1 discri = discri * fpa discri = z1 - discri if z1 * discri > 0.0: discri = z1 * discri else: discri = -1.0 if discri < 0.0: if fp < 0.0: t = tupper if fp >= 0.0: t = tlower else: # # discriminant nonnegative, compute solution (without overflow) # discri = np.sqrt(discri) if t - ta < 0.0: discri = -discri sign = (t - ta) / abs(t - ta) if b * sign > 0.0: t = t + fp * (ta - t) / (b + discri) else: den = z1 + b + fpa anum = b - discri if abs((t - ta) * anum) < (tupper - tlower) * abs(den): t = t + anum * (ta - t) / den else: t = tupper t = max(t, tlower) t = min(t, tupper) return t