Source code for pycif.plugins.minimizers.m1qn3.aux


import numpy as np


from logging import info



[docs]
def mlis0(
    self,
    xn,
    fn,
    fpn,
    t,
    tmin,
    tmax,
    d,
    g,
    amd,
    amf,
    imp,
    io,
    nap,
    napmax,
    niter,
    **kwargs
):
    """Wolfe line search for M1QN3 (cubic interpolation with safeguards).

    Finds a step ``t`` satisfying the Wolfe conditions starting from ``xn``
    along descent direction ``d``.  Uses systematic cubic interpolation and
    a progressively shrinking barrier (``barr``) to prevent overshoots.

    Args:
        self (Plugin): M1QN3 plugin instance (provides the simulator).
        xn (np.ndarray): current iterate, shape ``(n,)``.
        fn (float): function value at ``xn``.
        fpn (float): directional derivative at ``xn`` (must be < 0).
        t (float): initial step-length trial.
        tmin (float): minimum acceptable step length.
        tmax (float): maximum acceptable step length.
        d (np.ndarray): descent direction, shape ``(n,)``.
        g (np.ndarray): gradient at ``xn``, shape ``(n,)``; updated on exit.
        amd (float): sufficient-curvature (Wolfe) parameter (0 < amf < amd < 1).
        amf (float): sufficient-decrease (Armijo) parameter.
        imp (int): verbosity level.
        io (int): log-file unit (unused in Python version).
        nap (int): simulator call counter on entry; incremented on exit.
        napmax (int): maximum number of simulator calls allowed.
        niter (int): current iteration count (for logging only).
        **kwargs: forwarded to the simulator.

    Returns:
        tuple: ``(xn, fn, g, nap, logic)``

        * ``xn`` (np.ndarray): accepted point.
        * ``fn`` (float): function value at the accepted point.
        * ``g`` (np.ndarray): gradient at the accepted point.
        * ``nap`` (int): updated simulator call counter.
        * ``logic`` (int): exit code —
          0 = serious step accepted,
          1 = step blocked,
          4 = simulator call limit reached,
          5 = user-requested return,
          6 = function/gradient inconsistency on entry.
          < 0 = implicit constraint activated
    """

    # Simulator
    simulator = self.simulator

    n = len(xn)

    if not (
        n > 0
        and fpn < 0.0
        and t > 0.0
        and tmax > 0.0
        and amf > 0.0
        and amd > amf
        and amd < 1.0
    ):
        toprint = f"""
        MODE == 6!!!!
        n = {n}
        fpn = {fpn}
        t = {t}
        tmax = {tmax}
        amf = {amf}
        amd = {amd}
        """
        info(toprint)

        logic = 6
        return xn, fn, g, nap, logic

    tesf = amf * fpn
    tesd = amd * fpn
    barmin = 0.01
    barmul = 5.0
    barmax = 0.3
    barr = barmin
    td = 0.0
    tg = 0.0
    fg = fn
    fpg = fpn
    ta = 0.0
    fa = fn
    fpa = fpn
    ps = np.dot(d, d)
    d2 = ps
    #
    #               elimination d'un t initial ridiculement petit
    #
    t = max(t, tmin)
    if t > tmax:
        tmin = tmax
        info("     mlis0          tmin forced to tmax")

    while (fn + t * fpn) >= (fn + 0.9 * t * fpn):
        t = 2.0 * t

    indica = 1
    logic = 0
    if t > tmax:
        t = tmax
        logic = 1

    if imp >= 4:
        info(
            "     mlis0   fpn="
            + str(fpn)
            + " d2="
            + str(d2)
            + " tmin="
            + str(tmin)
            + " tmax="
            + str(tmax)
            + "degrees"
        )

    #
    #     --- nouveau x
    #
    x = xn + t * d

    #
    # --- boucle
    #
    while 1:

        nap += 1
        if nap > napmax:
            logic = 4
            fn = fg
            xn = xn + tg * d
            return x, fn, g, nap, logic
        indic = 4
        #
        #     --- appel simulateur
        #
        f, g = simulator.simul(x, run_id=nap, **kwargs)
        # , 1999 + nap - 1, io

        indic = 1

        if indic == 0:
            #
            #         --- arret demande par l'utilisateur
            #
            logic = 5
            fn = f
            xn = x
            return x, fn, g, nap, logic

        if indic < 0:
            #
            #         --- les calculs n'ont pas pu etre effectues par le
            # simulateur
            #
            td = t
            indicd = indic
            logic = 0
            if imp >= 4:
                info("     mlis0  t=" + str(t) + " indic=" + str(indic))
            t = tg + 0.1 * (td - tg)

        else:
            #
            #     --- les tests elementaires sont faits, on y va
            #
            ps = np.dot(d, g)
            fp = ps
            #
            #     --- premier test de Wolfe
            #
            ffn = f - fn
            goto900 = 0
            if ffn > t * tesf:
                td = t
                fd = f
                fpd = fp
                indicd = indic
                logic = 0
                if imp >= 4:
                    info(
                        "     mlis0  t="
                        + str(t)
                        + " ffn="
                        + str(ffn)
                        + " fp="
                        + str(fp)
                    )

            else:
                #
                #     --- test 1 ok, donc deuxieme test de Wolfe
                #
                if imp >= 4:
                    info(
                        "     mlis0  t="
                        + str(t)
                        + " ffn="
                        + str(ffn)
                        + " fp="
                        + str(fp)
                    )

                if fp > tesd:
                    logic = 0
                    fn = f
                    xn = x
                    return x, fn, g, nap, logic

                if logic != 0:
                    #
                    #     --- test 2 ok, donc pas serieux, on sort
                    #
                    fn = f
                    xn = x
                    return x, fn, g, nap, logic

                tg = t
                fg = f
                fpg = fp
                if not td:
                    #
                    #              extrapolation
                    #
                    taa = t
                    gauche = (1.0 + barmin) * t
                    droite = 10.0 * t
                    t = ecube(t, f, fp, ta, fa, fpa, gauche, droite)
                    ta = taa
                    goto900 = 1
                    if t >= tmax:
                        logic = 1
                        t = tmax
            #
            #              interpolation
            #
            if not goto900 and indica <= 0:
                ta = t
                t = 0.9 * tg + 0.1 * td
                goto900 = 1

            if not goto900:
                test = barr * (td - tg)
                gauche = tg + test
                droite = td - test
                taa = t
                t = ecube(t, f, fp, ta, fa, fpa, gauche, droite)
                ta = taa
                if gauche < t < droite:
                    barr = max(barmin, barr / barmul)
                else:
                    barr = min(barmul * barr, barmax)
            #
            # --- fin de boucle
            #     - t peut etre bloque sur tmax
            #       (venant de l'extrapolation avec logic=1)
            #
            fa = f
            fpa = fp

        indica = indic
        #
        # --- faut-il continuer ?
        #
        if td != 0.0:
            goto950 = 0
            if (td - tg) >= tmin:
                #
                #     --- limite de precision machine (arret de secours) ?
                #
                for i in range(n):
                    z = xn[i] + t * d[i]
                    if z != xn[i] and z != x[i]:
                        goto950 = 1
                        break
            if not goto950:
                #
                # --- arret sur dxmin ou de secours
                #
                logic = 6
                #
                #     si indicd<0, derniers calculs non faits par simul
                #
                if indicd < 0:
                    logic = indicd
                #
                #     si tg=0, xn = xn_depart,
                #     sinon on prend xn=x_gauche qui fait decroitre f
                #
                if tg != 0.0:
                    fn = fg
                    xn = xn + tg * d
                if imp <= 0:
                    return x, fn, g, nap, logic

                info(
                    "  mlis0   stop_on_tmin   step   functions   derivatives"
                )
                info(
                    "  mlis0   " + str(tg) + "  " + str(fg) + " " + str(fpg)
                )

                if logic == 6:
                    info(
                        "  mlis0   "
                        + str(td)
                        + "  "
                        + str(fg)
                        + " "
                        + str(fpd)
                    )

                if logic == 7:
                    info(
                        "  mlis0   " + str(td) + "    indic=" + str(indicd)
                    )

                return x, fn, g, nap, logic
        #
        #               recopiage de x et boucle
        #
        x = xn + t * d

    return x, fn, g, nap, logic



#
# -----------------------------------------------------------------------
#



[docs]
def descentdir(
    n, sscale, nm, depl, jmin, jmax, precos, diag, alpha, ybar, sbar
):
    """Apply the inverse L-BFGS two-loop recursion (H·g product).

    Computes the product of the approximate inverse Hessian ``H`` with a
    vector ``depl`` (typically the gradient) using the L-BFGS two-loop
    recursion (Nocedal, 1980).  ``H`` is constructed from ``nm`` stored
    curvature pairs (``ybar``, ``sbar``) and a diagonal preconditioner
    ``diag``.

    Reference: Nocedal (1980), *Updating Quasi-Newton Matrices with Limited
    Storage*, Mathematics of Computation 35/151, 773–782.

    Args:
        n (int): dimension of the problem.
        sscale (bool): ``True`` for scalar initial scaling (SIS),
            ``False`` for diagonal initial scaling (DIS).
        nm (int): number of stored L-BFGS memory pairs.
        depl (np.ndarray): input vector ``g``; overwritten with ``H·g``
            on exit, shape ``(n,)``.
        jmin (int): index of oldest stored pair.
        jmax (int): index of most recent stored pair.
        precos (float): scalar preconditioner (used in SIS mode).
        diag (np.ndarray): diagonal preconditioner, shape ``(n,)``.
        alpha (np.ndarray): work array, shape ``(nm,)``.
        ybar (np.ndarray): stored gradient differences, shape ``(n, 10)``.
        sbar (np.ndarray): stored iterate differences, shape ``(n, 10)``.

    Returns:
        np.ndarray: ``depl`` updated in-place to ``H·g``.
    """
    jfin = jmax
    if jfin < jmin:
        jfin = jmax + nm
    #
    #         phase de descente
    #
    for j in range(jfin, jmin - 1, -1):
        jp = j
        if jp >= nm:
            jp -= nm
        ps = np.dot(depl, sbar[jp, :])
        alpha[jp] = ps
        depl = depl - ps * ybar[jp, :]
    #
    #         preconditionnement
    #
    if sscale:
        depl = depl * precos
    else:
        depl = depl * diag
    #
    #         remontee
    #
    for j in range(jmin, jfin + 1):
        jp = j
        if jp >= nm:
            jp = jp - nm

        ps = np.dot(depl, ybar[jp, :])
        r = alpha[jp] - ps
        depl = depl + r * sbar[jp, :]

    return depl




[docs]
def ecube(t, f, fp, ta, fa, fpa, tlower, tupper):
    """Compute a safeguarded cubic interpolation step.

    Given function values and derivatives at two points ``t`` and ``ta``,
    fits a cubic polynomial and returns its minimiser clamped to
    ``[tlower, tupper]``.

    Args:
        t (float): current trial step.
        f (float): function value at ``t``.
        fp (float): derivative at ``t``.
        ta (float): previous trial step.
        fa (float): function value at ``ta``.
        fpa (float): derivative at ``ta``.
        tlower (float): lower bound for the new step.
        tupper (float): upper bound for the new step.

    Returns:
        float: new safeguarded cubic-interpolation step in ``[tlower, tupper]``.
    """

    z1 = fp + fpa - 3.0 * (fa - f) / (ta - t)
    b = z1 + fp
    #
    #              first compute the discriminant (without overflow)
    #
    if abs(z1) <= 1.0:
        discri = z1 * z1 - fp * fpa
    else:
        discri = fp / z1
        discri = discri * fpa
        discri = z1 - discri
        if z1 * discri > 0.0:
            discri = z1 * discri
        else:
            discri = -1.0

    if discri < 0.0:
        if fp < 0.0:
            t = tupper
        if fp >= 0.0:
            t = tlower

    else:
        #
        #  discriminant nonnegative, compute solution (without overflow)
        #
        discri = np.sqrt(discri)
        if t - ta < 0.0:
            discri = -discri

        sign = (t - ta) / abs(t - ta)
        if b * sign > 0.0:
            t = t + fp * (ta - t) / (b + discri)

        else:
            den = z1 + b + fpa
            anum = b - discri
            if abs((t - ta) * anum) < (tupper - tlower) * abs(den):
                t = t + anum * (ta - t) / den
            else:
                t = tupper

    t = max(t, tlower)
    t = min(t, tupper)

    return t