Source code for pycif.plugins.minimizers.congrad.congrad

import copy
import sys

import numpy as np

from logging import info, debug
from .wrevecs import wrevecs


[docs] def congrad(self, px, pgrad, planc1, **kwargs): """Execute the CONGRAD Lanczos inner loop. Builds a Krylov subspace basis via the Lanczos iteration, solves the resulting tridiagonal least-squares problem, and simultaneously accumulates the leading eigenvector estimates (``pevecs``) of the inverse Hessian. Args: self (Plugin): CONGRAD plugin instance with all parameters already resolved by ``check_options`` (``zreduc``, ``pevbnd``, ``kvadim``, ``maxiter``, ``kverbose``, ``ldsolve``). px (np.ndarray): initial iterate :math:`\chi_0`, shape ``(n,)``. pgrad (np.ndarray): initial gradient :math:`\\nabla J(\chi_0)`, shape ``(n,)``. planc1 (np.ndarray): first Lanczos vector (copy of ``pgrad`` on entry), shape ``(n,)``. **kwargs: forwarded to the simulator. Returns: tuple: ``(xopt, gradopt, preduc, pevecs, iiter)`` * ``xopt`` (np.ndarray): optimal iterate. * ``gradopt`` (np.ndarray): gradient at ``xopt``. * ``preduc`` (float): achieved relative gradient-norm reduction. * ``pevecs`` (np.ndarray): estimated leading eigenvectors of :math:`(\\mathbf{I} + \\mathbf{H}\\mathbf{B}\\mathbf{H}^\\top)^{-1}`, shape ``(maxiter, n)``. * ``iiter`` (int): number of Lanczos iterations performed. """ # Simulator simulator = self.simulator # Initializes variables from the minimizer zreqrd = self.zreduc pevbnd = self.pevbnd kvadim = self.kvadim kmaxit = self.maxiter knevecout = self.knevecout kverbose = self.kverbose ldsolve = self.ldsolve logfile = self.logfile if kverbose > 0: info("- Lanczos Solver -") info(f"Parameters for the Solver: \n" f"- zreqrd: {zreqrd}\n" f"- pevbnd: {pevbnd}\n" f"- kvadim: {kvadim}\n" f"- kmaxit: {kmaxit}\n" f"- knevecout: {knevecout}\n" f"- kverbose: {kverbose}\n" f"- ldsolve: {ldsolve}\n" f"- logfile: {logfile}\n") # Other local parameters idprob = kvadim itheta1 = 0 ztheta1 = 0.0 # Declare local arrays zqg0 = np.zeros(kmaxit + 1) zbeta = np.zeros(kmaxit + 1) zbnds = np.zeros(kmaxit) zdelta = np.zeros(kmaxit) zv = np.zeros((kmaxit + 1, kmaxit + 1)) zw = np.zeros(kvadim) zetheta = np.zeros(kmaxit) zcglwk = np.zeros((kmaxit + 1, kvadim)) # 'zeta' is an upper bound on the relative error of the gradient. zeta = 1e-4 preduc = 1.0 # (Note: the factor 0.5 below is because simul defines the cost # function to be J = transpose(chi)*(chi) + 2*Jo, which makes # J''b = 2I, whereas CONGRAD defines J to be half of this value, # so that J''b = I.) # pgrad /= 2.0 zgnorm = np.sqrt(np.dot(pgrad, pgrad)) if kverbose > 0: info(f"Initial norm of the gradient: {zgnorm}") znorm2l1 = np.dot(planc1, planc1) zcglwk[0] = planc1 / 2. zcglwk[0] /= np.sqrt(np.dot(zcglwk[0], zcglwk[0])) # Save initial control vector and gradient zgrad0 = copy.deepcopy(pgrad) zx0 = copy.deepcopy(px) zqg0[0] = np.dot(zcglwk[0], zgrad0) # Lanczos iteration starts here ingood = 0 iiter = 0 while 1: # Lanczos_loop # Evaluate the Hessian applied to the latest Lanczos vector print("AAAAAAAAAAAA") print(zx0.min(), zx0.max(), zcglwk[iiter, :].min(), zcglwk[iiter, :].max()) zw = zx0 + zcglwk[iiter, :] _, pgrad = simulator.simul( zw, run_id=iiter, linear=self.force_linearize, **kwargs) # pgrad *= 0.5 print("BBBBBBBBBBB") print(pgrad.min(), pgrad.max(), zgrad0.min(), zgrad0.max()) pgrad -= zgrad0 # Calculate zdelta zdelta[iiter] = np.dot(zcglwk[iiter, :], pgrad) if zdelta[iiter] <= 0.0: iiter -= 1 info("CONGRAD: Hessian is not positive definite") info("Stopping after {} iterations".format(iiter)) sys.exit() # Calculate the new Lanczos vector (This is the Lanczos recurrence) pgrad -= zdelta[iiter] * zcglwk[iiter] print("CCCCCCCCCCCC") print(pgrad.min(), pgrad.max()) if iiter > 0: pgrad -= zbeta[iiter] * zcglwk[iiter - 1] # Orthonormalize gradient against previous gradients for jm in range(iiter, -1, -1): dla = np.dot(pgrad, zcglwk[jm, :]) pgrad -= dla * zcglwk[jm, :] print("DDDDDDDDDDDDD") print(pgrad.min(), pgrad.max()) zbeta[iiter + 1] = np.sqrt(np.dot(pgrad, pgrad)) zcglwk[iiter + 1, :] = pgrad / zbeta[iiter + 1] zqg0[iiter + 1] = np.dot(zcglwk[iiter + 1, :], zgrad0) # Calculate the reduction in the gradient norm if ldsolve: zwork1 = zdelta[0: iiter + 1] zwork2 = zbeta[1: iiter + 1] zwork3 = np.zeros(iiter + 1) zwork3[0: iiter + 1] = -zqg0[0: iiter + 1] zmat = np.zeros((iiter + 1, iiter + 1)) for i in range(iiter + 1): zmat[i, i] = zwork1[i] for i in range(iiter): zmat[i, i + 1], zmat[i + 1, i] = zwork2[i], zwork2[i] zwork3 = np.linalg.solve(zmat, zwork3) zw[:] = ( zgrad0[:] + zbeta[iiter + 1] * zcglwk[iiter + 1, :] * zwork3[iiter] ) for j in range(iiter + 1): zw[:] = zw[:] - zcglwk[j, :] * zqg0[j] preduc_prev = copy.copy(preduc) preduc = np.sqrt(np.dot(zw, zw)) / zgnorm if kverbose > 1: debug("Reduction in norm of gradient is: " + str(preduc)) else: preduc = 1.0 # Determine eigenvalues and eigenvectors of the tri-diagonal problem zwork4 = zdelta[0: iiter + 1] zwork = zbeta[1: iiter + 1] if iiter != 0: zmat = np.diag(zwork4) for i in range(iiter): zmat[i, i + 1], zmat[i + 1, i] = zwork[i], zwork[i] zritz, zv = np.linalg.eigh(zmat) else: zv[0, 0] = 1.0 zritz = zwork4 if kverbose > 1: debug( "congrad: ritz values are: {}".format(zritz[0: iiter + 1]) ) # Estimate error bounds zbndlm = zeta * zritz[iiter] zbnds[0: iiter + 1] = abs(zbeta[iiter + 1] * zv[iiter, 0: iiter + 1]) if kverbose > 1: debug( "congrad: error bounds are: " + str(zbnds[0: iiter + 1]) ) # Check for exploding or negative Ritz values if np.min(zritz[0: iiter + 1]) < 0.0: if kverbose > 0: info("congrad: stopping: negative ritz value") preduc = preduc_prev iiter = iiter - 1 zwork4 = zdelta[0: iiter + 1] zwork = zbeta[1: iiter + 1] if iiter > 0: zmat = np.zeros((iiter + 1, iiter + 1)) for i in range(iiter + 1): zmat[i, i] = zwork4[i] for i in range(iiter): zmat[i, i + 1], zmat[i + 1, i] = zwork[i], zwork[i] zritz, zv = np.linalg.eigh(zmat) else: zv[0, 0] = 1.0 zbnds[0: iiter + 1] = abs( zbeta[iiter + 1] * zv[iiter, 0: iiter + 1] ) break # Lanczos loop if ingood > 0: if zritz[itheta1] > 1.01 * ztheta1 and kverbose > 0: info("CONGRAD: warning -- ritz values exploding") info("leading ritz value=" + str(zritz[itheta1])) info("leading converged eigenvalue=" + str(ztheta1)) # Count the converged eigenvectors ing = 0 for jm in range(iiter + 1): if zbnds[jm] <= zbndlm: ing = ing + 1 if kverbose > 1: debug( "leading converged eigenvalue=" + str(zritz[jm]) ) # Deal with newly converged eigenvector and recompute eigenvectors ingood = ing # Save leading converged eigenvalue for explosion test if ingood > 0: for jm in range(iiter, -1, -1): if zbnds[jm] <= zbndlm: ztheta1 = zritz[jm] itheta1 = jm break if kverbose > 1: debug("congrad: End of iteration: " + str(iiter + 1)) if iiter >= (kmaxit - 1) or preduc <= zreqrd: break # Lanczos loop iiter = iiter + 1 if ingood > 0: itheta1 = itheta1 + 1 # End of Lanczos iteration if kverbose > 0: info("Summary of Lanczos iteration:") info(" Number of iterations performed: " + str(iiter + 1)) if ldsolve: info(" Maximum allowed number of iterations: " + str(kmaxit)) info(" Required reduction in norm of gradient: " + str(zreqrd)) info(" Achieved reduction in norm of gradient: " + str(preduc)) if preduc > zreqrd: info(" *** Failed to achieve required reduction in gradient ***") # Calculate sufficiently converged eigenvectors of the # preconditioned Hessian if ldsolve: zbndlm = pevbnd zbnds[0: iiter + 1] = zbnds[0: iiter + 1] / zritz[0: iiter + 1] ztheta, zvcglev, zrcglev, ingood = wrevecs( idprob, iiter, kmaxit, kverbose, zritz, zbnds, zbndlm, zv, zcglwk, logfile ) if ingood == 0 and kverbose > 0: info("Warning: congrad found no eigenpairs") if kverbose > 0: info( "number of eigenpairs converged to requested accuracy=" + str(ingood) ) ########### # pevecs = np.zeros((knevecout,idprob)) # return px, pgrad, preduc, pevecs, iter ########### if not ldsolve: # Determine bounds on the quadratic form: v'*inv(J'')*v # (where v=zgrad0) using theorem 5.3 of Golub and Meurant 1994. zbjm1 = 1.0 / zdelta[0] zdjm1 = zdelta[0] zcjm1 = 1.0 zdhatjm1 = zdelta[0] - 1.0 zdbarjm1 = zdelta[0] - ztheta1 pgolubl_inv = np.zeros(iiter + 1) pgolubu_inv = np.zeros(iiter + 1) for j in range(1, iiter + 1): # zbj is the Gauss rule lower bound: zbj = zbjm1 + zbeta[j] * zbeta[j] * zcjm1 * zcjm1 / ( zdjm1 * (zdelta[j] * zdjm1 - zbeta[j] * zbeta[j]) ) zdj = zdelta[j] - zbeta[j] * zbeta[j] / zdjm1 zcj = zcjm1 * zbeta[j] / zdjm1 zdhatj = zdelta[j] - 1.0 - zbeta[j] * zbeta[j] / zdhatjm1 zdbarj = zdelta[j] - ztheta1 - zbeta[j] * zbeta[j] / zdbarjm1 zohatj = 1.0 + zbeta[j + 1] * zbeta[j + 1] / zdhatj zobarj = ztheta1 + zbeta[j + 1] * zbeta[j + 1] / zdbarj # zbhatj is the Gauss-Radau rule upper bound: # zbbarj is the Gauss-Radau rule lower bound: zbhatj = zbj + zbeta[j + 1] * zbeta[j + 1] * zcj * zcj / ( zdj * (zohatj * zdj - zbeta[j + 1] * zbeta[j + 1]) ) zbbarj = zbj + zbeta[j + 1] * zbeta[j + 1] * zcj * zcj / ( zdj * (zobarj * zdj - zbeta[j + 1] * zbeta[j + 1]) ) # zbuj is the Gauss-Lobatto rule upper bound: zouj = ( zdhatj * zdbarj * (ztheta1 / zdhatj - 1.0 / zdbarj) / (zdbarj - zdhatj) ) zgamuj2 = zdhatj * zdbarj * (ztheta1 - 1.0) / (zdbarj - zdhatj) zbuj = zbj + zgamuj2 * zcj * zcj / (zdj * (zouj * zdj - zgamuj2)) zbjm1 = zbj zdjm1 = zdj zcjm1 = zcj zdhatjm1 = zdhatj zdbarjm1 = zdbarj pgolubl_inv[j] = max(zbbarj, zbj) pgolubu_inv[j] = min(zbhatj, zbuj) # Calculate bounds on (planc1)' log[(J'')^-1] planc1 # note that the former computation is an alternate version of # the one just above # The lines for the inv have been commented in this alternate algorithm pgolubu_inv = np.zeros(iiter + 1) pgolubu_log = np.zeros(iiter + 1) pgolubu_inv[0] = zv[0, 0] * zv[0, 0] / zritz[0] pgolubu_log[0] = zv[0, 0] * zv[0, 0] * np.log(zritz[0]) for j in range(1, iiter + 1): pgolubu_inv[j] = ( pgolubu_inv[j - 1] + zv[0, j] * zv[0, j] / zritz[j] ) pgolubu_log[j] = pgolubu_log[j - 1] + zv[0, j] * zv[0, j] * np.log( zritz[j] ) za = 1.0 # za must be less than the smallest eigenvalue # za = 1.e-8 zwork1 = zdelta[0: iiter + 1] - za zwork2 = zbeta[1: iiter + 1] zwork3 = np.zeros(iiter + 1) zwork3[iiter] = zbeta[iiter + 1] * zbeta[iiter + 1] zmat = np.zeros((iiter + 1, iiter + 1)) for i in range(iiter + 1): zmat[i, i] = zwork1[i] for i in range(iiter): zmat[i, i + 1], zmat[i + 1, i] = zwork2[i], zwork2[i] zwork3 = np.linalg.solve(zmat, zwork3) zwork4 = np.zeros(iiter + 2) zwork4[0: iiter + 1] = zdelta[0: iiter + 1] zwork4[iiter + 1] = za + zwork3[iiter] zwork = np.zeros(iiter + 1) zwork[0: iiter + 1] = zbeta[1: iiter + 2] iiter += 1 zmat = np.zeros((iiter + 1, iiter + 1)) for i in range(iiter + 1): zmat[i, i] = zwork4[i] for i in range(iiter): zmat[i, i + 1], zmat[i + 1, i] = zwork[i], zwork[i] zritz, zv = np.linalg.eigh(zmat) iiter -= 1 pgolubl_inv = np.zeros(iiter + 1) pgolubl_log = np.zeros(iiter + 1) pgolubl_inv[0] = zv[0, 0] * zv[0, 0] / zwork4[0] pgolubl_log[0] = zv[0, 0] * zv[0, 0] * np.log(zwork4[0]) for j in range(1, iiter + 1): pgolubl_inv[j] = ( pgolubl_inv[j - 1] + zv[0, j] * zv[0, j] / zwork4[j] ) pgolubl_log[j] = pgolubl_log[j - 1] + zv[0, j] * zv[0, j] * np.log( zwork4[j] ) # Convert bounds to degrees of freedom for signal and entropy pgolubu_inv = znorm2l1 * (1.0 - pgolubu_inv) pgolubl_inv = znorm2l1 * (1.0 - pgolubl_inv) pgolubu_log = 0.5 * znorm2l1 * pgolubu_log / np.log(2.0) pgolubl_log = 0.5 * znorm2l1 * pgolubl_log / np.log(2.0) # Calculate the solution vector and gradient if ldsolve: zwork1 = zdelta[0: iiter + 1] zwork2 = zbeta[1: iiter + 1] zwork3 = np.zeros(iiter + 1) zwork3[0: iiter + 1] = -zqg0[0: iiter + 1] zmat = np.zeros((iiter + 1, iiter + 1)) for i in range(iiter + 1): zmat[i, i] = zwork1[i] for i in range(iiter): zmat[i, i + 1], zmat[i + 1, i] = zwork2[i], zwork2[i] zwork3 = np.linalg.solve(zmat, zwork3) for j in range(iiter + 1): px[:] = px[:] + zcglwk[j, :] * zwork3[j] pgrad -= zcglwk[j, :] * zqg0[j] else: pgrad = copy.deepcopy(zgrad0) # Determine eigenpairs of the un-preconditioned Hessian in 'chi' # space, transform to model variable space and write out. pevecs = np.zeros((knevecout, idprob)) if ldsolve: intot = ingood if intot > 0: # subroutine xformev removed - F Chevallier 4 April 2012 knevecout = min(intot, knevecout) for jk in range(knevecout): pevecs[jk, :] = zvcglev[jk, :] * np.sqrt( 1.0 - 1.0 / zrcglev[jk] ) # Transform control variable and gradient back to space with scalar # product SCAAS # pgrad *= 2.0 # Return the number of iterations actually performed iiter = iiter + 1 if ldsolve: return px, pgrad, preduc, pevecs, iiter else: return pgolubl_inv, pgolubu_inv, pgolubl_log, pgolubu_log, iiter