Source code for pycif.plugins.modes.ensrf.metrics

import numpy as np
import xarray as xr
from logging import info, warning
import scipy
import os
import copy
from .segment_cropping import get_cntrlv_idx_segment
from ....utils.dates import date_range

[docs] class Metrics_ensrf(): def __init__(self, controlvect, obsvect, window_length, windows_datei, nlag): # Get basic data from configuration self.windows_datei = windows_datei[:-1] self.windows_datef = windows_datei[1:] self.nwindows = len(self.windows_datei) self.window_length = window_length self.nlag = nlag self.ncycles = len(windows_datei[:-nlag]) self.stations = np.unique(obsvect.metadata['station']) self.nstations = len(self.stations) # Get a list of (comp, trcr) self.trids_cntrlv = [] self.trids_obs = [] components = controlvect.datavect.components for comp in components.attributes: component = getattr(components, comp) if not hasattr(component, "parameters"): continue for trcr in component.parameters.attributes: if comp in ["concs"]: self.trids_obs.append(("/".join([comp, trcr]))) else: tracer = getattr(component.parameters, trcr) if not tracer.iscontrol: continue self.trids_cntrlv.append(("/".join([comp, trcr]))) self.ntrids_obs = len(self.trids_obs) self.ntrids_cntrlv = len(self.trids_cntrlv) # Compute, the spatial resolution and # the dof of B for the horizontal domain dof_b_hresol, hresol = self.compute_dof_b_hresol(controlvect) # Create the metrics dataset self.data = xr.Dataset( data_vars=dict( hresol=([], hresol), dof_b_hresol=([], dof_b_hresol), first_cycle_idx=(['window'], np.empty((self.nwindows + 1))), last_cycle_idx=(['window'], np.empty((self.nwindows + 1))), nobs_assim=(['cycle', 'window', 'trid_obs', 'station'], np.empty((self.ncycles + 1, self.nwindows + 1, self.ntrids_obs + 1, self.nstations + 1))), nstations_assim=(['cycle', 'window', 'trid_obs',], np.empty((self.ncycles + 1, self.nwindows + 1, self.ntrids_obs + 1,))), mean_obserr=(['cycle', 'window', 'trid_obs', 'station'], np.empty((self.ncycles + 1, self.nwindows + 1, self.ntrids_obs + 1, self.nstations + 1))), rmsd=(['cycle', 'window', 'trid_obs', 'station', 'iteration'], np.empty((self.ncycles + 1, self.nwindows + 1, self.ntrids_obs + 1, self.nstations + 1, 3))), rmse=(['cycle', 'window', 'trid_obs', 'station', 'iteration'], np.empty((self.ncycles + 1, self.nwindows + 1, self.ntrids_obs + 1, self.nstations + 1, 3))), jo=(['cycle', 'window', 'iteration'], np.empty((self.ncycles + 1, self.nwindows + 1, 3))), jb=(['cycle', 'window', 'iteration'], np.empty((self.ncycles + 1, self.nwindows + 1, 3))), jtot=(['cycle', 'window', 'iteration'], np.empty((self.ncycles + 1, self.nwindows + 1, 3))), chi_square=(['cycle', 'window', 'iteration'], np.empty((self.ncycles + 1, self.nwindows + 1, 3))), mean_self_sens=(['cycle', 'window', 'trid_obs', 'station'], # accumulated np.empty((self.ncycles + 1, self.nwindows + 1, self.ntrids_obs + 1, self.nstations + 1))), dofs=(['cycle', 'window', 'trid_obs', 'station'], # accumulated np.empty((self.ncycles + 1, self.nwindows + 1, self.ntrids_obs + 1, self.nstations + 1))), dof_ens=(['cycle', 'window', 'iteration'], np.empty((self.ncycles + 1, self.nwindows + 1, 3))), ), coords=dict( cycle=np.append( (np.arange(self.ncycles) + 1).astype('object'), 'full' ), window=np.append(self.windows_datei, 'full'), iteration=['prior', 'prior_seg', 'posterior'], station=np.append(self.stations, 'all'), trid_obs=np.append(self.trids_obs, 'all'), trid_cntrlv=np.append(self.trids_cntrlv, 'all'), ), ) # Fill data with Nans for var in self.data.data_vars: if var not in ['hresol', 'dof_b_hresol']: self.data[var][:] = np.nan # Fill basic observation data for the entire period for trid in self.data.trid_obs.values: for station in self.data.station.values: mask_obs = obsvect.obsvect_mask if trid != 'all': trcr = trid.split("/")[1] mask_obs = mask_obs & (obsvect.metadata["parameter"] == trcr) if station != 'all': mask_obs = mask_obs & (obsvect.metadata["station"] == station) self.data['nobs_assim'].loc['full', 'full', trid, station] = \ np.sum(mask_obs) if station == 'all': self.data['nstations_assim'].loc['full', 'full', trid] = \ len(np.unique(obsvect.metadata.loc[mask_obs, 'station'])) if np.sum(mask_obs) == 0: self.data['mean_obserr'].loc['full', 'full', trid, station] = \ np.nan else: self.data['mean_obserr'].loc['full', 'full', trid, station] = \ np.sqrt(np.nanmean(obsvect.yobs_err[mask_obs] ** 2))
[docs] def update(self, level, segments_datei, iseg, seg_ddi, seg_ddf, controlvect, obsvect, mask_obsa, mask_obs_assimilated, x_samples, x_samples_segprior, hx_samples, sim_segprior, sim_post): x_samples_dev = copy.deepcopy(x_samples[3:] - x_samples[2]) x_samples_segprior_dev = copy.deepcopy(x_samples_segprior[3:] - x_samples_segprior[2]) hx_samples_dev = copy.deepcopy(hx_samples[:, 3:] - hx_samples[:, 2:3]) sim_prior = hx_samples[:, 1] windows_seg_datei = date_range(seg_ddi, seg_ddf, self.window_length) mask_cntrlv_idx_s = get_cntrlv_idx_segment(controlvect, seg_ddi, seg_ddf) chi_prior_seg = controlvect.sqrtbprod_ad(controlvect.x - controlvect.xb, inverse=True) chi_post = controlvect.sqrtbprod_ad(controlvect.xa - controlvect.xb, inverse=True) wdis = windows_seg_datei[:-1] wdfs = windows_seg_datei[1:] if level > 1: self.compute_dof_ensemble( iseg, wdis, x_samples_segprior_dev, x_samples_dev, mask_cntrlv_idx_s ) for iwdi, (wdi, wdf) in enumerate(zip(wdis, wdfs)): if np.isnan(self.data['first_cycle_idx'].loc[wdi]): self.data['first_cycle_idx'].loc[wdi] = iseg # Calculate observation mask for the current window mask_window = (wdi <= obsvect.metadata["date"]) \ & (obsvect.metadata["enddate"] < wdf) mask_obsa_w = mask_window & mask_obs_assimilated mask_cntrlv_idx_w = get_cntrlv_idx_segment(controlvect, wdi, wdf) is_prior_window = False is_post_window = False if iseg == len(segments_datei) - 1 or iwdi == 0: # means the window will not be optimized again is_post_window = True if iseg == 0 or iwdi == self.nlag - 1: # means the window is optimized for the first time is_prior_window = True if is_prior_window: self.compute_analysis_metrics( obsvect, iseg, wdi, hx_samples_dev, mask_obsa ) self.compute_obs_metrics( obsvect, iseg, wdi, is_prior_window, is_post_window, mask_obsa_w, sim_prior, sim_segprior, sim_post ) self.compute_cost_function( obsvect, iseg, wdi, is_post_window, chi_prior_seg, chi_post, hx_samples[:, 1], sim_segprior, sim_post, mask_obsa_w, mask_cntrlv_idx_w ) return
[docs] def compute_obs_metrics(self, obsvect, iseg, wdi, is_prior_window, is_post_window, mask_obsa_w, sim_prior, sim_segprior, sim_post): # Loop over all the trid and stations for trid in self.data.trid_obs.values: for station in self.data.station.values: mask_obs = obsvect.obsvect_mask if trid != 'all': trcr = trid.split("/")[1] mask_obs = mask_obs & (obsvect.metadata["parameter"] == trcr) if station != 'all': mask_obs = mask_obs & (obsvect.metadata["station"] == station) mask_obs = mask_obsa_w & mask_obs if is_prior_window: # Number of assimilated stations # --------------------------------------- if station == 'all': self.data['nstations_assim'].loc[iseg + 1, wdi, trid] = \ len(np.unique(obsvect.metadata.loc[mask_obs, 'station'])) # Number of assimilated observations # --------------------------------------- if np.sum(mask_obs) == 0: self.data['nobs_assim'].loc[iseg + 1, wdi, trid, station] = 0 else: self.data['nobs_assim'].loc[iseg + 1, wdi, trid, station] = \ len(obsvect.yobs[mask_obs]) # Mean observation error # --------------------------------------- if np.sum(mask_obs) == 0: self.data['mean_obserr'].loc[iseg + 1, wdi, trid, station] = np.nan else: self.data['mean_obserr'].loc[iseg + 1, wdi, trid, station] = \ np.sqrt(np.nanmean(obsvect.yobs_err[mask_obs] ** 2)) if np.sum(mask_obs) == 0: continue # Root Mean Square Deviation (RMSD) # --------------------------------------- # Fill the prior for all windows dep_prior = (sim_prior - obsvect.yobs)[mask_obs] rmsd_prior = np.sqrt(np.nanmean(dep_prior ** 2)) self.data['rmsd'].loc[iseg + 1, wdi, trid, station, 'prior'] = rmsd_prior # Fill the prior_seg for all windows dep_prior_seg = (sim_segprior - obsvect.yobs)[mask_obs] rmsd_prior_seg = np.sqrt(np.nanmean(dep_prior_seg ** 2)) self.data['rmsd'].loc[iseg + 1, wdi, trid, station, 'prior_seg'] = rmsd_prior_seg # Fill the posterior only for the window(s) run in the posterior simulation if is_post_window: dep_post = (sim_post - obsvect.yobs)[mask_obs] rmsd_post = np.sqrt(np.nanmean(dep_post ** 2)) self.data['rmsd'].loc[iseg + 1, wdi, trid, station, 'posterior'] = rmsd_post if trid != 'all' and station == 'all': info("--------------------------------------------------------") info(f"Prior RMSD for {trid}: {rmsd_prior}") info(f"Posterior RMSD for {trid}: {rmsd_post}") info(f"Ratio posterior/prior RMSD for {trid}: {rmsd_post / rmsd_prior}") info("--------------------------------------------------------") # Fill missing posteriors of previous segment with (false) prior of current segment if iseg > 0: self.data['rmsd'].loc[iseg, wdi, trid, station, 'posterior'] = rmsd_prior_seg # Root Mean Square Error (RMSE) # --------------------------------------- # based on formula presented by Sander Houweling in AVENGERS course on inversion # Fill the prior for all windows dep_prior = (sim_prior - obsvect.yobs)[mask_obs] / \ obsvect.yobs_err[mask_obs] rmse_prior = np.sqrt(np.nansum(dep_prior ** 2) / \ np.nansum(1 / obsvect.yobs_err[mask_obs] ** 2)) self.data['rmse'].loc[iseg + 1, wdi, trid, station, 'prior'] = rmse_prior # Fill the prior_seg for all windows dep_prior_seg = (sim_segprior - obsvect.yobs)[mask_obs] / \ obsvect.yobs_err[mask_obs] rmse_prior_seg = np.sqrt(np.nansum(dep_prior_seg ** 2) / \ np.nansum(1 / obsvect.yobs_err[mask_obs] ** 2)) self.data['rmse'].loc[iseg + 1, wdi, trid, station, 'prior_seg'] = rmse_prior_seg # Fill the posterior only for the window(s) run in the posterior simulation if is_post_window: dep_post = (sim_post - obsvect.yobs)[mask_obs] / \ obsvect.yobs_err[mask_obs] rmse_post = np.sqrt(np.nansum(dep_post ** 2) / \ np.nansum(1 / obsvect.yobs_err[mask_obs] ** 2)) self.data['rmse'].loc[iseg + 1, wdi, trid, station, 'posterior'] = rmse_post # Fill missing posteriors of previous segment with (false) prior of current segment if iseg > 0: self.data['rmse'].loc[iseg, wdi, trid, station, 'posterior'] = rmse_prior_seg return
[docs] def compute_dof_b_hresol(self, controlvect): """Compute the degrees of freedom of the inversion problem (B matrix) for the horizontal direction. It follows Patil et al. (2001) and Bretherton et al. (1999). """ dof_b_hresol = 0 hresol = 0 # Loop over components of the control vector components = controlvect.datavect.components for comp in components.attributes: component = getattr(components, comp) # Skip if component does not have parameters if not hasattr(component, "parameters"): continue for trcr in component.parameters.attributes: tracer = getattr(component.parameters, trcr) if not tracer.iscontrol: continue # Degrees of freedom for the horizontal dimension hdof = tracer.hresoldim if hasattr(tracer, "hcorrelations"): h_evalues = tracer.hcorrelations.sqrt_evalues ** 2 hdof = np.sum(h_evalues) ** 2 / np.sum(h_evalues ** 2) dof_b_hresol += hdof hresol += tracer.hresoldim return dof_b_hresol, hresol
[docs] def compute_dof_ensemble(self, iseg, wdis, x_samples_dev_prior, x_samples_dev, mask_cntrlv_idx): """Compute the degrees of freedom of the optimized ensemble following Patil et al. (2001) and Bretherton et al. (1999). """ xs_dev_prior = x_samples_dev_prior[:, mask_cntrlv_idx] nsample = xs_dev_prior.shape[0] pa_prior = np.dot(xs_dev_prior.T, xs_dev_prior) / (nsample - 1) evalues_pa_prior = scipy.sparse.linalg.eigsh(pa_prior, k=nsample, which='LM', return_eigenvectors=False) dof_ens_prior = np.sum(evalues_pa_prior) ** 2 / np.sum(evalues_pa_prior ** 2) for wdi in wdis: self.data['dof_ens'].loc[iseg + 1, wdi, 'prior'] = dof_ens_prior xs_dev_post = x_samples_dev[:, mask_cntrlv_idx] pa_post = np.dot(xs_dev_post.T, xs_dev_post) / (nsample - 1) evalues_pa_post = scipy.sparse.linalg.eigsh(pa_post, k=nsample, which='LM', return_eigenvectors=False) dof_ens_post = np.sum(evalues_pa_post) ** 2 / np.sum(evalues_pa_post ** 2) for wdi in wdis: self.data['dof_ens'].loc[iseg + 1, wdi, 'posterior'] = dof_ens_post return
[docs] def compute_analysis_metrics(self, obsvect, iseg, wdi, hxs_dev, mask_obsa): """Compute the influence matrix and the averaging kernel matrix following Cardinali et al. (2004) and Kim et al. (2014). """ ilag = int(iseg - self.data['first_cycle_idx'].loc[wdi]) nsample = hxs_dev.shape[1] obserr = obsvect.yobs_err # Calculate the influence matrix hd = hxs_dev[mask_obsa] hd = np.nan_to_num(hd) inf_matrix = hd.dot(hd.T) / (nsample - 1) inf_matrix /= obserr[mask_obsa] ** 2 self_sens = np.diag(inf_matrix) # Loop over all the trid and stations and extract mean self-sensitivity and dofs # for the cumulated influence matrix meta = obsvect.metadata.loc[mask_obsa] meta.index = range(len(meta)) for trid in self.data.trid_obs.values: for station in self.data.station.values: submask_obs = np.ones(meta.shape[0], dtype=bool) if trid != 'all': trcr = trid.split("/")[1] submask_obs = submask_obs & (meta["parameter"] == trcr) if station != 'all': submask_obs = submask_obs & (meta["station"] == station) if np.sum(submask_obs) != 0: mean_self_sens = np.nanmean(self_sens[submask_obs]) self.data['mean_self_sens'].loc[iseg + 1, wdi, trid, station] = mean_self_sens dofs = np.nansum(self_sens[submask_obs]) self.data['dofs'].loc[iseg + 1, wdi, trid, station] = dofs # For each window (W) in the segment, if it's the first time this window appears, it means observations are assimilated for the first time. # The influence matrix is then computed for this window alone. # However, this calculation will reflect a portion of the sensitivity matrix of the entire previous segment associated with W. # Consequently, the self-sensitivity and its associated DOFS will likely be overestimated compared to if observations in W # had been used solely to optimize W. A better estimate for the DOFS in this scenario could be the mean DOFS over all the windows of the previous segment. # In the next segment, if the window remains, we calculate the influence matrix over this window and the following ones in the segment. # The DOFS will likely be multiplied roughly by the number of windows considered. # The mean self-sensitivity is computed over all the windows considered, so it should remain relatively stable. # Another approach is to calculate the mean self-sensitivity over W, where it may decrease due to correlated observations # and the influence of new assimilated data on the optimized state of observations in W. # Alternatively, the mean self-sensitivity can be calculated by summing all self-sensitivities and dividing by the number of observations in W # (Kim et al., 2014?). In this case, the mean self-sensitivity should roughly be multiplied by the number of windows considered. # # Calculate the date range for the current window and the assimilated lags # iwdi = np.where(self.windows_datei == wdi)[0][0] # if iseg == 0 and iwdi < self.nlag - 1: # iwdf = self.nlag - 1 # else: # iwdf = iwdi + ilag if iwdi + ilag <= self.nwindows - 1 else -1 # wdf = self.windows_datef[iwdf] # # Influence matrix HK # # ------------------------------------------- # # Calculate observation mask # mask_obsa_w_l = (wdi <= obsvect.metadata["date"]) \ # & (obsvect.metadata["enddate"] < wdf) \ # & obsvect.obsvect_mask # # Calculate the influence matrix # hd = hxs_dev[mask_obsa_w_l] # hd = np.nan_to_num(hd) # inf_matrix = hd.dot(hd.T) # inf_matrix /= (nsample - 1) * obserr[mask_obsa_w_l] ** 2 # self_sens = np.diag(inf_matrix) # # Loop over all the trid and stations and extract mean self-sensitivity and dofs # # for the cumulated influence matrix # meta = obsvect.metadata.loc[mask_obsa_w_l] # for trid in self.data.trid_obs.values: # for station in self.data.station.values: # mask_obs = obsvect.obsvect_mask[mask_obsa_w_l] # if trid != 'all': # trcr = trid.split("/")[1] # mask_obs = mask_obs & (meta["parameter"] == trcr) # if station != 'all': # mask_obs = mask_obs & (meta["station"] == station) # if np.sum(mask_obs) != 0: # mean_self_sens = np.nanmean(self_sens[mask_obs]) # self.data['mean_self_sens'].loc[iseg + 1, wdi, trid, station] = mean_self_sens # dofs = np.nansum(self_sens[mask_obs]) # self.data['dofs'].loc[iseg + 1, wdi, trid, station] = dofs # Averaging kernel matrix KH # ------------------------------------------- # Seems impossible to get in with EnSRF ? # Using Pa = (In - A)B => A = In - PaB-1 does not give good results # It seems that we cannot invert B easily # wdi = self.windows_datei[1] # wdf = self.windows_datef[1] # mask_cntrlv_idx_w = get_cntrlv_idx_segment(controlvect, wdi, wdf) # xb = xs_segp_dev[:, mask_cntrlv_idx_w] # b = xb.T.dot(xb) / (nsample - 1) # xa = xs_dev[:, mask_cntrlv_idx_w] # pa = xa.T.dot(xa) / (nsample - 1) # b_true = controlvect.build_b(controlvect) # b_true_mask = b_true[mask_cntrlv_idx_w][:, mask_cntrlv_idx_w] # b_true_mask[range(len(b)), range(len(b))] = np.diag(b) # evalues, evectors = np.linalg.eigh(b_true_mask) # b_inv = evectors.dot(np.diag(evalues ** -1)).dot(evectors.T) # a = np.eye(len(b)) - pa.dot(b_inv) return
[docs] def compute_cost_function(self, obsvect, iseg, wdi, is_post_window, chi_prior_seg, chi_post, sim_prior, sim_segprior, sim_post, mask_obsa_w, mask_cntrlv_idx_w): """Compute individual cost function for each window. """ obserr = obsvect.yobs_err[mask_obsa_w] dep_prior = (sim_prior - obsvect.yobs)[mask_obsa_w] dep_prior_seg = (sim_segprior - obsvect.yobs)[mask_obsa_w] dep_post = (sim_post - obsvect.yobs)[mask_obsa_w] jo_prior = 0.5 * np.dot(dep_prior / obserr, dep_prior / obserr) jo_prior_seg = 0.5 * np.dot(dep_prior_seg / obserr, dep_prior_seg / obserr) jo_post = 0.5 * np.dot(dep_post / obserr, dep_post / obserr) jb_prior_seg = 0.5 * np.dot(chi_prior_seg[mask_cntrlv_idx_w], chi_prior_seg[mask_cntrlv_idx_w]) jb_post = 0.5 * np.dot(chi_post[mask_cntrlv_idx_w], chi_post[mask_cntrlv_idx_w]) self.data['jb'].loc[iseg + 1, wdi, 'prior'] = 0 self.data['jo'].loc[iseg + 1, wdi, 'prior'] = jo_prior self.data['jtot'].loc[iseg + 1, wdi, 'prior'] = jo_prior self.data['chi_square'].loc[iseg + 1, wdi, 'prior'] = \ 2 * self.data['jtot'].loc[iseg + 1, wdi, 'prior'] / \ self.data['nobs_assim'].loc[iseg + 1, wdi, 'all', 'all'] self.data['jb'].loc[iseg + 1, wdi, 'prior_seg'] = jb_prior_seg self.data['jo'].loc[iseg + 1, wdi, 'prior_seg'] = jo_prior_seg self.data['jtot'].loc[iseg + 1, wdi, 'prior_seg'] = jb_prior_seg + jo_prior_seg self.data['chi_square'].loc[iseg + 1, wdi, 'prior_seg'] = \ 2 * self.data['jtot'].loc[iseg + 1, wdi, 'prior_seg'] / \ self.data['nobs_assim'].loc[iseg + 1, wdi, 'all', 'all'] if is_post_window: self.data['jb'].loc[iseg + 1, wdi, 'posterior'] = jb_post self.data['jo'].loc[iseg + 1, wdi, 'posterior'] = jo_post self.data['jtot'].loc[iseg + 1, wdi, 'posterior'] = jb_post + jo_post self.data['chi_square'].loc[iseg + 1, wdi, 'posterior'] = \ 2 * self.data['jtot'].loc[iseg + 1, wdi, 'posterior'] / \ self.data['nobs_assim'].loc[iseg + 1, wdi, 'all', 'all'] # Fill missing posteriors of previous segment with (false) prior of current segment if iseg > 0: self.data['jb'].loc[iseg, wdi, 'posterior'] = jb_prior_seg self.data['jo'].loc[iseg, wdi, 'posterior'] = jo_prior_seg self.data['jtot'].loc[iseg, wdi, 'posterior'] = jb_prior_seg + jo_prior_seg self.data['chi_square'].loc[iseg, wdi, 'posterior'] = \ 2 * self.data['jtot'].loc[iseg, wdi, 'posterior'] / \ self.data['nobs_assim'].loc[iseg, wdi, 'all', 'all'] return
[docs] def final_update(self, obsvect, controlvect, sim_prior, sim_post, hx_samples): """Compute total cost function, RMSE and RMSD for the full period. """ # Total cost function for the full period # ------------------------------------------- mask_obsa = obsvect.obsvect_mask obserr = obsvect.yobs_err[mask_obsa] dep_prior = (sim_prior - obsvect.yobs)[mask_obsa] dep_post = (sim_post - obsvect.yobs)[mask_obsa] jo_prior = 0.5 * np.dot(dep_prior / obserr, dep_prior / obserr) jo_post = 0.5 * np.dot(dep_post / obserr, dep_post / obserr) chi = controlvect.sqrtbprod_ad(controlvect.xa - controlvect.xb, inverse=True) jb_post = 0.5 * np.dot(chi, chi) self.data['jo'].loc['full', 'full', 'prior'] = jo_prior self.data['jo'].loc['full', 'full', 'posterior'] = jo_post self.data['jb'].loc['full', 'full', 'prior'] = 0 self.data['jb'].loc['full', 'full', 'posterior'] = jb_post self.data['jtot'].loc['full', 'full', 'prior'] = jo_prior self.data['jtot'].loc['full', 'full', 'posterior'] = jb_post + jo_post self.data['chi_square'].loc['full', 'full', 'prior'] = \ 2 * self.data['jtot'].loc['full', 'full', 'prior'] / \ self.data['nobs_assim'].loc['full', 'full', 'all', 'all'] self.data['chi_square'].loc['full', 'full', 'posterior'] = \ 2 * self.data['jtot'].loc['full', 'full', 'posterior'] / \ self.data['nobs_assim'].loc['full', 'full', 'all', 'all'] ratio_costf = (jb_post + jo_post) / jo_prior info("--------------------------------------------------------") info(f"Prior cost function: {jo_prior}") info(f"Posterior cost function: {jb_post + jo_post}") info(f"Ratio posterior/prior cost function: {ratio_costf}") if ratio_costf > 1: warning("WARNING: EnSRF did not reduce the cost function. " "Please check your set-up.") # RMSD and RMSE for the full period # ------------------------------------------- for trid in self.data.trid_obs.values: for station in self.data.station.values: mask_obs = obsvect.obsvect_mask if trid != 'all': trcr = trid.split("/")[1] mask_obs = mask_obs & (obsvect.metadata["parameter"] == trcr) if station != 'all': mask_obs = mask_obs & (obsvect.metadata["station"] == station) if np.sum(mask_obs) == 0: continue # RMSD dep_prior = (sim_prior - obsvect.yobs)[mask_obs] dep_post = (sim_post - obsvect.yobs)[mask_obs] rmsd_prior = np.sqrt(np.nanmean(dep_prior ** 2)) rmsd_post = np.sqrt(np.nanmean(dep_post ** 2)) self.data['rmsd'].loc['full', 'full', trid, station, 'prior'] = rmsd_prior self.data['rmsd'].loc['full', 'full', trid, station, 'posterior'] = rmsd_post if station == 'all': ratio_rmsd = rmsd_post / rmsd_prior info("--------------------------------------------------------") info(f"Prior RMSD for {trid}: {rmsd_prior}") info(f"Posterior RMSD for {trid}: {rmsd_post}") info(f"Ratio posterior/prior RMSD for {trid}: {ratio_rmsd}") if ratio_rmsd > 1: warning(f"WARNING: EnSRF did not reduce the RMSD for {trid}. " "Please check your set-up.") # RMSE dep_prior = (sim_prior - obsvect.yobs)[mask_obs] / \ obsvect.yobs_err[mask_obs] dep_post = (sim_post - obsvect.yobs)[mask_obs] / \ obsvect.yobs_err[mask_obs] rmse_prior = np.sqrt(np.nansum(dep_prior ** 2) / \ np.nansum(1 / obsvect.yobs_err[mask_obs] ** 2)) rmse_post = np.sqrt(np.nansum(dep_post ** 2) / \ np.nansum(1 / obsvect.yobs_err[mask_obs] ** 2)) self.data['rmse'].loc['full', 'full', trid, station, 'prior'] = rmse_prior self.data['rmse'].loc['full', 'full', trid, station, 'posterior'] = rmse_post # DOFS for the full period # ------------------------------------------- hx_samples_dev = copy.deepcopy(hx_samples[:, 3:] - hx_samples[:, 2:3]) nsample = hx_samples_dev.shape[1] obserr = obsvect.yobs_err mask_obsa = obsvect.obsvect_mask hd = hx_samples_dev[mask_obsa] influence_matrix = hd.dot(hd.T) influence_matrix /= (nsample - 1) * obserr[mask_obsa] ** 2 self_sens = np.diag(influence_matrix) # Loop over all the trid and stations and extract mean self-sensitivity and dofs # for the cumulated influence matrix meta = obsvect.metadata.loc[mask_obsa] meta.index = range(len(meta)) for trid in self.data.trid_obs.values: for station in self.data.station.values: submask_obs = np.ones(meta.shape[0], dtype=bool) if trid != 'all': trcr = trid.split("/")[1] submask_obs = submask_obs & (meta["parameter"] == trcr) if station != 'all': submask_obs = submask_obs & (meta["station"] == station) if np.sum(submask_obs) != 0: mean_self_sens = np.nanmean(self_sens[submask_obs]) self.data['mean_self_sens'].loc['full', 'full', trid, station] = mean_self_sens dofs = np.sum(self_sens[submask_obs]) self.data['dofs'].loc['full', 'full', trid, station] = dofs if station == 'all': info("--------------------------------------------------------") info(f"Total DOFS {trid}: {dofs}") info(f"Mean self-sensitivity {trid}: {mean_self_sens}") info("--------------------------------------------------------") return
[docs] def dump(self, filename): if os.path.exists(filename): os.remove(filename) # Indexes to find first and last optimization cycle for each window ref_var = self.data['jb'][..., 0] self.data['first_cycle_idx'][:] = (~np.isnan(ref_var)).astype(int).argmax(axis=0).values self.data['last_cycle_idx'][:] = (~np.isnan(ref_var)).cumsum(axis=0).argmax(axis=0).values self.data['window'] = [str(w) for w in self.data.window.values] self.data['cycle'] = [str(c) for c in self.data.cycle.values] self.data.to_netcdf(filename)