import numpy as np
import xarray as xr
from logging import info, warning
import scipy
import os
import copy
from .segment_cropping import get_cntrlv_idx_segment
from ....utils.dates import date_range
[docs]
class Metrics_ensrf():
def __init__(self, controlvect, obsvect, window_length, windows_datei, nlag):
# Get basic data from configuration
self.windows_datei = windows_datei[:-1]
self.windows_datef = windows_datei[1:]
self.nwindows = len(self.windows_datei)
self.window_length = window_length
self.nlag = nlag
self.ncycles = len(windows_datei[:-nlag])
self.stations = np.unique(obsvect.metadata['station'])
self.nstations = len(self.stations)
# Get a list of (comp, trcr)
self.trids_cntrlv = []
self.trids_obs = []
components = controlvect.datavect.components
for comp in components.attributes:
component = getattr(components, comp)
if not hasattr(component, "parameters"):
continue
for trcr in component.parameters.attributes:
if comp in ["concs"]:
self.trids_obs.append(("/".join([comp, trcr])))
else:
tracer = getattr(component.parameters, trcr)
if not tracer.iscontrol:
continue
self.trids_cntrlv.append(("/".join([comp, trcr])))
self.ntrids_obs = len(self.trids_obs)
self.ntrids_cntrlv = len(self.trids_cntrlv)
# Compute, the spatial resolution and
# the dof of B for the horizontal domain
dof_b_hresol, hresol = self.compute_dof_b_hresol(controlvect)
# Create the metrics dataset
self.data = xr.Dataset(
data_vars=dict(
hresol=([], hresol),
dof_b_hresol=([], dof_b_hresol),
first_cycle_idx=(['window'],
np.empty((self.nwindows + 1))),
last_cycle_idx=(['window'],
np.empty((self.nwindows + 1))),
nobs_assim=(['cycle', 'window', 'trid_obs', 'station'],
np.empty((self.ncycles + 1, self.nwindows + 1,
self.ntrids_obs + 1, self.nstations + 1))),
nstations_assim=(['cycle', 'window', 'trid_obs',],
np.empty((self.ncycles + 1, self.nwindows + 1,
self.ntrids_obs + 1,))),
mean_obserr=(['cycle', 'window', 'trid_obs', 'station'],
np.empty((self.ncycles + 1, self.nwindows + 1,
self.ntrids_obs + 1, self.nstations + 1))),
rmsd=(['cycle', 'window', 'trid_obs', 'station', 'iteration'],
np.empty((self.ncycles + 1, self.nwindows + 1,
self.ntrids_obs + 1, self.nstations + 1, 3))),
rmse=(['cycle', 'window', 'trid_obs', 'station', 'iteration'],
np.empty((self.ncycles + 1, self.nwindows + 1,
self.ntrids_obs + 1, self.nstations + 1, 3))),
jo=(['cycle', 'window', 'iteration'],
np.empty((self.ncycles + 1, self.nwindows + 1, 3))),
jb=(['cycle', 'window', 'iteration'],
np.empty((self.ncycles + 1, self.nwindows + 1, 3))),
jtot=(['cycle', 'window', 'iteration'],
np.empty((self.ncycles + 1, self.nwindows + 1, 3))),
chi_square=(['cycle', 'window', 'iteration'],
np.empty((self.ncycles + 1, self.nwindows + 1, 3))),
mean_self_sens=(['cycle', 'window', 'trid_obs', 'station'], # accumulated
np.empty((self.ncycles + 1, self.nwindows + 1,
self.ntrids_obs + 1, self.nstations + 1))),
dofs=(['cycle', 'window', 'trid_obs', 'station'], # accumulated
np.empty((self.ncycles + 1, self.nwindows + 1,
self.ntrids_obs + 1, self.nstations + 1))),
dof_ens=(['cycle', 'window', 'iteration'],
np.empty((self.ncycles + 1, self.nwindows + 1, 3))),
),
coords=dict(
cycle=np.append(
(np.arange(self.ncycles) + 1).astype('object'),
'full'
),
window=np.append(self.windows_datei, 'full'),
iteration=['prior', 'prior_seg', 'posterior'],
station=np.append(self.stations, 'all'),
trid_obs=np.append(self.trids_obs, 'all'),
trid_cntrlv=np.append(self.trids_cntrlv, 'all'),
),
)
# Fill data with Nans
for var in self.data.data_vars:
if var not in ['hresol', 'dof_b_hresol']:
self.data[var][:] = np.nan
# Fill basic observation data for the entire period
for trid in self.data.trid_obs.values:
for station in self.data.station.values:
mask_obs = obsvect.obsvect_mask
if trid != 'all':
trcr = trid.split("/")[1]
mask_obs = mask_obs & (obsvect.metadata["parameter"] == trcr)
if station != 'all':
mask_obs = mask_obs & (obsvect.metadata["station"] == station)
self.data['nobs_assim'].loc['full', 'full', trid, station] = \
np.sum(mask_obs)
if station == 'all':
self.data['nstations_assim'].loc['full', 'full', trid] = \
len(np.unique(obsvect.metadata.loc[mask_obs, 'station']))
if np.sum(mask_obs) == 0:
self.data['mean_obserr'].loc['full', 'full', trid, station] = \
np.nan
else:
self.data['mean_obserr'].loc['full', 'full', trid, station] = \
np.sqrt(np.nanmean(obsvect.yobs_err[mask_obs] ** 2))
[docs]
def update(self, level, segments_datei, iseg, seg_ddi, seg_ddf,
controlvect, obsvect,
mask_obsa, mask_obs_assimilated,
x_samples, x_samples_segprior,
hx_samples, sim_segprior, sim_post):
x_samples_dev = copy.deepcopy(x_samples[3:] - x_samples[2])
x_samples_segprior_dev = copy.deepcopy(x_samples_segprior[3:] - x_samples_segprior[2])
hx_samples_dev = copy.deepcopy(hx_samples[:, 3:] - hx_samples[:, 2:3])
sim_prior = hx_samples[:, 1]
windows_seg_datei = date_range(seg_ddi, seg_ddf, self.window_length)
mask_cntrlv_idx_s = get_cntrlv_idx_segment(controlvect, seg_ddi, seg_ddf)
chi_prior_seg = controlvect.sqrtbprod_ad(controlvect.x - controlvect.xb, inverse=True)
chi_post = controlvect.sqrtbprod_ad(controlvect.xa - controlvect.xb, inverse=True)
wdis = windows_seg_datei[:-1]
wdfs = windows_seg_datei[1:]
if level > 1:
self.compute_dof_ensemble(
iseg,
wdis,
x_samples_segprior_dev,
x_samples_dev,
mask_cntrlv_idx_s
)
for iwdi, (wdi, wdf) in enumerate(zip(wdis, wdfs)):
if np.isnan(self.data['first_cycle_idx'].loc[wdi]):
self.data['first_cycle_idx'].loc[wdi] = iseg
# Calculate observation mask for the current window
mask_window = (wdi <= obsvect.metadata["date"]) \
& (obsvect.metadata["enddate"] < wdf)
mask_obsa_w = mask_window & mask_obs_assimilated
mask_cntrlv_idx_w = get_cntrlv_idx_segment(controlvect, wdi, wdf)
is_prior_window = False
is_post_window = False
if iseg == len(segments_datei) - 1 or iwdi == 0:
# means the window will not be optimized again
is_post_window = True
if iseg == 0 or iwdi == self.nlag - 1:
# means the window is optimized for the first time
is_prior_window = True
if is_prior_window:
self.compute_analysis_metrics(
obsvect,
iseg,
wdi,
hx_samples_dev,
mask_obsa
)
self.compute_obs_metrics(
obsvect,
iseg,
wdi,
is_prior_window,
is_post_window,
mask_obsa_w,
sim_prior,
sim_segprior,
sim_post
)
self.compute_cost_function(
obsvect,
iseg,
wdi,
is_post_window,
chi_prior_seg,
chi_post,
hx_samples[:, 1],
sim_segprior,
sim_post,
mask_obsa_w,
mask_cntrlv_idx_w
)
return
[docs]
def compute_obs_metrics(self, obsvect, iseg, wdi,
is_prior_window, is_post_window, mask_obsa_w,
sim_prior, sim_segprior, sim_post):
# Loop over all the trid and stations
for trid in self.data.trid_obs.values:
for station in self.data.station.values:
mask_obs = obsvect.obsvect_mask
if trid != 'all':
trcr = trid.split("/")[1]
mask_obs = mask_obs & (obsvect.metadata["parameter"] == trcr)
if station != 'all':
mask_obs = mask_obs & (obsvect.metadata["station"] == station)
mask_obs = mask_obsa_w & mask_obs
if is_prior_window:
# Number of assimilated stations
# ---------------------------------------
if station == 'all':
self.data['nstations_assim'].loc[iseg + 1, wdi, trid] = \
len(np.unique(obsvect.metadata.loc[mask_obs, 'station']))
# Number of assimilated observations
# ---------------------------------------
if np.sum(mask_obs) == 0:
self.data['nobs_assim'].loc[iseg + 1, wdi, trid, station] = 0
else:
self.data['nobs_assim'].loc[iseg + 1, wdi, trid, station] = \
len(obsvect.yobs[mask_obs])
# Mean observation error
# ---------------------------------------
if np.sum(mask_obs) == 0:
self.data['mean_obserr'].loc[iseg + 1, wdi, trid, station] = np.nan
else:
self.data['mean_obserr'].loc[iseg + 1, wdi, trid, station] = \
np.sqrt(np.nanmean(obsvect.yobs_err[mask_obs] ** 2))
if np.sum(mask_obs) == 0:
continue
# Root Mean Square Deviation (RMSD)
# ---------------------------------------
# Fill the prior for all windows
dep_prior = (sim_prior - obsvect.yobs)[mask_obs]
rmsd_prior = np.sqrt(np.nanmean(dep_prior ** 2))
self.data['rmsd'].loc[iseg + 1, wdi, trid, station, 'prior'] = rmsd_prior
# Fill the prior_seg for all windows
dep_prior_seg = (sim_segprior - obsvect.yobs)[mask_obs]
rmsd_prior_seg = np.sqrt(np.nanmean(dep_prior_seg ** 2))
self.data['rmsd'].loc[iseg + 1, wdi, trid, station, 'prior_seg'] = rmsd_prior_seg
# Fill the posterior only for the window(s) run in the posterior simulation
if is_post_window:
dep_post = (sim_post - obsvect.yobs)[mask_obs]
rmsd_post = np.sqrt(np.nanmean(dep_post ** 2))
self.data['rmsd'].loc[iseg + 1, wdi, trid, station, 'posterior'] = rmsd_post
if trid != 'all' and station == 'all':
info("--------------------------------------------------------")
info(f"Prior RMSD for {trid}: {rmsd_prior}")
info(f"Posterior RMSD for {trid}: {rmsd_post}")
info(f"Ratio posterior/prior RMSD for {trid}: {rmsd_post / rmsd_prior}")
info("--------------------------------------------------------")
# Fill missing posteriors of previous segment with (false) prior of current segment
if iseg > 0:
self.data['rmsd'].loc[iseg, wdi, trid, station, 'posterior'] = rmsd_prior_seg
# Root Mean Square Error (RMSE)
# ---------------------------------------
# based on formula presented by Sander Houweling in AVENGERS course on inversion
# Fill the prior for all windows
dep_prior = (sim_prior - obsvect.yobs)[mask_obs] / \
obsvect.yobs_err[mask_obs]
rmse_prior = np.sqrt(np.nansum(dep_prior ** 2) / \
np.nansum(1 / obsvect.yobs_err[mask_obs] ** 2))
self.data['rmse'].loc[iseg + 1, wdi, trid, station, 'prior'] = rmse_prior
# Fill the prior_seg for all windows
dep_prior_seg = (sim_segprior - obsvect.yobs)[mask_obs] / \
obsvect.yobs_err[mask_obs]
rmse_prior_seg = np.sqrt(np.nansum(dep_prior_seg ** 2) / \
np.nansum(1 / obsvect.yobs_err[mask_obs] ** 2))
self.data['rmse'].loc[iseg + 1, wdi, trid, station, 'prior_seg'] = rmse_prior_seg
# Fill the posterior only for the window(s) run in the posterior simulation
if is_post_window:
dep_post = (sim_post - obsvect.yobs)[mask_obs] / \
obsvect.yobs_err[mask_obs]
rmse_post = np.sqrt(np.nansum(dep_post ** 2) / \
np.nansum(1 / obsvect.yobs_err[mask_obs] ** 2))
self.data['rmse'].loc[iseg + 1, wdi, trid, station, 'posterior'] = rmse_post
# Fill missing posteriors of previous segment with (false) prior of current segment
if iseg > 0:
self.data['rmse'].loc[iseg, wdi, trid, station, 'posterior'] = rmse_prior_seg
return
[docs]
def compute_dof_b_hresol(self, controlvect):
"""Compute the degrees of freedom of the inversion problem
(B matrix) for the horizontal direction.
It follows Patil et al. (2001) and Bretherton et al. (1999).
"""
dof_b_hresol = 0
hresol = 0
# Loop over components of the control vector
components = controlvect.datavect.components
for comp in components.attributes:
component = getattr(components, comp)
# Skip if component does not have parameters
if not hasattr(component, "parameters"):
continue
for trcr in component.parameters.attributes:
tracer = getattr(component.parameters, trcr)
if not tracer.iscontrol:
continue
# Degrees of freedom for the horizontal dimension
hdof = tracer.hresoldim
if hasattr(tracer, "hcorrelations"):
h_evalues = tracer.hcorrelations.sqrt_evalues ** 2
hdof = np.sum(h_evalues) ** 2 / np.sum(h_evalues ** 2)
dof_b_hresol += hdof
hresol += tracer.hresoldim
return dof_b_hresol, hresol
[docs]
def compute_dof_ensemble(self, iseg, wdis, x_samples_dev_prior, x_samples_dev, mask_cntrlv_idx):
"""Compute the degrees of freedom of the optimized ensemble
following Patil et al. (2001) and Bretherton et al. (1999).
"""
xs_dev_prior = x_samples_dev_prior[:, mask_cntrlv_idx]
nsample = xs_dev_prior.shape[0]
pa_prior = np.dot(xs_dev_prior.T, xs_dev_prior) / (nsample - 1)
evalues_pa_prior = scipy.sparse.linalg.eigsh(pa_prior, k=nsample, which='LM', return_eigenvectors=False)
dof_ens_prior = np.sum(evalues_pa_prior) ** 2 / np.sum(evalues_pa_prior ** 2)
for wdi in wdis:
self.data['dof_ens'].loc[iseg + 1, wdi, 'prior'] = dof_ens_prior
xs_dev_post = x_samples_dev[:, mask_cntrlv_idx]
pa_post = np.dot(xs_dev_post.T, xs_dev_post) / (nsample - 1)
evalues_pa_post = scipy.sparse.linalg.eigsh(pa_post, k=nsample, which='LM', return_eigenvectors=False)
dof_ens_post = np.sum(evalues_pa_post) ** 2 / np.sum(evalues_pa_post ** 2)
for wdi in wdis:
self.data['dof_ens'].loc[iseg + 1, wdi, 'posterior'] = dof_ens_post
return
[docs]
def compute_analysis_metrics(self, obsvect, iseg, wdi, hxs_dev, mask_obsa):
"""Compute the influence matrix and the averaging kernel matrix
following Cardinali et al. (2004) and Kim et al. (2014).
"""
ilag = int(iseg - self.data['first_cycle_idx'].loc[wdi])
nsample = hxs_dev.shape[1]
obserr = obsvect.yobs_err
# Calculate the influence matrix
hd = hxs_dev[mask_obsa]
hd = np.nan_to_num(hd)
inf_matrix = hd.dot(hd.T) / (nsample - 1)
inf_matrix /= obserr[mask_obsa] ** 2
self_sens = np.diag(inf_matrix)
# Loop over all the trid and stations and extract mean self-sensitivity and dofs
# for the cumulated influence matrix
meta = obsvect.metadata.loc[mask_obsa]
meta.index = range(len(meta))
for trid in self.data.trid_obs.values:
for station in self.data.station.values:
submask_obs = np.ones(meta.shape[0], dtype=bool)
if trid != 'all':
trcr = trid.split("/")[1]
submask_obs = submask_obs & (meta["parameter"] == trcr)
if station != 'all':
submask_obs = submask_obs & (meta["station"] == station)
if np.sum(submask_obs) != 0:
mean_self_sens = np.nanmean(self_sens[submask_obs])
self.data['mean_self_sens'].loc[iseg + 1, wdi, trid, station] = mean_self_sens
dofs = np.nansum(self_sens[submask_obs])
self.data['dofs'].loc[iseg + 1, wdi, trid, station] = dofs
# For each window (W) in the segment, if it's the first time this window appears, it means observations are assimilated for the first time.
# The influence matrix is then computed for this window alone.
# However, this calculation will reflect a portion of the sensitivity matrix of the entire previous segment associated with W.
# Consequently, the self-sensitivity and its associated DOFS will likely be overestimated compared to if observations in W
# had been used solely to optimize W. A better estimate for the DOFS in this scenario could be the mean DOFS over all the windows of the previous segment.
# In the next segment, if the window remains, we calculate the influence matrix over this window and the following ones in the segment.
# The DOFS will likely be multiplied roughly by the number of windows considered.
# The mean self-sensitivity is computed over all the windows considered, so it should remain relatively stable.
# Another approach is to calculate the mean self-sensitivity over W, where it may decrease due to correlated observations
# and the influence of new assimilated data on the optimized state of observations in W.
# Alternatively, the mean self-sensitivity can be calculated by summing all self-sensitivities and dividing by the number of observations in W
# (Kim et al., 2014?). In this case, the mean self-sensitivity should roughly be multiplied by the number of windows considered.
# # Calculate the date range for the current window and the assimilated lags
# iwdi = np.where(self.windows_datei == wdi)[0][0]
# if iseg == 0 and iwdi < self.nlag - 1:
# iwdf = self.nlag - 1
# else:
# iwdf = iwdi + ilag if iwdi + ilag <= self.nwindows - 1 else -1
# wdf = self.windows_datef[iwdf]
# # Influence matrix HK
# # -------------------------------------------
# # Calculate observation mask
# mask_obsa_w_l = (wdi <= obsvect.metadata["date"]) \
# & (obsvect.metadata["enddate"] < wdf) \
# & obsvect.obsvect_mask
# # Calculate the influence matrix
# hd = hxs_dev[mask_obsa_w_l]
# hd = np.nan_to_num(hd)
# inf_matrix = hd.dot(hd.T)
# inf_matrix /= (nsample - 1) * obserr[mask_obsa_w_l] ** 2
# self_sens = np.diag(inf_matrix)
# # Loop over all the trid and stations and extract mean self-sensitivity and dofs
# # for the cumulated influence matrix
# meta = obsvect.metadata.loc[mask_obsa_w_l]
# for trid in self.data.trid_obs.values:
# for station in self.data.station.values:
# mask_obs = obsvect.obsvect_mask[mask_obsa_w_l]
# if trid != 'all':
# trcr = trid.split("/")[1]
# mask_obs = mask_obs & (meta["parameter"] == trcr)
# if station != 'all':
# mask_obs = mask_obs & (meta["station"] == station)
# if np.sum(mask_obs) != 0:
# mean_self_sens = np.nanmean(self_sens[mask_obs])
# self.data['mean_self_sens'].loc[iseg + 1, wdi, trid, station] = mean_self_sens
# dofs = np.nansum(self_sens[mask_obs])
# self.data['dofs'].loc[iseg + 1, wdi, trid, station] = dofs
# Averaging kernel matrix KH
# -------------------------------------------
# Seems impossible to get in with EnSRF ?
# Using Pa = (In - A)B => A = In - PaB-1 does not give good results
# It seems that we cannot invert B easily
# wdi = self.windows_datei[1]
# wdf = self.windows_datef[1]
# mask_cntrlv_idx_w = get_cntrlv_idx_segment(controlvect, wdi, wdf)
# xb = xs_segp_dev[:, mask_cntrlv_idx_w]
# b = xb.T.dot(xb) / (nsample - 1)
# xa = xs_dev[:, mask_cntrlv_idx_w]
# pa = xa.T.dot(xa) / (nsample - 1)
# b_true = controlvect.build_b(controlvect)
# b_true_mask = b_true[mask_cntrlv_idx_w][:, mask_cntrlv_idx_w]
# b_true_mask[range(len(b)), range(len(b))] = np.diag(b)
# evalues, evectors = np.linalg.eigh(b_true_mask)
# b_inv = evectors.dot(np.diag(evalues ** -1)).dot(evectors.T)
# a = np.eye(len(b)) - pa.dot(b_inv)
return
[docs]
def compute_cost_function(self, obsvect, iseg, wdi, is_post_window,
chi_prior_seg, chi_post,
sim_prior, sim_segprior, sim_post,
mask_obsa_w, mask_cntrlv_idx_w):
"""Compute individual cost function for each window.
"""
obserr = obsvect.yobs_err[mask_obsa_w]
dep_prior = (sim_prior - obsvect.yobs)[mask_obsa_w]
dep_prior_seg = (sim_segprior - obsvect.yobs)[mask_obsa_w]
dep_post = (sim_post - obsvect.yobs)[mask_obsa_w]
jo_prior = 0.5 * np.dot(dep_prior / obserr, dep_prior / obserr)
jo_prior_seg = 0.5 * np.dot(dep_prior_seg / obserr, dep_prior_seg / obserr)
jo_post = 0.5 * np.dot(dep_post / obserr, dep_post / obserr)
jb_prior_seg = 0.5 * np.dot(chi_prior_seg[mask_cntrlv_idx_w], chi_prior_seg[mask_cntrlv_idx_w])
jb_post = 0.5 * np.dot(chi_post[mask_cntrlv_idx_w], chi_post[mask_cntrlv_idx_w])
self.data['jb'].loc[iseg + 1, wdi, 'prior'] = 0
self.data['jo'].loc[iseg + 1, wdi, 'prior'] = jo_prior
self.data['jtot'].loc[iseg + 1, wdi, 'prior'] = jo_prior
self.data['chi_square'].loc[iseg + 1, wdi, 'prior'] = \
2 * self.data['jtot'].loc[iseg + 1, wdi, 'prior'] / \
self.data['nobs_assim'].loc[iseg + 1, wdi, 'all', 'all']
self.data['jb'].loc[iseg + 1, wdi, 'prior_seg'] = jb_prior_seg
self.data['jo'].loc[iseg + 1, wdi, 'prior_seg'] = jo_prior_seg
self.data['jtot'].loc[iseg + 1, wdi, 'prior_seg'] = jb_prior_seg + jo_prior_seg
self.data['chi_square'].loc[iseg + 1, wdi, 'prior_seg'] = \
2 * self.data['jtot'].loc[iseg + 1, wdi, 'prior_seg'] / \
self.data['nobs_assim'].loc[iseg + 1, wdi, 'all', 'all']
if is_post_window:
self.data['jb'].loc[iseg + 1, wdi, 'posterior'] = jb_post
self.data['jo'].loc[iseg + 1, wdi, 'posterior'] = jo_post
self.data['jtot'].loc[iseg + 1, wdi, 'posterior'] = jb_post + jo_post
self.data['chi_square'].loc[iseg + 1, wdi, 'posterior'] = \
2 * self.data['jtot'].loc[iseg + 1, wdi, 'posterior'] / \
self.data['nobs_assim'].loc[iseg + 1, wdi, 'all', 'all']
# Fill missing posteriors of previous segment with (false) prior of current segment
if iseg > 0:
self.data['jb'].loc[iseg, wdi, 'posterior'] = jb_prior_seg
self.data['jo'].loc[iseg, wdi, 'posterior'] = jo_prior_seg
self.data['jtot'].loc[iseg, wdi, 'posterior'] = jb_prior_seg + jo_prior_seg
self.data['chi_square'].loc[iseg, wdi, 'posterior'] = \
2 * self.data['jtot'].loc[iseg, wdi, 'posterior'] / \
self.data['nobs_assim'].loc[iseg, wdi, 'all', 'all']
return
[docs]
def final_update(self, obsvect, controlvect, sim_prior, sim_post, hx_samples):
"""Compute total cost function, RMSE and RMSD for the full period.
"""
# Total cost function for the full period
# -------------------------------------------
mask_obsa = obsvect.obsvect_mask
obserr = obsvect.yobs_err[mask_obsa]
dep_prior = (sim_prior - obsvect.yobs)[mask_obsa]
dep_post = (sim_post - obsvect.yobs)[mask_obsa]
jo_prior = 0.5 * np.dot(dep_prior / obserr, dep_prior / obserr)
jo_post = 0.5 * np.dot(dep_post / obserr, dep_post / obserr)
chi = controlvect.sqrtbprod_ad(controlvect.xa - controlvect.xb, inverse=True)
jb_post = 0.5 * np.dot(chi, chi)
self.data['jo'].loc['full', 'full', 'prior'] = jo_prior
self.data['jo'].loc['full', 'full', 'posterior'] = jo_post
self.data['jb'].loc['full', 'full', 'prior'] = 0
self.data['jb'].loc['full', 'full', 'posterior'] = jb_post
self.data['jtot'].loc['full', 'full', 'prior'] = jo_prior
self.data['jtot'].loc['full', 'full', 'posterior'] = jb_post + jo_post
self.data['chi_square'].loc['full', 'full', 'prior'] = \
2 * self.data['jtot'].loc['full', 'full', 'prior'] / \
self.data['nobs_assim'].loc['full', 'full', 'all', 'all']
self.data['chi_square'].loc['full', 'full', 'posterior'] = \
2 * self.data['jtot'].loc['full', 'full', 'posterior'] / \
self.data['nobs_assim'].loc['full', 'full', 'all', 'all']
ratio_costf = (jb_post + jo_post) / jo_prior
info("--------------------------------------------------------")
info(f"Prior cost function: {jo_prior}")
info(f"Posterior cost function: {jb_post + jo_post}")
info(f"Ratio posterior/prior cost function: {ratio_costf}")
if ratio_costf > 1:
warning("WARNING: EnSRF did not reduce the cost function. "
"Please check your set-up.")
# RMSD and RMSE for the full period
# -------------------------------------------
for trid in self.data.trid_obs.values:
for station in self.data.station.values:
mask_obs = obsvect.obsvect_mask
if trid != 'all':
trcr = trid.split("/")[1]
mask_obs = mask_obs & (obsvect.metadata["parameter"] == trcr)
if station != 'all':
mask_obs = mask_obs & (obsvect.metadata["station"] == station)
if np.sum(mask_obs) == 0:
continue
# RMSD
dep_prior = (sim_prior - obsvect.yobs)[mask_obs]
dep_post = (sim_post - obsvect.yobs)[mask_obs]
rmsd_prior = np.sqrt(np.nanmean(dep_prior ** 2))
rmsd_post = np.sqrt(np.nanmean(dep_post ** 2))
self.data['rmsd'].loc['full', 'full', trid, station, 'prior'] = rmsd_prior
self.data['rmsd'].loc['full', 'full', trid, station, 'posterior'] = rmsd_post
if station == 'all':
ratio_rmsd = rmsd_post / rmsd_prior
info("--------------------------------------------------------")
info(f"Prior RMSD for {trid}: {rmsd_prior}")
info(f"Posterior RMSD for {trid}: {rmsd_post}")
info(f"Ratio posterior/prior RMSD for {trid}: {ratio_rmsd}")
if ratio_rmsd > 1:
warning(f"WARNING: EnSRF did not reduce the RMSD for {trid}. "
"Please check your set-up.")
# RMSE
dep_prior = (sim_prior - obsvect.yobs)[mask_obs] / \
obsvect.yobs_err[mask_obs]
dep_post = (sim_post - obsvect.yobs)[mask_obs] / \
obsvect.yobs_err[mask_obs]
rmse_prior = np.sqrt(np.nansum(dep_prior ** 2) / \
np.nansum(1 / obsvect.yobs_err[mask_obs] ** 2))
rmse_post = np.sqrt(np.nansum(dep_post ** 2) / \
np.nansum(1 / obsvect.yobs_err[mask_obs] ** 2))
self.data['rmse'].loc['full', 'full', trid, station, 'prior'] = rmse_prior
self.data['rmse'].loc['full', 'full', trid, station, 'posterior'] = rmse_post
# DOFS for the full period
# -------------------------------------------
hx_samples_dev = copy.deepcopy(hx_samples[:, 3:] - hx_samples[:, 2:3])
nsample = hx_samples_dev.shape[1]
obserr = obsvect.yobs_err
mask_obsa = obsvect.obsvect_mask
hd = hx_samples_dev[mask_obsa]
influence_matrix = hd.dot(hd.T)
influence_matrix /= (nsample - 1) * obserr[mask_obsa] ** 2
self_sens = np.diag(influence_matrix)
# Loop over all the trid and stations and extract mean self-sensitivity and dofs
# for the cumulated influence matrix
meta = obsvect.metadata.loc[mask_obsa]
meta.index = range(len(meta))
for trid in self.data.trid_obs.values:
for station in self.data.station.values:
submask_obs = np.ones(meta.shape[0], dtype=bool)
if trid != 'all':
trcr = trid.split("/")[1]
submask_obs = submask_obs & (meta["parameter"] == trcr)
if station != 'all':
submask_obs = submask_obs & (meta["station"] == station)
if np.sum(submask_obs) != 0:
mean_self_sens = np.nanmean(self_sens[submask_obs])
self.data['mean_self_sens'].loc['full', 'full', trid, station] = mean_self_sens
dofs = np.sum(self_sens[submask_obs])
self.data['dofs'].loc['full', 'full', trid, station] = dofs
if station == 'all':
info("--------------------------------------------------------")
info(f"Total DOFS {trid}: {dofs}")
info(f"Mean self-sensitivity {trid}: {mean_self_sens}")
info("--------------------------------------------------------")
return
[docs]
def dump(self, filename):
if os.path.exists(filename):
os.remove(filename)
# Indexes to find first and last optimization cycle for each window
ref_var = self.data['jb'][..., 0]
self.data['first_cycle_idx'][:] = (~np.isnan(ref_var)).astype(int).argmax(axis=0).values
self.data['last_cycle_idx'][:] = (~np.isnan(ref_var)).cumsum(axis=0).argmax(axis=0).values
self.data['window'] = [str(w) for w in self.data.window.values]
self.data['cycle'] = [str(c) for c in self.data.cycle.values]
self.data.to_netcdf(filename)