Inversions with surface stations#

1. Prepare the data to assimilate#

Constraints are put into a CIF monitor file for a set of core stations:

#####################################################
# This code is provided by: I. Pison
# contact: isabelle.pison@lsce.ipsl.fr
# date: June 2022
# project: H2020 VERIFY
# relation to CIF: uses plugins, not embedded in.
# It is an example, to be used as a basis
# for developing other work.
# No guarantee as to its compatibility or results
# is attached since it is not part of the CIF
# Please don't forget acknowledgements if you use it.
#####################################################

import pandas as pd
import numpy as np
from pycif.utils.datastores.dump import dump_datastore, read_datastore
from pycif.utils.datastores.empty import init_empty
import glob
import datetime
import sys

dir_data = '/home/chimereicos/VERIFYdataWP4/Obs2021/core/'
spec = 'ch4'
max_unc = 2000.

# Initializes an empty datastore
ds = pd.DataFrame()

# put errors from Szenasi et al., 2021: 
# A pragmatic protocol for characterising errors 
# in atmospheric inversions of methane emissions over Europe, 
# Tellus B: Chemical and Physical Meteorology, 
# 10.1080/16000889.2021.1914989 
TNO_obs_error_file = 'Obs_error_file.txt'
f = open(TNO_obs_error_file, 'r')
TNO_obs_error = f.readlines()[1:]
f.close()
TNO_error_station = []
TNO_station_category = []
TNO_LBC_error = np.empty([len(TNO_obs_error)])
TNO_T_error = np.empty([len(TNO_obs_error)])
TNO_repr_error = np.empty([len(TNO_obs_error)])
for ii in range(len(TNO_obs_error)):
    TNO_obs_error_list = TNO_obs_error[ii].split()
    TNO_error_station.append(TNO_obs_error_list[0])
    TNO_station_category.append(TNO_obs_error_list[1])
    if TNO_obs_error_list[2] != 'NaN' : 
        TNO_LBC_error[ii] = int(TNO_obs_error_list[2])
        TNO_T_error[ii] = int(TNO_obs_error_list[3])
        TNO_repr_error[ii] = int(TNO_obs_error_list[4])
    else:
        TNO_LBC_error[ii] = np.nan
        TNO_T_error[ii] = np.nan
        TNO_repr_error[ii] = np.nan
# squares of errors
montain_seor_mean = np.empty([len(TNO_obs_error)])
costal_seor_mean = np.empty([len(TNO_obs_error)])
other_seor_mean = np.empty([len(TNO_obs_error)])
for jj in range(len(TNO_obs_error)):
    if TNO_station_category[jj] == 'M':
        montain_seor_mean[jj] = np.sqrt((TNO_LBC_error[jj])**2 +\
                                    (TNO_T_error[jj])**2 +\
                                    (TNO_repr_error[jj])**2)
    else:
        montain_seor_mean[jj] = np.nan
    if TNO_station_category[jj] == 'C':
        costal_seor_mean[jj] = np.sqrt((TNO_LBC_error[jj])**2 +\
                                   (TNO_T_error[jj])**2 +\
                                   (TNO_repr_error[jj])**2)
    else:
        costal_seor_mean[jj] = np.nan
    if TNO_station_category[jj] == 'O':
        other_seor_mean[jj] = np.sqrt((TNO_LBC_error[jj])**2 +\
                                  (TNO_T_error[jj])**2 +\
                                  (TNO_repr_error[jj])**2)
    else:
        other_seor_mean[jj] = np.nan
# Mountain station or not, according to S. Houweling information
dt = pd.read_csv('/home/users/ipison/CHIMERE/VERIFY/intercomp_EU/CH4_observation_sitelist.csv', sep=',')

# Read VERIFY provided files
list_files = glob.glob("{}/{}_*.csv".format(dir_data,spec))
nb_header_lines = 9


for fi in list_files:

   fc=open(fi,'r')
   alllines = fc.readlines()
   data = alllines[nb_header_lines - 1]
   columns =  data.split(' ')
   fc.close()
   station = fi.split('_')[nbstat].upper()
   # time zone for selecting night/day hours
   for l in alllines[:nb_header_lines]:
      info = l.split(' ')[0]
      if info == 'timezone':
         utc = l.split('+')
         if len(utc) > 1:
            shift = int(utc[1])
         else:
            shift = 0
   df = pd.read_csv(fi, delimiter=',', header= nb_header_lines-1, names = columns) 
   # Information to put into the monitor file:
   # date
   df.loc[df['minute'] > 60, 'minute'] = 0
   df['Time']=df['year'].astype(str)+'/'+\
         df['month'].astype(str)+'/'+\
         df['day'].astype(str)+'/'+df['hour'].astype(str)+'/'+df['minute'].astype(str)
   df['date'] = pd.to_datetime(df['Time'],format='%Y/%m/%d/%H/%M')
   # duration (hours)
   df = df.assign(duration = 1.) 
   # station
   df.rename(columns={'siteid': 'station', 'longitude': 'lon', 'latitude': 'lat'}, inplace=True)
   # filter on time and type of station
   # 12:00 to 16:00 local time for not-mountain, 0:00 to 6:00 local time for mountain
   is_mountain = dt['Mountain site'].loc[dt['ID'] == station].values[0]
   df['local time'] =  df['hour'] + shift
   if is_mountain == 'No':
     df = df.loc[ (df['local time'] < 16) & (df['local time'] >= 12)]
   else:
     df = df.loc[ (df['local time'] < 6) & (df['local time'] >= 0)]
   # network
   df = df.assign(network = 'VERIFYcore')
   # parameter
   df = df.assign(parameter = spec.upper())
   # obs
   df.rename(columns={'value':'obs'}, inplace = True)
   # filter out negative concentrations!
   df = df.loc[df['obs'] >= 0.]
   # obserror
   # filter out negative uncertainties
   df = df.loc[df['value_unc'] >=0. ]
   if station in TNO_error_station:
       print(station + ' is in TNO_error_station')
       if (len(df) == (df['value_unc'] >= 0.).sum()) and \
          (len(df) == (df['value_unc'] - df['obs'] <0.).sum()) and \
          (len(df) == (df['value_unc'] < max_unc).sum()):
              print('OK error')
              if ~np.isnan(TNO_LBC_error[TNO_error_station.index(station)]):
                  df['obserror'] = np.sqrt( \
                        (TNO_LBC_error[TNO_error_station.index(station)])**2 + \
                        (TNO_T_error[TNO_error_station.index(station)])**2 + \
                        (TNO_repr_error[TNO_error_station.index(station)])**2 +
                        df['value_unc']**2 \
                        )
              elif TNO_station_category[TNO_error_station.index(station)] == 'C':
                  df['obserror'] = np.sqrt( \
                        (np.nanmean(costal_seor_mean))**2 +
                        df['value_unc']**2 \
                        )
              elif TNO_station_category[TNO_error_station.index(station)] == 'M':
                  df['obserror'] = np.sqrt( \
                        (np.nanmean(montain_seor_mean))**2 +
                        df['value_unc']**2 \
                        )          
              elif TNO_station_category[TNO_error_station.index(station)] == 'O':
                  df['obserror'] = np.sqrt( \
                        (np.nanmean(other_seor_mean))**2 +
                        df['value_unc']**2 \
                        )
       else:
         print('not OK for error')
         sys.exit()
         
                  
   else:
       print(station + ' is NOT in TNO_error_station')
       sys.exit()
   # alt (m a.s.l)
   df.rename(columns={'altitude': 'alt'}, inplace = True)

   # extend the output datastore
   column2save = ['date', 'duration', 'station', 'network', 'parameter', 'lon', 'lat', 'obs', 'obserror', 'alt']
   ds = ds.append(df.loc[:, column2save])

# Dump the datastore
dump_datastore(ds, "{}/monitor_VERIFYcore_{}.nc".format(dir_data, spec), dump_default=False, col2dump=ds.columns)

# Checking the consistency of the datastore when reading
ds2 = read_datastore("monitor.nc")


1. Automatically make one job per year#

One job per year is run, based on the following configuration file yml.sed:

#####################
# PYCIF config file #
#####################

#####################################################################
#####################################################################
# PYCIF parameters
pycif_root: &pycif_root /home/users/ipison/cif/
workdir: &workdir /home/chimereicos/VERIFYoutputs/surface-inv3-_YYYY_
outdir: &outdir !join [*workdir, /outdir]

# Verbose level
verbose: 1

# Log file (to be saved in $wordkir)
#logfile: tltestlog
logfile: pyCIF_inv.logtest

# Initial date
datei: _YYYY_-01-01 00:00:00
datef: _YYNN_-01-01 00:00:00

#####################################################################
#####################################################################
# Running mode for PYCIF
mode:
  minimizer:
    maxiter: 20
    epsg: 0.1
    df1: 0.01
    plugin:
      name: M1QN3
      version: std
    simulator:
      plugin:
        name: gausscost
        version: std
      reload_from_previous: true
  plugin:
    name: 4dvar
    version: std
  save_out_netcdf: true

#####################################################################
#####################################################################
# Observation operator
obsoperator:
  plugin:
    name: standard
    version: std
    type: obsoperator
  autorestart: True
#####################################################################
#####################################################################
controlvect:
  plugin:
    name: standard
    version: std
    type: controlvect
#  save_out_netcdf: True
  
#####################################################################
#####################################################################
# Transport model
model :
  plugin:
    name: CHIMERE
    version : std
    type: model

  direxec: !join [*pycif_root, /model_sources/chimereGES/]
  nphour_ref: 6
  ichemstep: 2
  ideepconv: 0
  nivout: 17
  nlevemis: 1

  nzdoms: 6
  nmdoms: 1
 
  force_clean_run: True
  useRAMonly: True 
  ignore_input_dates: True

####################################################################
#####################################################################
obsvect:
  plugin:
    name: standard
    version: std
    type: obsvect
  dir_obsvect: !join [*outdir, /ref_obsvect]
  dump_type: nc

#####################################################################
#####################################################################
platform:
  plugin:
    name: LSCE
    version: obelix

#####################################################################
#####################################################################
# Domain definition
domain :
  plugin:
   name    : CHIMERE
   version : std
  repgrid : /home/chimereges/PYCIF_TEST_DATA/CHIMERE/domains
  domid : EUROCOMex3
  nlev: 17
  p1: 997
  pmax: 200

#####################################################################
#####################################################################
# Chemical scheme
chemistry:
  plugin:
    name: CHIMERE
    version: gasJtab
  schemeid: ges.espigrad.humid
  dir_precomp : /home/users/ipison/CHIMERE/VERIFY/WP4_surface/

#####################################################################
#####################################################################
datavect:
  plugin:
    name: standard
    version: std
  components:
    meteo:
      plugin:
        name: CHIMERE
        version: std      
        type: meteo
      dir: /home/satellites15/afortems/data_EUROCOM/%Y/
      file: METEO.%Y%m%d%H.24.nc
      file_freq: 1D

    concs:
      parameters:
        CH4: 
          dir: /home/chimereicos/VERIFYdataWP4/Obs2021/core/
          file: monitor_VERIFYcore_ch4.nc
          vertical_interpolation:
            file_statlev: /home/users/ipison/CHIMERE/VERIFY/WP4_surface/stations_levels_CHIMERE17lev.txt
 
    flux:
      parameters:
        CH4:
          plugin:
            name: CHIMERE
            version: AEMISSIONS
            type: flux
          dir: /home/chimereicos/VERIFYdataWP4/Fluxes_intercomp_EU/
          file: AEMISSIONS.%Y%m0100.24.nc
          file_freq: 1D
          hresol : hpixels
          vresol : vpixels
          tresol: 24H
          nlev : 1
          err : 1.0 
          hcorrelations:
             landsea: True 
             sigma_land: 200 
             sigma_sea: 1000
             filelsm: /home/users/ipison/CHIMERE/VERIFY/intercomp_EU/lsmEUROCOMex3.nc
             dump_hcorr: True
          tcorrelations:
             sigma_t: 2500H
             dump_tcorr: True
    inicond:
      parameters:
        CH4: 
          plugin:
            name: CAMS
            version: netcdf
            type: field        
          dir: /home/comdata2/Fields/CAMSv19r1/
          comp_type: inicond
          file_freq: 1MS
          file:  cams73_v19r1_ch4_conc_surface_inst_%Y01.nc
          hresol: hpixels
          vresol: vpixels
          nlev: 34
          err: 0.1
          hcorrelations:
             sigma: 1000 
    latcond:
      parameters:
        CH4: 
          plugin:
            name: CAMS
            version: netcdf
            type: field        
          dir: /home/comdata2/Fields/CAMSv19r1/
          file:  cams73_v19r1_ch4_conc_surface_inst_%Y%m.nc
          comp_type: latcond
          file_freq: 1MS
          hresol: hpixels
          vresol: vpixels
          tresol : 2D
          nlev: 34 
          err: 0.1
          hcorrelations: 
             sigma: 1000 
          tcorrelations: 
             sigma_t: 120H

    topcond:
      parameters:
        CH4: 
          plugin:
            name: CAMS
            version: netcdf
            type: field     
          dir: /home/comdata2/Fields/CAMSv19r1/
          file: cams73_v19r1_ch4_conc_surface_inst_%Y%m.nc
          comp_type: topcond
          file_freq: 1MS
          hresol: global
          tresol : 2D
          err: 0.1
          tcorrelations:
            sigma_t: 120H
        

To fill-in a .sed file, you may use cat to replace the variables, as illustrated in the script for a job in 2015:

#!/bin/sh -login

echo "Machine on which the job is running:"
hostname

# User' choices:
# Give year to run
year=2015
# Give directory for the .yml.sed file
dirsed=$HOME/VERIFY/

# next year for end of simulation
ynext=$[ year + 1 ]

# Path of the yml.sed file to use
fyml=${dirsed}/config_chimere_inv_1Y

# Fill-in the yaml.sed into a .yml
cat << EOD > ${fyml}${year}.yml.sed-commands
s,_YYYY_,${year},
s,_YYNN_,${ynext},
s,_INI_,INI_CONCS.${year}0101_${year}0101.00_EUROCOM.nc,
EOD
sed -f ${fyml}${year}.yml.sed-commands ${fyml}.yaml.sed > ${fyml}${year}.yml

# Run the CIF
time python -m pycif ${fyml}${year}.yml

3. Post-processing#

Post-processing of the results include:

a. Maps of fluxes and increments#

Show/Hide Code

#####################################################
# This code is provided by: I. Pison
# contact: isabelle.pison@lsce.ipsl.fr
# date: June 2022
# project: H2020 VERIFY
# relation to CIF: uses plugins, not embedded in.
# It is an example, to be used as a basis
# for developing other work.
# No guarantee as to its compatibility or results
# is attached since it is not part of the CIF
# Please don't forget acknowledgements if you use it.
#####################################################


import numpy as np
import pandas as pd
from pycif.utils.netcdf import readnc
from pycif.utils.datastores import dump
import matplotlib.pyplot as plt
import matplotlib.colors as colors
from matplotlib import ticker
import datetime
import xarray as xr
import cartopy.crs as crs
from mpl_toolkits.axes_grid1.inset_locator import InsetPosition
import cartopy
from cartopy.feature import ShapelyFeature
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER

dir_results = '/home/chimereicos/VERIFYoutputs/surface-inv2-' 
dir_out = '/controlvect/flux/'

year_beg = 2005
year_end = 2017
Na = 6.022e23
fs = 15
list_pr = []
list_po =[]
list_diff= []

for yy in range(year_beg, year_end + 1, 1):
  print(yy)
  fcv = '{}{}{}/controlvect_flux_CH4.nc'.format(dir_results, yy, dir_out)
  flpr = readnc(fcv,['xb_phys'])
  flpo = readnc(fcv,['x_phys'])
  list_pr.append(flpr.mean(axis=0)[0,:,:]/ Na * 16.e6) # molec -> microgCH4
  list_po.append(flpo.mean(axis=0)[0,:,:]/ Na * 16.e6) # molec -> microgCH4
  diff = (flpo - flpr)/flpr * 100.
  list_diff.append(diff.mean(axis=0)[0,:,:])

clon = readnc(fcv,['longitudes_corner'])
clat = readnc(fcv,['latitudes_corner'])

############
# To plot: #
############
rivers = cartopy.feature.NaturalEarthFeature(
    category='physical', name='rivers_lake_centerlines',
    scale='10m', facecolor='none', edgecolor='blue')
borders = cartopy.feature.NaturalEarthFeature(
    category='cultural', name='admin_0_boundary_lines_land',
    scale='10m', facecolor='none', edgecolor='black')

#############
# plot maps #
#############
# color scale with Ncolor colors from cmap base colormap for fluxes
Ncolor = 15
cmap = plt.cm.jet
cmaplist = cmap(np.linspace(0, 1, Ncolor))
cmap_plot = cmap.from_list('custom', cmaplist, Ncolor)

# same for increments
Ncolor2 = 11
cmap2 = plt.cm.bwr
cmaplist2 = cmap2(np.linspace(0, 1, Ncolor2))
cmap2 = cmap2.from_list('custom', cmaplist2, Ncolor2)

lognorm = colors.LogNorm()

##plot a given year
yy = 2010
nby = yy - year_beg
fig = plt.figure(figsize=(21, 10))
gs = fig.add_gridspec(nrows = 2, ncols = 4, width_ratios = [2,2,2,0.1], 
                 left=0., right=0.97, bottom=0, top=1,
                  height_ratios = [1, 1 ] ,hspace = 0.01, wspace = 0.25)
subprior = fig.add_subplot(gs[0,1], projection=crs.PlateCarree())
subprior.pcolor(clon, clat, list_pr[nby],
                         norm=lognorm,
                         cmap = cmap,
                         transform=crs.PlateCarree())
subprior.coastlines(resolution='50m')
subprior.add_feature(borders,linewidth=0.5,linestyle='--')
subprior.set_title("Prior", fontsize = fs)

subpost = fig.add_subplot(gs[0,2], projection=crs.PlateCarree())
im = subpost.pcolor(clon, clat, list_po[nby],
                         norm=lognorm,
                         cmap = cmap,
                         transform=crs.PlateCarree())
subpost.coastlines(resolution='50m')
subpost.add_feature(borders,linewidth=0.5,linestyle='--')
subpost.set_title("Posterior", fontsize = f)

echelle = fig.add_subplot(gs[0,3])
ip = InsetPosition(echelle, [0,0,1,1]) 
echelle.set_axes_locator(ip)
p00 = echelle.get_position()
p01 = echelle.get_position()
p02 = subpost.get_position()
p00_new = [p00.x0, p02.y0, p00.width, p02.height]
echelle.set_position(p00_new)
formatter = ticker.LogFormatter(10, labelOnlyBase=False) 
cbar = plt.colorbar(im, format=formatter, cax=echelle)
cbar.ax.set_title('molec.cm$^{-2}$.s$^{-1}$', fontsize = fs) 

subd = fig.add_subplot(gs[1,2], projection=crs.PlateCarree())
im2 = subd.pcolor(clon, clat, list_diff[nby],
                           vmin=-100, vmax=100,
                           cmap = cmap2,
                           transform=crs.PlateCarree())
subd.coastlines(resolution='50m')
subd.add_feature(borders,linewidth=0.5,linestyle='--')
subd.set_title("Increments: posterior - prior (%)", fontsize = fs)
echelle2 = fig.add_subplot(gs[1,3])
ip = InsetPosition(echelle2, [0,0,1,1]) 
echelle2.set_axes_locator(ip)
p00 = echelle2.get_position()
p01 = echelle2.get_position()
p02 = subd.get_position()
p00_new = [p00.x0, p02.y0, p00.width, p02.height]
echelle2.set_position(p00_new)
cbar2 = plt.colorbar(im2, cax=echelle2, ax = subd)
cbar2.ax.set_title('%', fontsize = fs) 

plt.savefig('flux{}.png'.format(yy))



b. Time series of mixing ratios and maps of statistical indicators at the stations#

Time series of mixing ratios at the stations make it easy to compare the observation, the prior and the analysis (obtained with the posterior emissions).

The maps of statistical indicators at the stations show the ratios of the posterior to the prior values of the bias, RMS, correlation for example.

Show/Hide Code

#####################################################
# This code is provided by: I. Pison
# contact: isabelle.pison@lsce.ipsl.fr
# date: June 2022
# project: H2020 VERIFY
# relation to CIF: uses plugins, not embedded in.
# It is an example, to be used as a basis
# for developing other work.
# No guarantee as to its compatibility or results
# is attached since it is not part of the CIF
# Please don't forget acknowledgements if you use it.
#####################################################

import numpy as np
import pandas as pd
from pycif.utils.netcdf import readnc
from pycif.utils.datastores import dump
import matplotlib.pyplot as plt
import datetime
import cartopy.crs
from cartopy.feature import ShapelyFeature
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER
from mpl_toolkits.axes_grid1.inset_locator import InsetPosition

dir_results = '/home/chimereicos/VERIFYoutputs/surface-inv2-' 
dir_out = '/obsoperator/fwd_'
dir_monit = '/obsvect/concs/CH4/'

year_beg = 2005
year_end = 2017

fs = 30 # fontsize

# read monitors of all years
ds_all_years = pd.DataFrame()
for yy in range(year_beg, year_end + 1, 1):
  print(yy)
  fmonitpr = '{}{}{}-001{}/monitor.nc'.format(dir_results, yy, dir_out, dir_monit)
  fmonitpo = '{}{}{}-002{}/monitor.nc'.format(dir_results, yy, dir_out, dir_monit) 
  dspr = dump.read_datastore(fmonitpr)
  dspo = dump.read_datastore(fmonitpo)
  dsprpo = pd.concat( [dspr, dspo['maindata']['sim']], axis = 1)
  ds_all_years = pd.concat( [dsprpo, ds_all_years], axis = 0)

list_stations = list(ds_all_years[('metadata', 'station')].unique())
list_lat = []
list_lon = []
lat_etiq = []
lon_etiq = []
for stat in list_stations:
  lat = ds_all_years[('metadata', 'lat')].loc[ds_all_years[('metadata', 'station')] == stat].iloc[0]
  lon = ds_all_years[('metadata', 'lon')].loc[ds_all_years[('metadata', 'station')] == stat].iloc[0]
  list_lat.append(lat)
  list_lon.append(lon)
  if stat =='ZSF':
    lat_etiq.append(lat - 0.3)
    lon_etiq.append(lon + 0.2) 
  elif stat =='KIT':
    lat_etiq.append(lat - 0.5)
    lon_etiq.append(lon + 0.2) 
  else:
    lat_etiq.append(lat + 0.2)
    lon_etiq.append(lon + 0.2)
    
dstat = pd.DataFrame(columns = ['station','Prior bias', 'Posterior bias', 'Prior rms', 'Posterior rms', 'Prior corr', 'Posterior corr'])
dstat['station'] = list_stations
for stat in list_stations:
  print(stat)
  ds = ds_all_years.loc[ ds_all_years[('metadata', 'station')] == stat ]
  ds['var'] = ds[('maindata','obserror')].pow(2)
  # compute bias, standard deviation, time correlation
  ds['prior difference'] = ds[('maindata', 'obs')] - ds[('maindata', 'sim')]
  ds['post difference'] = ds[('maindata', 'obs')] - ds['sim']
  dstat['Prior bias'].loc[dstat['station'] == stat ] = ds['prior difference'].mean()
  dstat['Posterior bias'].loc[dstat['station'] == stat ] = ds['post difference'].mean()
  dstat['Prior rms'].loc[dstat['station'] == stat ] =  np.sqrt(ds['prior difference'].pow(2).mean())
  dstat['Posterior rms'].loc[dstat['station'] == stat ] =  np.sqrt(ds['post difference'].pow(2).mean())
  dstat['Prior corr'].loc[dstat['station'] == stat ] =  ds[('maindata', 'obs')].corr(ds[('maindata', 'sim')])
  dstat['Posterior corr'].loc[dstat['station'] == stat] = ds[('maindata', 'obs')].corr(ds['sim'])
  # monthly means
  ds2 = ds.resample('MS', on = ('metadata', 'date'), closed = 'left')
  dsm = ds2.mean()
  counts = ds2.size()
  dsm['meanobserror'] =  dsm['var'].pow(0.5)/counts.pow(0.5)
# plot times series at each station
  plt.figure(figsize=(21,11))
  ax = plt.gca()
  ax.tick_params(axis = 'both', which = 'major', labelsize = fs/2)
  ax.set_xticks([datetime.datetime(i,1,1) for i in range(year_beg, year_end + 1)])
  plt.title('Methane mixing ratios at {}'.format(stat), fontsize = fs)
  plt.xlabel('Date', size = fs)
  plt.ylabel('[CH4] (ppb)', size = fs)
  plt.errorbar(dsm.index, dsm[('maindata','obs')], \
               dsm['meanobserror'], label = 'measurements', \
               linestyle = '-', marker = 'o', ms = 1)
  plt.plot(dsm.index, dsm[('maindata','sim')], \
               label = 'prior', \
               linestyle = '-', marker = 'o', ms = 1)
  plt.plot(dsm.index, dsm['sim'], \
               label = 'analysis', \
               linestyle = '-', marker = 'o', ms = 1)
  plt.xlim(datetime.datetime(2005,1,1,0),datetime.datetime(2018,1,1,0))  
  plt.legend(fontsize = fs/2)
  plt.savefig('ts_{}.png'.format(stat))
  plt.close()  

# maps for statistics
Ncolor = 10
cmap = plt.cm.bwr
cmaplist = cmap(np.linspace(0, 1, Ncolor))
cmap_plot = cmap.from_list('custom', cmaplist, Ncolor)
cmin, cmax = 0, 2
rivers = cartopy.feature.NaturalEarthFeature(
    category='physical', name='rivers_lake_centerlines',
    scale='10m', facecolor='none', edgecolor='blue')
borders = cartopy.feature.NaturalEarthFeature(
    category='cultural', name='admin_0_boundary_lines_land',
    scale='10m', facecolor='none', edgecolor='black')
latmin=33.
latmax=73.
longmin=-15.
longmax=35.
fig = plt.figure(figsize=(21, 10))
gs = fig.add_gridspec(nrows = 2, ncols = 3, height_ratios = [1, 0.02 ] )
s0 = fig.add_subplot(gs[0, 0], projection=cartopy.crs.PlateCarree())
s0.scatter(list_lon, list_lat, c=np.abs(dstat['Posterior bias']/dstat['Prior bias']), 
                    s=20, cmap = cmap, marker = 's', vmin = 0, vmax = 2, edgecolors = 'k',
                    linewidths = 0.4)
s0.set_title('Ratio of posterior to prior mean difference\n between simulation and measurements')
s0.set_extent([longmin, longmax, latmin, latmax])
s0.coastlines(linewidth=0.5, resolution='50m')
s0.add_feature(borders, linewidth=0.5, linestyle='--')

s1 = fig.add_subplot(gs[0, 1], projection=cartopy.crs.PlateCarree())
im = s1.scatter(list_lon, list_lat, c=dstat['Posterior rms']/dstat['Prior rms'], 
                    s=20, cmap = cmap, marker = 's',vmin = 0, vmax = 2, edgecolors = 'k',
                    linewidths = 0.4)
s1.set_title('Ratio of posterior to prior quadratic mean\n of the differences\n between simulation and measurements')
s1.set_extent([longmin, longmax, latmin, latmax])
s1.coastlines(linewidth=0.5, resolution='50m')
s1.add_feature(borders, linewidth=0.5, linestyle='--')

s2 = fig.add_subplot(gs[0, 2], projection=cartopy.crs.PlateCarree())
s2.scatter(list_lon, list_lat, c=np.abs(dstat['Posterior corr'])/np.abs(dstat['Prior corr']), 
                    s=20, cmap = cmap, marker = 's', vmin = 0, vmax = 2, edgecolors = 'k',
                    linewidths = 0.4)
s2.set_title('Ratio of posterior to prior absolute values\n  of time correlation\n between simulation and measurements')
s2.set_extent([longmin, longmax, latmin, latmax])
s2.coastlines(linewidth=0.5, resolution='50m')
s2.add_feature(borders, linewidth=0.5, linestyle='--')

cb = fig.add_subplot(gs[1,1])
cbar = plt.colorbar(im, cax = cb, ax = s0 , orientation = 'horizontal')
cbar.ax.tick_params(labelsize=18)
plt.savefig('map_statistics.png')


c. Yearly emission budgets for various regions#

Examples here of regions are EU27+UK, EU27 countries, groups of countries:

Show/Hide Code

#####################################################
# This code is provided by: I. Pison
# contact: isabelle.pison@lsce.ipsl.fr
# date: June 2022
# project: H2020 VERIFY
# relation to CIF: uses plugins, not embedded in.
# It is an example, to be used as a basis
# for developing other work.
# No guarantee as to its compatibility or results
# is attached since it is not part of the CIF
# Please don't forget acknowledgements if you use it.
#####################################################

import numpy as np
import pandas as pd
from pycif.utils.netcdf import readnc
from pycif.utils.datastores import dump
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import datetime
import xarray as xr
import cartopy.crs as crs
import netCDF4 as nc
from mpl_toolkits.axes_grid1.inset_locator import InsetPosition
import calendar

# read masks for various regions
fmasks = '/home/chimereicos/VERIFYoutputs/masks_for_EU27+UK.nc'
fm = nc.Dataset(fmasks)
nom_reg = 'EU_27+UK'
reg = np.array(fm[nom_reg])
mask = (reg > 0 )

dir_results = '/home/chimereicos/VERIFYoutputs/surface-inv2-' 
dir_out = '/controlvect/flux/'

year_beg = 2005
year_end = 2017
dy = 0.5
dx = 0.5
rearth=637103000. 
Na = 6.022e23
fcv = '{}{}{}/controlvect_flux_CH4.nc'.format(dir_results, year_beg, dir_out)
zlon = readnc(fcv,['longitudes'])[0,:]
zlat = readnc(fcv,['latitudes'])[:,0]
nlat = len(zlat)
nlon = len(zlon)
area = np.zeros((nlat,nlon))
for i in range(nlat):
  for j in range(nlon):
     lat1 = zlat[i]
     lat2 =  zlat[i] + dy
     # formula with lat between -90:90
     area[i,j] = rearth * rearth * dx * np.pi/180. * \
                 (np.cos(lat1 * np.pi/180. + np.pi/2.) - \
                  np.cos(lat2 * np.pi/180. + np.pi/2.))
                  
yrflregpr = []
yrflregpo = []
summ = np.zeros((2,12))
# read controlvect
for yy in range(year_beg, year_end + 1, 1):
  print(yy)
  fcv = '{}{}{}/controlvect_flux_CH4.nc'.format(dir_results, yy, dir_out)
  ds = xr.open_dataset(fcv)
  flpr = readnc(fcv,['xb_phys'])
  flpo = readnc(fcv,['x_phys'])
  flpran = np.sum(np.sum(flpr,axis=1),axis=0) * area * 3600. / Na * 16.e-3
  flpoan = np.sum(np.sum(flpo,axis=1),axis=0) * area * 3600. / Na * 16.e-3
  flreg = np.sum(flpran * mask) * 1e-3 * 1e-3 # kgCH4.yr -> t/an -> kt/an
  yrflregpr.append(flreg)
  flreg = np.sum(flpoan * mask) * 1e-3 * 1e-3 # kgCH4.yr -> t/an -> kt/an
  yrflregpo.append(flreg)
  beg = 0
  for m in range(12):
    mm = m + 1
    nbd = calendar.mdays[mm] + ( mm ==2 and calendar.isleap(yy))
    flprmm = np.sum(np.sum(flpr,axis=1)[beg:beg + nbd * 24],axis=0) * area * 3600. / Na * 16.e-3
    flpomm = np.sum(np.sum(flpo,axis=1)[beg:beg + nbd * 24],axis=0) * area * 3600. / Na * 16.e-3
    beg = beg + nbd * 24
    summ[0, m] = summ[0, m] + np.sum(flprmm * mask) * 1e-3 * 1e-3
    summ[1, m] = summ[1, m] + np.sum(flpomm * mask) * 1e-3 * 1e-3

####################
# plot time series #
####################
fs = 25
plt.figure(figsize=(21,11))
plt.title('Methane emissions in '+nom_reg, fontsize=fs)
plt.xlabel('Year', fontsize=fs)
plt.ylabel('ktCH$_4$', fontsize=fs)
list_ticks = [ i  for i in range(year_end - year_beg + 1)]
list_names = [ year_beg + i for i in range(year_end - year_beg + 1)]
plt.xticks(list_ticks, list_names, fontsize=15)
plt.tick_params(axis='y', labelsize=30)
plt.yticks(fontsize=fs)
plt.bar(list_ticks, yrflregpr, label = 'prior', width=-0.4, align='edge')
plt.bar(list_ticks, yrflregpo, label = 'posterior', width=0.4, align='edge')
plt.legend(fontsize=fs)
plt.savefig('ts_fluxes_{}_yearly.png'.format(nom_reg))
plt.close()

plt.figure(figsize=(21,11))
plt.title('Methane emissions: average seasonal cycle over '+str(year_beg)+'-'+str(year_end)+' in '+nom_reg, fontsize=fs)
plt.xlabel('Month', fontsize=fs)
plt.ylabel('ktCH$_4$', fontsize=fs)
ax = plt.gca()
ax.tick_params(axis = 'both', which = 'major', labelsize = fs/2)
plt.xticks([i+1 for i in range(12)] , [str(i+1) for i in range(12) ])
nbyears = year_end - year_beg + 1
plt.plot([i+1 for i in range(12)], summ[0,:]/nbyears, label = 'prior', \
               linestyle = '--', marker = 'o', ms = 8)
plt.plot([i+1 for i in range(12)], summ[1,:]/nbyears, label = 'posterior', \
              linestyle = '-', marker = 'o', ms = 8)
plt.legend(fontsize=fs-5)
plt.savefig('ts_seasonal_{}_mass.png'.format(nom_reg))

d. Time series of emissions for various regions#

Examples here of regions are EU27+UK, EU27 countries, groups of countries:

Show/Hide Code

#####################################################
# This code is provided by: I. Pison
# contact: isabelle.pison@lsce.ipsl.fr
# date: June 2022
# project: H2020 VERIFY
# relation to CIF: uses plugins, not embedded in.
# It is an example, to be used as a basis
# for developing other work.
# No guarantee as to its compatibility or results
# is attached since it is not part of the CIF
# Please don't forget acknowledgements if you use it.
#####################################################

import numpy as np
import pandas as pd
from pycif.utils.netcdf import readnc
from pycif.utils.datastores import dump
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import datetime
import xarray as xr
import cartopy.crs as crs
import netCDF4 as nc
from mpl_toolkits.axes_grid1.inset_locator import InsetPosition

# read masks for various regions
fmasks = '/home/chimereicos/VERIFYoutputs/masks_for_EU27.nc'
fm = nc.Dataset(fmasks)
reg_name = 'EU_27'
reg = np.array(fm[reg_name])

dir_results = '/home/chimereicos/VERIFYoutputs/surface-inv-' 
dir_out = '/controlvect/flux/'

year_beg = 2005
year_end = 2017

# read controlvect
list_dates = []
tsprEU = []
tspoEU = []
for yy in range(year_beg, year_end + 1, 1):
  print(yy)
  fcv = '{}{}{}/controlvect_flux_CH4.nc'.format(dir_results, yy, dir_out)
  ds = xr.open_dataset(fcv)
  dates = ds['time_phys'].to_dataframe()
  list_dates.append(dates['time_phys'].loc[(dates.index.day == 1) & (dates.index.hour == 1)].tolist())
  flpr = readnc(fcv,['xb_phys'])
  flpo = readnc(fcv,['x_phys'])
  # over the selected region
  flprEU = flpr[:-1,:,:,:] * reg
  tsprEU.append(np.mean(flprEU))
  flpoEU = flpo[:-1,:,:,:] * reg
  tspoEU.append(np.mean(flpoEU))
   
####################
# plot time series #
####################
plt.figure(figsize=(21,11))
plt.title('Methane emissions: average in {}'.format(reg_name), fontsize=15)
plt.xlabel('Date', fontsize=15)
plt.ylabel('molec.cm$^{-2}$.s$^{-1}$', fontsize=15)
list_ticks = []
for ld in list_dates:
    list_ticks.append(ld[0])
list_names = []
for d in list_ticks:
 list_names.append(d.strftime('%Y'))
plt.xticks(list_ticks, list_names, fontsize=15)
plt.tick_params(axis='y', labelsize=30)
plt.yticks(fontsize=15)
plt.bar(list_ticks, tsprEU, label = 'prior', width=-100, align='edge')
plt.bar(list_tciks, tspoEU, label = 'posterior', width=100, align='edge')
plt.legend(fontsize=15)
plt.savefig('ts_fluxes_{}.png'.format(reg_name))
plt.close()