Inversions with surface stations#

1. Prepare the data to assimilate#

Constraints are put into a CIF monitor file for a set of core stations:

#####################################################
# This code is provided by: I. Pison
# contact: isabelle.pison@lsce.ipsl.fr
# date: June 2022
# project: H2020 VERIFY
# relation to CIF: uses plugins, not embedded in.
# It is an example, to be used as a basis
# for developing other work.
# No guarantee as to its compatibility or results
# is attached since it is not part of the CIF
# Please don't forget acknowledgements if you use it.
#####################################################

import pandas as pd
import numpy as np
from pycif.utils.datastores.dump import dump_datastore, read_datastore
from pycif.utils.datastores.empty import init_empty
import glob
import datetime
import sys

dir_data = '/home/chimereicos/VERIFYdataWP4/Obs2021/core/'
spec = 'ch4'
max_unc = 2000.

# Initializes an empty datastore
ds = pd.DataFrame()

# put errors from Szenasi et al., 2021: 
# A pragmatic protocol for characterising errors 
# in atmospheric inversions of methane emissions over Europe, 
# Tellus B: Chemical and Physical Meteorology, 
# 10.1080/16000889.2021.1914989 
TNO_obs_error_file = 'Obs_error_file.txt'
f = open(TNO_obs_error_file, 'r')
TNO_obs_error = f.readlines()[1:]
f.close()
TNO_error_station = []
TNO_station_category = []
TNO_LBC_error = np.empty([len(TNO_obs_error)])
TNO_T_error = np.empty([len(TNO_obs_error)])
TNO_repr_error = np.empty([len(TNO_obs_error)])
for ii in range(len(TNO_obs_error)):
    TNO_obs_error_list = TNO_obs_error[ii].split()
    TNO_error_station.append(TNO_obs_error_list[0])
    TNO_station_category.append(TNO_obs_error_list[1])
    if TNO_obs_error_list[2] != 'NaN' : 
        TNO_LBC_error[ii] = int(TNO_obs_error_list[2])
        TNO_T_error[ii] = int(TNO_obs_error_list[3])
        TNO_repr_error[ii] = int(TNO_obs_error_list[4])
    else:
        TNO_LBC_error[ii] = np.nan
        TNO_T_error[ii] = np.nan
        TNO_repr_error[ii] = np.nan
# squares of errors
montain_seor_mean = np.empty([len(TNO_obs_error)])
costal_seor_mean = np.empty([len(TNO_obs_error)])
other_seor_mean = np.empty([len(TNO_obs_error)])
for jj in range(len(TNO_obs_error)):
    if TNO_station_category[jj] == 'M':
        montain_seor_mean[jj] = np.sqrt((TNO_LBC_error[jj])**2 +\
                                    (TNO_T_error[jj])**2 +\
                                    (TNO_repr_error[jj])**2)
    else:
        montain_seor_mean[jj] = np.nan
    if TNO_station_category[jj] == 'C':
        costal_seor_mean[jj] = np.sqrt((TNO_LBC_error[jj])**2 +\
                                   (TNO_T_error[jj])**2 +\
                                   (TNO_repr_error[jj])**2)
    else:
        costal_seor_mean[jj] = np.nan
    if TNO_station_category[jj] == 'O':
        other_seor_mean[jj] = np.sqrt((TNO_LBC_error[jj])**2 +\
                                  (TNO_T_error[jj])**2 +\
                                  (TNO_repr_error[jj])**2)
    else:
        other_seor_mean[jj] = np.nan
# Mountain station or not, according to S. Houweling information
dt = pd.read_csv('/home/users/ipison/CHIMERE/VERIFY/intercomp_EU/CH4_observation_sitelist.csv', sep=',')

# Read VERIFY provided files
list_files = glob.glob("{}/{}_*.csv".format(dir_data,spec))
nb_header_lines = 9


for fi in list_files:

   fc=open(fi,'r')
   alllines = fc.readlines()
   data = alllines[nb_header_lines - 1]
   columns =  data.split(' ')
   fc.close()
   station = fi.split('_')[nbstat].upper()
   # time zone for selecting night/day hours
   for l in alllines[:nb_header_lines]:
      info = l.split(' ')[0]
      if info == 'timezone':
         utc = l.split('+')
         if len(utc) > 1:
            shift = int(utc[1])
         else:
            shift = 0
   df = pd.read_csv(fi, delimiter=',', header= nb_header_lines-1, names = columns) 
   # Information to put into the monitor file:
   # date
   df.loc[df['minute'] > 60, 'minute'] = 0
   df['Time']=df['year'].astype(str)+'/'+\
         df['month'].astype(str)+'/'+\
         df['day'].astype(str)+'/'+df['hour'].astype(str)+'/'+df['minute'].astype(str)
   df['date'] = pd.to_datetime(df['Time'],format='%Y/%m/%d/%H/%M')
   # duration (hours)
   df = df.assign(duration = 1.) 
   # station
   df.rename(columns={'siteid': 'station', 'longitude': 'lon', 'latitude': 'lat'}, inplace=True)
   # filter on time and type of station
   # 12:00 to 16:00 local time for not-mountain, 0:00 to 6:00 local time for mountain
   is_mountain = dt['Mountain site'].loc[dt['ID'] == station].values[0]
   df['local time'] =  df['hour'] + shift
   if is_mountain == 'No':
     df = df.loc[ (df['local time'] < 16) & (df['local time'] >= 12)]
   else:
     df = df.loc[ (df['local time'] < 6) & (df['local time'] >= 0)]
   # network
   df = df.assign(network = 'VERIFYcore')
   # parameter
   df = df.assign(parameter = spec.upper())
   # obs
   df.rename(columns={'value':'obs'}, inplace = True)
   # filter out negative concentrations!
   df = df.loc[df['obs'] >= 0.]
   # obserror
   # filter out negative uncertainties
   df = df.loc[df['value_unc'] >=0. ]
   if station in TNO_error_station:
       print(station + ' is in TNO_error_station')
       if (len(df) == (df['value_unc'] >= 0.).sum()) and \
          (len(df) == (df['value_unc'] - df['obs'] <0.).sum()) and \
          (len(df) == (df['value_unc'] < max_unc).sum()):
              print('OK error')
              if ~np.isnan(TNO_LBC_error[TNO_error_station.index(station)]):
                  df['obserror'] = np.sqrt( \
                        (TNO_LBC_error[TNO_error_station.index(station)])**2 + \
                        (TNO_T_error[TNO_error_station.index(station)])**2 + \
                        (TNO_repr_error[TNO_error_station.index(station)])**2 +
                        df['value_unc']**2 \
                        )
              elif TNO_station_category[TNO_error_station.index(station)] == 'C':
                  df['obserror'] = np.sqrt( \
                        (np.nanmean(costal_seor_mean))**2 +
                        df['value_unc']**2 \
                        )
              elif TNO_station_category[TNO_error_station.index(station)] == 'M':
                  df['obserror'] = np.sqrt( \
                        (np.nanmean(montain_seor_mean))**2 +
                        df['value_unc']**2 \
                        )          
              elif TNO_station_category[TNO_error_station.index(station)] == 'O':
                  df['obserror'] = np.sqrt( \
                        (np.nanmean(other_seor_mean))**2 +
                        df['value_unc']**2 \
                        )
       else:
         print('not OK for error')
         sys.exit()
         
                  
   else:
       print(station + ' is NOT in TNO_error_station')
       sys.exit()
   # alt (m a.s.l)
   df.rename(columns={'altitude': 'alt'}, inplace = True)

   # extend the output datastore
   column2save = ['date', 'duration', 'station', 'network', 'parameter', 'lon', 'lat', 'obs', 'obserror', 'alt']
   ds = ds.append(df.loc[:, column2save])

# Dump the datastore
dump_datastore(ds, "{}/monitor_VERIFYcore_{}.nc".format(dir_data, spec), dump_default=False, col2dump=ds.columns)

# Checking the consistency of the datastore when reading
ds2 = read_datastore("monitor.nc")

1. Automatically make one job per year#

One job per year is run, based on the following configuration file yml.sed:

To fill-in a .sed file, you may use cat to replace the variables, as illustrated in the script for a job in 2015:

#!/bin/sh -login

echo "Machine on which the job is running:"
hostname

# User' choices:
# Give year to run
year=2015
# Give directory for the .yml.sed file
dirsed=$HOME/VERIFY/

# next year for end of simulation
ynext=$[ year + 1 ]

# Path of the yml.sed file to use
fyml=${dirsed}/config_chimere_inv_1Y

# Fill-in the yaml.sed into a .yml
cat << EOD > ${fyml}${year}.yml.sed-commands
s,_YYYY_,${year},
s,_YYNN_,${ynext},
s,_INI_,INI_CONCS.${year}0101_${year}0101.00_EUROCOM.nc,
EOD
sed -f ${fyml}${year}.yml.sed-commands ${fyml}.yaml.sed > ${fyml}${year}.yml

# Run the CIF
time python -m pycif ${fyml}${year}.yml

3. Post-processing#

Post-processing of the results include:

a. Maps of fluxes and increments#

#####################################################
# This code is provided by: I. Pison
# contact: isabelle.pison@lsce.ipsl.fr
# date: June 2022
# project: H2020 VERIFY
# relation to CIF: uses plugins, not embedded in.
# It is an example, to be used as a basis
# for developing other work.
# No guarantee as to its compatibility or results
# is attached since it is not part of the CIF
# Please don't forget acknowledgements if you use it.
#####################################################


import numpy as np
import pandas as pd
from pycif.utils.netcdf import readnc
from pycif.utils.datastores import dump
import matplotlib.pyplot as plt
import matplotlib.colors as colors
from matplotlib import ticker
import datetime
import xarray as xr
import cartopy.crs as crs
from mpl_toolkits.axes_grid1.inset_locator import InsetPosition
import cartopy
from cartopy.feature import ShapelyFeature
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER

dir_results = '/home/chimereicos/VERIFYoutputs/surface-inv2-' 
dir_out = '/controlvect/flux/'

year_beg = 2005
year_end = 2017
Na = 6.022e23
fs = 15
list_pr = []
list_po =[]
list_diff= []

for yy in range(year_beg, year_end + 1, 1):
  print(yy)
  fcv = '{}{}{}/controlvect_flux_CH4.nc'.format(dir_results, yy, dir_out)
  flpr = readnc(fcv,['xb_phys'])
  flpo = readnc(fcv,['x_phys'])
  list_pr.append(flpr.mean(axis=0)[0,:,:]/ Na * 16.e6) # molec -> microgCH4
  list_po.append(flpo.mean(axis=0)[0,:,:]/ Na * 16.e6) # molec -> microgCH4
  diff = (flpo - flpr)/flpr * 100.
  list_diff.append(diff.mean(axis=0)[0,:,:])

clon = readnc(fcv,['longitudes_corner'])
clat = readnc(fcv,['latitudes_corner'])

############
# To plot: #
############
rivers = cartopy.feature.NaturalEarthFeature(
    category='physical', name='rivers_lake_centerlines',
    scale='10m', facecolor='none', edgecolor='blue')
borders = cartopy.feature.NaturalEarthFeature(
    category='cultural', name='admin_0_boundary_lines_land',
    scale='10m', facecolor='none', edgecolor='black')

#############
# plot maps #
#############
# color scale with Ncolor colors from cmap base colormap for fluxes
Ncolor = 15
cmap = plt.cm.jet
cmaplist = cmap(np.linspace(0, 1, Ncolor))
cmap_plot = cmap.from_list('custom', cmaplist, Ncolor)

# same for increments
Ncolor2 = 11
cmap2 = plt.cm.bwr
cmaplist2 = cmap2(np.linspace(0, 1, Ncolor2))
cmap2 = cmap2.from_list('custom', cmaplist2, Ncolor2)

lognorm = colors.LogNorm()

##plot a given year
yy = 2010
nby = yy - year_beg
fig = plt.figure(figsize=(21, 10))
gs = fig.add_gridspec(nrows = 2, ncols = 4, width_ratios = [2,2,2,0.1], 
                 left=0., right=0.97, bottom=0, top=1,
                  height_ratios = [1, 1 ] ,hspace = 0.01, wspace = 0.25)
subprior = fig.add_subplot(gs[0,1], projection=crs.PlateCarree())
subprior.pcolor(clon, clat, list_pr[nby],
                         norm=lognorm,
                         cmap = cmap,
                         transform=crs.PlateCarree())
subprior.coastlines(resolution='50m')
subprior.add_feature(borders,linewidth=0.5,linestyle='--')
subprior.set_title("Prior", fontsize = fs)

subpost = fig.add_subplot(gs[0,2], projection=crs.PlateCarree())
im = subpost.pcolor(clon, clat, list_po[nby],
                         norm=lognorm,
                         cmap = cmap,
                         transform=crs.PlateCarree())
subpost.coastlines(resolution='50m')
subpost.add_feature(borders,linewidth=0.5,linestyle='--')
subpost.set_title("Posterior", fontsize = f)

echelle = fig.add_subplot(gs[0,3])
ip = InsetPosition(echelle, [0,0,1,1]) 
echelle.set_axes_locator(ip)
p00 = echelle.get_position()
p01 = echelle.get_position()
p02 = subpost.get_position()
p00_new = [p00.x0, p02.y0, p00.width, p02.height]
echelle.set_position(p00_new)
formatter = ticker.LogFormatter(10, labelOnlyBase=False) 
cbar = plt.colorbar(im, format=formatter, cax=echelle)
cbar.ax.set_title('molec.cm$^{-2}$.s$^{-1}$', fontsize = fs) 

subd = fig.add_subplot(gs[1,2], projection=crs.PlateCarree())
im2 = subd.pcolor(clon, clat, list_diff[nby],
                           vmin=-100, vmax=100,
                           cmap = cmap2,
                           transform=crs.PlateCarree())
subd.coastlines(resolution='50m')
subd.add_feature(borders,linewidth=0.5,linestyle='--')
subd.set_title("Increments: posterior - prior (%)", fontsize = fs)
echelle2 = fig.add_subplot(gs[1,3])
ip = InsetPosition(echelle2, [0,0,1,1]) 
echelle2.set_axes_locator(ip)
p00 = echelle2.get_position()
p01 = echelle2.get_position()
p02 = subd.get_position()
p00_new = [p00.x0, p02.y0, p00.width, p02.height]
echelle2.set_position(p00_new)
cbar2 = plt.colorbar(im2, cax=echelle2, ax = subd)
cbar2.ax.set_title('%', fontsize = fs) 

plt.savefig('flux{}.png'.format(yy))

b. Time series of mixing ratios and maps of statistical indicators at the stations#

Time series of mixing ratios at the stations make it easy to compare the observation, the prior and the analysis (obtained with the posterior emissions).

The maps of statistical indicators at the stations show the ratios of the posterior to the prior values of the bias, RMS, correlation for example.

#####################################################
# This code is provided by: I. Pison
# contact: isabelle.pison@lsce.ipsl.fr
# date: June 2022
# project: H2020 VERIFY
# relation to CIF: uses plugins, not embedded in.
# It is an example, to be used as a basis
# for developing other work.
# No guarantee as to its compatibility or results
# is attached since it is not part of the CIF
# Please don't forget acknowledgements if you use it.
#####################################################

import numpy as np
import pandas as pd
from pycif.utils.netcdf import readnc
from pycif.utils.datastores import dump
import matplotlib.pyplot as plt
import datetime
import cartopy.crs
from cartopy.feature import ShapelyFeature
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER
from mpl_toolkits.axes_grid1.inset_locator import InsetPosition

dir_results = '/home/chimereicos/VERIFYoutputs/surface-inv2-' 
dir_out = '/obsoperator/fwd_'
dir_monit = '/obsvect/concs/CH4/'

year_beg = 2005
year_end = 2017

fs = 30 # fontsize

# read monitors of all years
ds_all_years = pd.DataFrame()
for yy in range(year_beg, year_end + 1, 1):
  print(yy)
  fmonitpr = '{}{}{}-001{}/monitor.nc'.format(dir_results, yy, dir_out, dir_monit)
  fmonitpo = '{}{}{}-002{}/monitor.nc'.format(dir_results, yy, dir_out, dir_monit) 
  dspr = dump.read_datastore(fmonitpr)
  dspo = dump.read_datastore(fmonitpo)
  dsprpo = pd.concat( [dspr, dspo['maindata']['sim']], axis = 1)
  ds_all_years = pd.concat( [dsprpo, ds_all_years], axis = 0)

list_stations = list(ds_all_years[('metadata', 'station')].unique())
list_lat = []
list_lon = []
lat_etiq = []
lon_etiq = []
for stat in list_stations:
  lat = ds_all_years[('metadata', 'lat')].loc[ds_all_years[('metadata', 'station')] == stat].iloc[0]
  lon = ds_all_years[('metadata', 'lon')].loc[ds_all_years[('metadata', 'station')] == stat].iloc[0]
  list_lat.append(lat)
  list_lon.append(lon)
  if stat =='ZSF':
    lat_etiq.append(lat - 0.3)
    lon_etiq.append(lon + 0.2) 
  elif stat =='KIT':
    lat_etiq.append(lat - 0.5)
    lon_etiq.append(lon + 0.2) 
  else:
    lat_etiq.append(lat + 0.2)
    lon_etiq.append(lon + 0.2)
    
dstat = pd.DataFrame(columns = ['station','Prior bias', 'Posterior bias', 'Prior rms', 'Posterior rms', 'Prior corr', 'Posterior corr'])
dstat['station'] = list_stations
for stat in list_stations:
  print(stat)
  ds = ds_all_years.loc[ ds_all_years[('metadata', 'station')] == stat ]
  ds['var'] = ds[('maindata','obserror')].pow(2)
  # compute bias, standard deviation, time correlation
  ds['prior difference'] = ds[('maindata', 'obs')] - ds[('maindata', 'sim')]
  ds['post difference'] = ds[('maindata', 'obs')] - ds['sim']
  dstat['Prior bias'].loc[dstat['station'] == stat ] = ds['prior difference'].mean()
  dstat['Posterior bias'].loc[dstat['station'] == stat ] = ds['post difference'].mean()
  dstat['Prior rms'].loc[dstat['station'] == stat ] =  np.sqrt(ds['prior difference'].pow(2).mean())
  dstat['Posterior rms'].loc[dstat['station'] == stat ] =  np.sqrt(ds['post difference'].pow(2).mean())
  dstat['Prior corr'].loc[dstat['station'] == stat ] =  ds[('maindata', 'obs')].corr(ds[('maindata', 'sim')])
  dstat['Posterior corr'].loc[dstat['station'] == stat] = ds[('maindata', 'obs')].corr(ds['sim'])
  # monthly means
  ds2 = ds.resample('MS', on = ('metadata', 'date'), closed = 'left')
  dsm = ds2.mean()
  counts = ds2.size()
  dsm['meanobserror'] =  dsm['var'].pow(0.5)/counts.pow(0.5)
# plot times series at each station
  plt.figure(figsize=(21,11))
  ax = plt.gca()
  ax.tick_params(axis = 'both', which = 'major', labelsize = fs/2)
  ax.set_xticks([datetime.datetime(i,1,1) for i in range(year_beg, year_end + 1)])
  plt.title('Methane mixing ratios at {}'.format(stat), fontsize = fs)
  plt.xlabel('Date', size = fs)
  plt.ylabel('[CH4] (ppb)', size = fs)
  plt.errorbar(dsm.index, dsm[('maindata','obs')], \
               dsm['meanobserror'], label = 'measurements', \
               linestyle = '-', marker = 'o', ms = 1)
  plt.plot(dsm.index, dsm[('maindata','sim')], \
               label = 'prior', \
               linestyle = '-', marker = 'o', ms = 1)
  plt.plot(dsm.index, dsm['sim'], \
               label = 'analysis', \
               linestyle = '-', marker = 'o', ms = 1)
  plt.xlim(datetime.datetime(2005,1,1,0),datetime.datetime(2018,1,1,0))  
  plt.legend(fontsize = fs/2)
  plt.savefig('ts_{}.png'.format(stat))
  plt.close()  

# maps for statistics
Ncolor = 10
cmap = plt.cm.bwr
cmaplist = cmap(np.linspace(0, 1, Ncolor))
cmap_plot = cmap.from_list('custom', cmaplist, Ncolor)
cmin, cmax = 0, 2
rivers = cartopy.feature.NaturalEarthFeature(
    category='physical', name='rivers_lake_centerlines',
    scale='10m', facecolor='none', edgecolor='blue')
borders = cartopy.feature.NaturalEarthFeature(
    category='cultural', name='admin_0_boundary_lines_land',
    scale='10m', facecolor='none', edgecolor='black')
latmin=33.
latmax=73.
longmin=-15.
longmax=35.
fig = plt.figure(figsize=(21, 10))
gs = fig.add_gridspec(nrows = 2, ncols = 3, height_ratios = [1, 0.02 ] )
s0 = fig.add_subplot(gs[0, 0], projection=cartopy.crs.PlateCarree())
s0.scatter(list_lon, list_lat, c=np.abs(dstat['Posterior bias']/dstat['Prior bias']), 
                    s=20, cmap = cmap, marker = 's', vmin = 0, vmax = 2, edgecolors = 'k',
                    linewidths = 0.4)
s0.set_title('Ratio of posterior to prior mean difference\n between simulation and measurements')
s0.set_extent([longmin, longmax, latmin, latmax])
s0.coastlines(linewidth=0.5, resolution='50m')
s0.add_feature(borders, linewidth=0.5, linestyle='--')

s1 = fig.add_subplot(gs[0, 1], projection=cartopy.crs.PlateCarree())
im = s1.scatter(list_lon, list_lat, c=dstat['Posterior rms']/dstat['Prior rms'], 
                    s=20, cmap = cmap, marker = 's',vmin = 0, vmax = 2, edgecolors = 'k',
                    linewidths = 0.4)
s1.set_title('Ratio of posterior to prior quadratic mean\n of the differences\n between simulation and measurements')
s1.set_extent([longmin, longmax, latmin, latmax])
s1.coastlines(linewidth=0.5, resolution='50m')
s1.add_feature(borders, linewidth=0.5, linestyle='--')

s2 = fig.add_subplot(gs[0, 2], projection=cartopy.crs.PlateCarree())
s2.scatter(list_lon, list_lat, c=np.abs(dstat['Posterior corr'])/np.abs(dstat['Prior corr']), 
                    s=20, cmap = cmap, marker = 's', vmin = 0, vmax = 2, edgecolors = 'k',
                    linewidths = 0.4)
s2.set_title('Ratio of posterior to prior absolute values\n  of time correlation\n between simulation and measurements')
s2.set_extent([longmin, longmax, latmin, latmax])
s2.coastlines(linewidth=0.5, resolution='50m')
s2.add_feature(borders, linewidth=0.5, linestyle='--')

cb = fig.add_subplot(gs[1,1])
cbar = plt.colorbar(im, cax = cb, ax = s0 , orientation = 'horizontal')
cbar.ax.tick_params(labelsize=18)
plt.savefig('map_statistics.png')

c. Yearly emission budgets for various regions#

Examples here of regions are EU27+UK, EU27 countries, groups of countries:

#####################################################
# This code is provided by: I. Pison
# contact: isabelle.pison@lsce.ipsl.fr
# date: June 2022
# project: H2020 VERIFY
# relation to CIF: uses plugins, not embedded in.
# It is an example, to be used as a basis
# for developing other work.
# No guarantee as to its compatibility or results
# is attached since it is not part of the CIF
# Please don't forget acknowledgements if you use it.
#####################################################

import numpy as np
import pandas as pd
from pycif.utils.netcdf import readnc
from pycif.utils.datastores import dump
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import datetime
import xarray as xr
import cartopy.crs as crs
import netCDF4 as nc
from mpl_toolkits.axes_grid1.inset_locator import InsetPosition
import calendar

# read masks for various regions
fmasks = '/home/chimereicos/VERIFYoutputs/masks_for_EU27+UK.nc'
fm = nc.Dataset(fmasks)
nom_reg = 'EU_27+UK'
reg = np.array(fm[nom_reg])
mask = (reg > 0 )

dir_results = '/home/chimereicos/VERIFYoutputs/surface-inv2-' 
dir_out = '/controlvect/flux/'

year_beg = 2005
year_end = 2017
dy = 0.5
dx = 0.5
rearth=637103000. 
Na = 6.022e23
fcv = '{}{}{}/controlvect_flux_CH4.nc'.format(dir_results, year_beg, dir_out)
zlon = readnc(fcv,['longitudes'])[0,:]
zlat = readnc(fcv,['latitudes'])[:,0]
nlat = len(zlat)
nlon = len(zlon)
area = np.zeros((nlat,nlon))
for i in range(nlat):
  for j in range(nlon):
     lat1 = zlat[i]
     lat2 =  zlat[i] + dy
     # formula with lat between -90:90
     area[i,j] = rearth * rearth * dx * np.pi/180. * \
                 (np.cos(lat1 * np.pi/180. + np.pi/2.) - \
                  np.cos(lat2 * np.pi/180. + np.pi/2.))
                  
yrflregpr = []
yrflregpo = []
summ = np.zeros((2,12))
# read controlvect
for yy in range(year_beg, year_end + 1, 1):
  print(yy)
  fcv = '{}{}{}/controlvect_flux_CH4.nc'.format(dir_results, yy, dir_out)
  ds = xr.open_dataset(fcv)
  flpr = readnc(fcv,['xb_phys'])
  flpo = readnc(fcv,['x_phys'])
  flpran = np.sum(np.sum(flpr,axis=1),axis=0) * area * 3600. / Na * 16.e-3
  flpoan = np.sum(np.sum(flpo,axis=1),axis=0) * area * 3600. / Na * 16.e-3
  flreg = np.sum(flpran * mask) * 1e-3 * 1e-3 # kgCH4.yr -> t/an -> kt/an
  yrflregpr.append(flreg)
  flreg = np.sum(flpoan * mask) * 1e-3 * 1e-3 # kgCH4.yr -> t/an -> kt/an
  yrflregpo.append(flreg)
  beg = 0
  for m in range(12):
    mm = m + 1
    nbd = calendar.mdays[mm] + ( mm ==2 and calendar.isleap(yy))
    flprmm = np.sum(np.sum(flpr,axis=1)[beg:beg + nbd * 24],axis=0) * area * 3600. / Na * 16.e-3
    flpomm = np.sum(np.sum(flpo,axis=1)[beg:beg + nbd * 24],axis=0) * area * 3600. / Na * 16.e-3
    beg = beg + nbd * 24
    summ[0, m] = summ[0, m] + np.sum(flprmm * mask) * 1e-3 * 1e-3
    summ[1, m] = summ[1, m] + np.sum(flpomm * mask) * 1e-3 * 1e-3

####################
# plot time series #
####################
fs = 25
plt.figure(figsize=(21,11))
plt.title('Methane emissions in '+nom_reg, fontsize=fs)
plt.xlabel('Year', fontsize=fs)
plt.ylabel('ktCH$_4$', fontsize=fs)
list_ticks = [ i  for i in range(year_end - year_beg + 1)]
list_names = [ year_beg + i for i in range(year_end - year_beg + 1)]
plt.xticks(list_ticks, list_names, fontsize=15)
plt.tick_params(axis='y', labelsize=30)
plt.yticks(fontsize=fs)
plt.bar(list_ticks, yrflregpr, label = 'prior', width=-0.4, align='edge')
plt.bar(list_ticks, yrflregpo, label = 'posterior', width=0.4, align='edge')
plt.legend(fontsize=fs)
plt.savefig('ts_fluxes_{}_yearly.png'.format(nom_reg))
plt.close()

plt.figure(figsize=(21,11))
plt.title('Methane emissions: average seasonal cycle over '+str(year_beg)+'-'+str(year_end)+' in '+nom_reg, fontsize=fs)
plt.xlabel('Month', fontsize=fs)
plt.ylabel('ktCH$_4$', fontsize=fs)
ax = plt.gca()
ax.tick_params(axis = 'both', which = 'major', labelsize = fs/2)
plt.xticks([i+1 for i in range(12)] , [str(i+1) for i in range(12) ])
nbyears = year_end - year_beg + 1
plt.plot([i+1 for i in range(12)], summ[0,:]/nbyears, label = 'prior', \
               linestyle = '--', marker = 'o', ms = 8)
plt.plot([i+1 for i in range(12)], summ[1,:]/nbyears, label = 'posterior', \
              linestyle = '-', marker = 'o', ms = 8)
plt.legend(fontsize=fs-5)
plt.savefig('ts_seasonal_{}_mass.png'.format(nom_reg))

d. Time series of emissions for various regions#