# This script converts IASI (Toulouse) satellite retrievals of N2O to the standard CIF monitor format.
1from netCDF4 import Dataset
2from pycif.utils.datastores import dump
3from pycif.utils.netcdf import readnc
4from pycif.utils.datastores import empty
5from pycif.utils.datastores.dump import dump_datastore
6import calendar
7import os
8import pandas as pd
9import datetime
10import numpy as np
11import xarray as xr
12
13
14##### raw data to read
15year = 2011
16maindir = '/home/chimereicos/ipison/IASI_Toulouse/'
17month_name = [ 'janvier', 'fevrier', 'mars', 'avril', 'mai', 'juin', 'juillet', 'aout', 'septembre', 'octobre', 'novembre', 'decembre' ]
18# get the satellite's vertical grid from the prior profile description
19prior_profile_file = 'TN2OR_N2O_apriori.nc'
20prior_prof = readnc(prior_profile_file, ['n2o_apriori'] )
21fixed_nlevels = len(prior_prof)
22# satellite formula = XXX
23# take level at 308 hPa = the 5th one
24# retained_lev begins at 0
25retained_lev = 4
26
27##### datastore to dump in netcdf file for use by the CIF
28# list of infos required for any type of obs
29list_basic_cols = [ 'date', 'duration', 'station' , 'network', 'parameter', 'lon', 'lat', 'obs', 'obserror' ]
30# fixed information for station, network and parameter + duration columns
31stationID = 'IASI'
32networkID = 'Toulouse'
33spec = 'N2O'
34obsduration = 1./60. # following Audrey's recommendation for sat data in general
35# associated infos for satellites
36list_prior_cols = [ ('qa0lu', k) for k in range(fixed_nlevels -1 )]
37list_pres_cols = [('pavg', k) for k in range(fixed_nlevels)]
38list_ak_cols = [('ak', k) for k in range(fixed_nlevels - 1 )]
39
40# loop over the available files
41for mm in range(12):
42 month = '{0:02d}'.format(mm + 1)
43 for dd in range(calendar.mdays[mm + 1]):
44 day = '{0:02d}'.format(dd + 1)
45 filein = '{}N2O_{}_{yy}_netcdf/iasi_ret_L2_n2o_ch4_{yy}_{}_{}.nc'.format(maindir, month_name[mm], month, day, yy = year)
46 if not os.path.exists(filein): continue
47 ### fill in sub-datastore for the current file
48 subdata = pd.DataFrame( columns = list_basic_cols )
49
50 ## check number of levels
51 pressure_lev = readnc(filein, ['level'] )
52 nlevels_max = len(pressure_lev) + 1
53 if ( nlevels_max != fixed_nlevels) :
54 print('ERROR NUMBER OF LEVELS')
55 break
56
57 ##date: starting date
58 # must be a datetime object
59 hh, mn, ms = readnc(filein, ['hour', 'minute', 'millisecs'] )
60 dates = [datetime.datetime( year = year, month = mm +1 , day = dd + 1, hour = h, minute = m, second = int(s/1000) ) for h, m, s in zip(hh, mn, ms)]
61 subdata['date'] = dates
62 ## duration duration in hours
63 subdata = subdata.assign(duration = obsduration)
64 ## lon longitude of the measurement
65 ## lat latitude of the measurement
66 subdata['lon'] = readnc(filein, ['lon'])
67 subdata['lat'] = readnc(filein, ['lat'])
68 ## obs observed value
69 subdata['obs'] = readnc(filein, ['n2o_retrieval'])[:,retained_lev]
70 ## obserror error on the observation
71 subdata['obserror' ]= readnc(filein, ['n2o_error'])[:,retained_lev]
72 ## station station ID
73 ## network network name
74 ## parameter name of the observed parameter or species
75 subdata = subdata.assign(station = stationID)
76 subdata = subdata.assign(network = networkID)
77 subdata = subdata.assign(parameter = spec)
78
79 ## pressure levels:
80 # particular case of IASI Toulouse:
81 # must put the surface in the right level and NaN lower
82 ## same for prior profile
83 pressures = pd.DataFrame(columns = [ 'surf_lev', 'surf_P'] )
84 pressures[ 'surf_lev' ] = readnc(filein, ['Surf_P_id'])
85 pressures[ 'surf_P' ] = readnc(filein, ['Surf_P'])
86 # array with the right values of pressure: from top to the surface + NaN lower
87 nrows = len(pressures)
88 pavg = np.zeros((nrows, nlevels_max))
89 qa0 = np.zeros((nrows, nlevels_max - 1 ))
90 pavg[range(len(pressures)), pressures["surf_lev"]] = pressures["surf_P"]
91 indj, indi = np.meshgrid(range(nlevels_max), range(nrows))
92 indjj, indii = np.meshgrid(range(nlevels_max - 1 ), range(nrows))
93 pavg[indj > pressures["surf_lev"][:, np.newaxis]] = np.nan
94 qa0[indjj >= pressures["surf_lev"][:, np.newaxis]] = np.nan # no prior conc at the surface: TO CHECK
95 mask = indj < pressures["surf_lev"][:, np.newaxis]
96 pavg[mask] = pressure_lev[indj[mask]]
97 mask = indjj < pressures["surf_lev"][:, np.newaxis]
98 qa0[mask] = prior_prof[indjj[mask]]
99 ## averaging kernels
100 ak = pd.DataFrame(readnc(filein, ['n2o_AVK'] )[:, retained_lev,:-1], columns = list_ak_cols)
101 ## put everything in a xarray
102 # satellite specific info
103 ds = xr.Dataset({'qa0': (['index', 'level'], qa0),
104 'ak': (['index', 'level'], ak.values),
105 'pavg0': (['index', 'level_pressure'], pavg)},
106 coords={'index': subdata.index,
107 'level': range(fixed_nlevels - 1),
108 'level_pressure': range(fixed_nlevels)})
109 # put together basic info and satellite info
110 subdata = subdata.to_xarray()
111 subdata = xr.merge([ds, subdata])
112
113 ## filtering according to information provided with data
114 # filter on daytime/nigthttime, chi2 and land in the tropical band
115 subfilter = pd.DataFrame(columns = ['day_night', 'chi2', 'land_sea'])
116 subfilter['day_night'] = readnc(filein, ['day_night_index'])
117 subfilter['chi2'] = readnc(filein, ['xhi2'])
118 subfilter['land_sea'] = readnc(filein, ['land_sea_index'])
119 # plus beware of surface level: Surf_P_id must be > retained_lev
120 # (both indices begin at 0)
121 subfilter['nb_surf_lev'] = readnc(filein, ['Surf_P_id'])
122 mask = np.where((subfilter['day_night'] == 1) & (subfilter['chi2'] < 5) & ( subfilter['nb_surf_lev'] > retained_lev ))[0]
123 subdata = subdata.isel(index=mask)
124
125 ## index = number of the observation among the remaining ones
126 subdata["index"] = np.arange(len(subdata["index"]))
127 ## dump to netCDF - here, one per month to keep size not too large
128 subdata.to_netcdf('~/monitor_{}{}_{}_{}{}.nc'.format(stationID, networkID, spec, year, month))
129
130