This script converts IASI satellite retrievals (N2O, Toulouse product) from their raw daily netCDF files into the standard CIF monitor netCDF format, one output file per month.

  1from netCDF4 import Dataset
  2from pycif.utils.datastores import dump
  3from pycif.utils.netcdf import readnc
  4from pycif.utils.datastores import empty
  5from pycif.utils.datastores.dump import dump_datastore
  6import calendar
  7import os
  8import pandas as pd
  9import datetime
 10import numpy as np
 11import xarray as xr
 12
 13
##### raw data to read
# year of the IASI files to convert and the directory holding them
year = 2011
maindir = '/home/chimereicos/ipison/IASI_Toulouse/'
# French month names, used in the raw data sub-directory names
month_name = [ 'janvier', 'fevrier', 'mars', 'avril', 'mai', 'juin', 'juillet', 'aout', 'septembre', 'octobre', 'novembre', 'decembre' ]
# get the satellite's vertical grid from the prior profile description
prior_profile_file = 'TN2OR_N2O_apriori.nc'
prior_prof = readnc(prior_profile_file, ['n2o_apriori'] )
# number of vertical levels every daily file is expected to match
fixed_nlevels = len(prior_prof)
# satellite formula = XXX
# take level at 308 hPa = the 5th one 
# retained_lev begins at 0
retained_lev = 4

##### datastore to dump in netcdf file for use by the CIF
# list of infos required for any type of obs
list_basic_cols = [ 'date', 'duration', 'station' , 'network', 'parameter', 'lon', 'lat', 'obs', 'obserror' ]
# fixed information for station, network and parameter + duration columns
stationID = 'IASI'
networkID = 'Toulouse'
spec = 'N2O'
obsduration = 1./60. # following Audrey's recommendation for sat data in general
# associated infos for satellites
# NOTE(review): list_prior_cols and list_pres_cols appear unused in the
# processing loop below — confirm whether they can be dropped
list_prior_cols = [ ('qa0lu', k) for k in range(fixed_nlevels -1 )]
list_pres_cols = [('pavg', k) for k in range(fixed_nlevels)]
list_ak_cols = [('ak', k) for k in range(fixed_nlevels - 1 )]
 39
 40# loop over the available files
 41for mm in range(12):
 42    month = '{0:02d}'.format(mm + 1)  
 43    for dd in range(calendar.mdays[mm + 1]):
 44        day = '{0:02d}'.format(dd + 1) 
 45        filein = '{}N2O_{}_{yy}_netcdf/iasi_ret_L2_n2o_ch4_{yy}_{}_{}.nc'.format(maindir, month_name[mm], month, day, yy = year)
 46        if not os.path.exists(filein): continue
 47        ### fill in sub-datastore for the current file
 48        subdata = pd.DataFrame( columns = list_basic_cols )
 49    
 50        ## check number of levels 
 51        pressure_lev = readnc(filein, ['level'] )
 52        nlevels_max = len(pressure_lev) + 1
 53        if ( nlevels_max != fixed_nlevels) : 
 54            print('ERROR NUMBER OF LEVELS')
 55            break
 56
 57        ##date: starting date
 58        # must be a datetime object
 59        hh, mn, ms = readnc(filein, ['hour', 'minute', 'millisecs'] )
 60        dates = [datetime.datetime( year = year, month = mm +1 , day = dd + 1, hour = h, minute = m, second = int(s/1000) ) for h, m, s in zip(hh, mn, ms)]
 61        subdata['date'] = dates
 62        ## duration    duration in hours
 63        subdata = subdata.assign(duration = obsduration) 
 64        ## lon    longitude of the measurement
 65        ## lat    latitude of the measurement
 66        subdata['lon'] = readnc(filein, ['lon'])
 67        subdata['lat'] = readnc(filein, ['lat'])
 68        ## obs    observed value
 69        subdata['obs'] = readnc(filein, ['n2o_retrieval'])[:,retained_lev]
 70        ## obserror  error on the observation
 71        subdata['obserror' ]= readnc(filein, ['n2o_error'])[:,retained_lev]
 72        ## station    station ID
 73        ## network    network name 
 74        ## parameter    name of the observed parameter or species
 75        subdata = subdata.assign(station = stationID)
 76        subdata = subdata.assign(network = networkID)
 77        subdata = subdata.assign(parameter = spec)
 78
 79        ## pressure levels: 
 80        # particular case of IASI Toulouse:
 81        # must put the surface in the right level and NaN lower
 82        ## same for prior profile
 83        pressures = pd.DataFrame(columns = [ 'surf_lev', 'surf_P']  )
 84        pressures[ 'surf_lev' ] = readnc(filein, ['Surf_P_id'])
 85        pressures[ 'surf_P' ] = readnc(filein, ['Surf_P'])
 86        # array with the right values of pressure: from top to the surface + NaN lower
 87        nrows = len(pressures)
 88        pavg = np.zeros((nrows, nlevels_max))
 89        qa0 = np.zeros((nrows, nlevels_max - 1 ))
 90        pavg[range(len(pressures)), pressures["surf_lev"]] = pressures["surf_P"]
 91        indj, indi = np.meshgrid(range(nlevels_max), range(nrows))
 92        indjj, indii = np.meshgrid(range(nlevels_max - 1 ), range(nrows))
 93        pavg[indj > pressures["surf_lev"][:, np.newaxis]] = np.nan
 94        qa0[indjj >= pressures["surf_lev"][:, np.newaxis]] = np.nan # no prior conc at the surface: TO CHECK
 95        mask = indj < pressures["surf_lev"][:, np.newaxis]
 96        pavg[mask] = pressure_lev[indj[mask]]
 97        mask = indjj < pressures["surf_lev"][:, np.newaxis]
 98        qa0[mask] = prior_prof[indjj[mask]]
 99        ## averaging kernels
100        ak = pd.DataFrame(readnc(filein, ['n2o_AVK'] )[:, retained_lev,:-1], columns = list_ak_cols)
101        ## put everything in a xarray
102        # satellite specific info
103        ds = xr.Dataset({'qa0': (['index', 'level'], qa0),
104                     'ak': (['index', 'level'], ak.values),
105                     'pavg0': (['index', 'level_pressure'], pavg)},
106                     coords={'index': subdata.index,
107                             'level': range(fixed_nlevels - 1),
108                              'level_pressure': range(fixed_nlevels)})
109        # put together basic info and satellite info
110        subdata = subdata.to_xarray()
111        subdata = xr.merge([ds, subdata])
112        
113        ## filtering according to information provided with data
114        # filter on daytime/nigthttime, chi2 and land in the tropical band
115        subfilter = pd.DataFrame(columns = ['day_night', 'chi2', 'land_sea'])
116        subfilter['day_night'] =  readnc(filein, ['day_night_index'])
117        subfilter['chi2'] =  readnc(filein, ['xhi2'])
118        subfilter['land_sea'] = readnc(filein, ['land_sea_index'])
119        # plus beware of surface level: Surf_P_id must be > retained_lev
120        # (both indices begin at 0)
121        subfilter['nb_surf_lev'] = readnc(filein, ['Surf_P_id'])
122        mask = np.where((subfilter['day_night'] == 1) & (subfilter['chi2'] < 5) & ( subfilter['nb_surf_lev'] > retained_lev ))[0]
123        subdata = subdata.isel(index=mask)
124        
125        ## index = number of the observation among the remaining ones 
126        subdata["index"] = np.arange(len(subdata["index"]))         
127        ## dump to netCDF - here, one per month to keep size not too large
128        subdata.to_netcdf('~/monitor_{}{}_{}_{}{}.nc'.format(stationID, networkID, spec, year, month))
129    
130