Source code for pycif.plugins.datastreams.fluxes.tm5.read

import datetime
import os

import numpy as np
import xarray as xr
from logging import debug
from .....utils.netcdf import readnc


# JvP 20210609: added import statements, MODULE_NAME module level logger
import logging
import sys
import netCDF4 as nc4
from .....utils.check.errclass import CifRuntimeError
MODULE_NAME = __name__[__name__.index('TM5'):] if 'TM5' in __name__ else __name__
logger = logging.getLogger(MODULE_NAME)



# Original read function by A. Berchet

[docs]
def read_AB(
    self,
    name,
    tracdir,
    tracfile,
    varnames,
    dates,
    interpol_flx=False,
    tracer=None,
    model=None,
    **kwargs
):
    """Original/legacy reader; NOT wired into the plugin (superseded by `read` below).

    Despite scanning `tracdir` for files matching `tracfile`, this function
    does not actually read flux data from them: it fills the output with
    random values (`numpy.random.rand`) for every requested date, making it
    effectively a stub/placeholder. Kept for reference only.

    Args:
        self: the fluxes Plugin, providing ``domain`` (for the output
            shape).
        name: the name of the component
        tracdir, tracfile: flux directory and file format
        varnames: Unused, kept for interface consistency.
        dates: list of dates to extract
        interpol_flx (bool): Unused, kept for interface consistency.
        tracer: Unused, kept for interface consistency.
        model: Used only as a fallback for `tracfile` if the latter is
            empty (via `model.flux.file`).

    Returns:
        xr.DataArray: An array of random values with dimensions
        ``(time, lev, lat, lon)`` -- not real flux data.
    """

    # Replace tracfile by available information from model
    if tracfile == "":
        tracfile = model.flux.file

    # Available files in the directory
    list_files = os.listdir(tracdir)
    list_available = []
    for flx_file in list_files:
        try:
            list_available.append(
                datetime.datetime.strptime(flx_file, tracfile)
            )
        except BaseException:
            continue

    list_available = np.array(list_available)

    # Reading required fluxes files
    trcr_flx = []
    for dd in dates:
        debug("Putting random values in fluxes for date:" + str(dd))
        nmerid, nzonal = self.domain.zlat.shape
        trcr_flx.append(np.random.rand(1, nmerid, nzonal))

    # Building a xarray
    xmod = xr.DataArray(
        trcr_flx, coords={"time": dates}, dims=("time", "lev", "lat", "lon")
    )

    return xmod



# New read function by J.C.A. van Peet

[docs]
def read(
        self,
        name,
        varnames,
        dates,
        files,
        interpol_flx=False,
        tracer=None,
        model=None,
        ddi=None,
        **kwargs
):
    """
    PURPOSE
    Active reader. Get fluxes from TM5 emission NetCDF-4 files and load them
    into a pyCIF variable. For each requested date/file pair, opens the
    file's ``glb600x400`` (or ``glb300x200``) region group, validates the
    grid shape and that the time arrays are consistent across all emission
    sources (``biomass-burning``, ``rice``, ``wetlands``, ``other``), sums
    the sources' emission fields, and selects the value at the matching
    ``time_start`` index. Optionally applies an exponential transform if
    `tracer.exp_conversion` is set.

    ARGS
    self         = the fluxes Plugin
    name         = the name of the component (used as the species group
                   name in the file)
    varnames     = original name(s) of the variable(s) to read; unused
                   directly here (the species group `name` is used instead)
    dates        = list of ``[start, end]`` date intervals to extract; only
                   the start of each interval is matched against the file's
                   ``time_start`` values
    files        = list of files matching `dates`
    interpol_flx = (bool): Unused, kept for interface consistency with
                   other flux plugins.
    tracer       = the flux tracer plugin, providing ``exp_conversion``
    model        = Unused, kept for interface consistency.
    ddi          = Unused, kept for interface consistency.

    KWARGS
    **kwargs = Unused, kept for interface consistency.

    RETURNS
    xr.DataArray with dimensions (time, lev, lat, lon): the summed,
    optionally exponential-converted, emission field for each requested
    date.

    RAISES
    CifRuntimeError: if a file has an unexpected number of NetCDF-4 groups,
        an unknown region, an inconsistent emission array shape, mismatched
        time arrays across sources, or a requested date not present in the
        file's ``time_start`` values.

    VERSION HISTORY
    2.0 09-06-2021 by J.C.A. van Peet
        Adapted for TM5.
    1.0 ??-??-???? by A. Berchet
        See original code above.
    """

    # Set the name of this function
    PROG_NAME = MODULE_NAME+".read"


    # Local logger
    logger = logging.getLogger(PROG_NAME)
    logger.setLevel(logging.DEBUG)

    # Printing options for arrays using np.array2string
    # https://docs.scipy.org/doc/numpy/reference/generated/numpy.array2string.html
    print_ops = {
        'max_line_width': 140,
        'prefix': ' ' * 26 + ' = ',
        'formatter': {'float': lambda x: f' {x:7.2f}',
                      'int': lambda x: ' %7i' % x,
                      'bool': lambda x: 'T' if x else 'F'}}
    
    # Some debug statements...
    logger.debug("")
    logger.debug("*"*30)
    logger.debug(PROG_NAME+" => DEBUG:"                      )
    logger.debug("   name                    = %s", name        )
    logger.debug("   files                   = %s", files     )
    logger.debug("   dates                   = %s", dates    )
    logger.debug("   varnames                = %s", varnames    )
    logger.debug("   dates                   = %s", dates       )
    logger.debug("   interpol_flx            = %s", interpol_flx)
    logger.debug("   tracer                  = %s", tracer      )
    logger.debug("   model                   = %s", model       )


    #import traceback
    #logger.debug("")
    #logger.debug(PROG_NAME+" => called from:"                                                     )
    #logger.debug("   traceback.filename      = %s" % traceback.extract_stack(limit=2)[-2].filename)
    #logger.debug("   traceback.lineno        = %s" % traceback.extract_stack(limit=2)[-2].lineno  )
    #logger.debug("   traceback.name          = %s" % traceback.extract_stack(limit=2)[-2].name    )
    #logger.debug("   traceback.line          = %s" % traceback.extract_stack(limit=2)[-2].line    )
    #if( 'native2inputs_adj' in traceback.extract_stack(limit=2)[-2].name ):
    #    raise RuntimeError("JUST DEBUGGING TO SEE FROM WHICH FUNCTION THIS FUNCTION IS CALLED...")
    ## end if

    # # Replace tracfile by available information from model.
    # # Part from original code, extended with try-except.
    # if tracfile == "":
    #     try:
    #         tracfile = model.flux.file
    #     except AttributeError:
    #         logger.critical("")
    #         logger.critical("*"*30)
    #         logger.critical(PROG_NAME+" => ERROR: no model.flux.file attribute!")
    #         logger.critical("*"*30)
    #         logger.critical("")
    #         raise
    #     # end try
    # # end if( tracfile )


    # >>> Read netCDF file with TM5 emissions

    # List the sources present in the file
    sources = ['biomass-burning', 'rice', 'wetlands', 'other']

    # The start times in the file has not yet been read
    time_start = None
    
    # Loop over dates and files
    total_emis_sel = []
    emis_dates = []
    for date, tracfile in zip(dates, files):
        emis_dates.append(date[0])
    
        # Open file
        # Starting with version > 1.4, netcdf4-python returns masked arrays by
        # default. Previous versions only returned masked arrays if there were
        # actually missing of fill values in the data. Not all matplotlib/cartopy
        # plot functions can handle masked arrays very well, so I added
        # nc_id.set_auto_maskandscale(False). Note that you could also use
        # set_auto_mask() only
        nc_id = nc4.Dataset( tracfile, 'r' )
        nc_id.set_auto_maskandscale(False)
    
    
        # Check if there's only one group in the netcdf-4 file.
        if( len(nc_id.groups) != 1 ):
            logger.critical("")
            logger.critical("*"*30)
            logger.critical(PROG_NAME+" => ERROR: unknown number of groups in netcdf-4 file!")
            # logger.critical("   tracdir      = %s", tracdir     )
            logger.critical("   tracfile     = %s", tracfile    )
            logger.critical("   nc_id.groups = %s", nc_id.groups)
            logger.critical("*"*30)
            logger.critical("")
            raise CifRuntimeError
        # end if( len(nc_id.groups) )
    
        # Check the group
        #for region in nc_id.groups.keys():
        for region in ['glb600x400']:
            logger.debug("   region                  = %s", region)
    
            # The region name should be something like 'glb600x400', which means
            # 'global grid, cell size is 6 degrees in longitude by 4 degrees in
            # latitude'. So now you "just" have to split the group name into its
            # components.
            if( region == 'glb600x400' ):
                d_lon = 6.0
                d_lat = 4.0
                n_lon = 60
                n_lat = 45
            elif( region == 'glb300x200' ):
                d_lon = 3.0
                d_lat = 2.0
                n_lon = 120
                n_lat = 90
            else:
                logger.critical("")
                logger.critical("*"*30)
                logger.critical(PROG_NAME+" => ERROR: Unknown group!")
                logger.critical("   region = %s", region)
                logger.critical("*"*30)
                logger.critical("")
                raise CifRuntimeError
            # end if(group)
    
            # Since the longitude and latitude dimensions are present in the
            # emission files, but do not contain the actual value, you have to
            # generate them yourself. The big assumption here is that the TM5
            # grid runs from -180 <= lon <= 180 and -90 <= lat <= 90.
            lons = np.linspace( -180.0, 180.0, num=n_lon+1 )
            lats = np.linspace(  -90.0,  90.0, num=n_lat+1 )
            lon  = 0.5*(lons[0:-1]+lons[1:])
            lat  = 0.5*(lats[0:-1]+lats[1:])
            logger.debug("   lon                     = %s", np.array2string(lon, **print_ops) )
            logger.debug("   lat                     = %s", np.array2string(lat, **print_ops) )
    
            # Loop over the emission categories
            for i_source, source in enumerate(sources):
    
                # Get the path to the current source and print it
                path = region+'/'+name+'/'+source+'/'
                logger.debug("   source                  = %s", source)
                logger.debug("   path                    = %s", path  )
    
                # Check the array sizes
                if( (nc_id[path+'emission'].shape[1] != n_lat) or (nc_id[path+'emission'].shape[2] != n_lon) ):
                    logger.critical("")
                    logger.critical("*"*30)
                    logger.critical(PROG_NAME+" => ERROR: shape of emission category does not comply with TM5 grid!")
                    logger.critical("   n_lon              = %s", n_lon                        )
                    logger.critical("   n_lat              = %s", n_lat                        )
                    logger.critical("   source             = %s", source                       )
                    logger.critical("   path               = %s", path                         )
                    logger.critical("   emission.shape     = %s", nc_id[path+'emission'].shape )
                    logger.critical("*"*30)
                    logger.critical("")
                    raise CifRuntimeError
                # end if
    
                # Save time variables
                # Note the use of argument unpacking (?) with *bagger.
                nc_time_start = np.array( [datetime.datetime( *bagger ) for bagger in nc_id[path+'time_start'] ] )
                nc_time_mid   = np.array( [datetime.datetime( *bagger ) for bagger in nc_id[path+'time_mid'  ] ] )
                nc_time_end   = np.array( [datetime.datetime( *bagger ) for bagger in nc_id[path+'time_end'  ] ] )
    
                # Save time ranges
                if( time_start is None ):
                    time_start = nc_time_start
                    time_mid   = nc_time_mid
                    time_end   = nc_time_end
    
                    # Set a numpy array which will contain the source emission data
                    sources_emis = np.full( (len(sources), time_start.size, n_lat, n_lon), np.nan )
    
                else:
                    if( not( np.array_equal( time_start, nc_time_start ) and \
                             np.array_equal( time_mid,   nc_time_mid   ) and
                             np.array_equal( time_end,   nc_time_end   ) ) ):
                        logger.critical("")
                        logger.critical("*"*30)
                        logger.critical(PROG_NAME+" => ERROR: time arrays not equal!")
                        logger.critical("   time_start    = %s", time_start   )
                        logger.critical("   time_mid      = %s", time_mid     )
                        logger.critical("   time_end      = %s", time_end     )
                        logger.critical("   nc_time_start = %s", nc_time_start)
                        logger.critical("   nc_time_mid   = %s", nc_time_mid  )
                        logger.critical("   nc_time_end   = %s", nc_time_end  )
                        logger.critical("*"*30)
                        logger.critical("")
                        raise CifRuntimeError
                    # end if check time arrays
                # end if save time variables
    
                # Read the source data. Unit = kg CH4 s^-1
                sources_emis[i_source, :, :, :] = nc_id[path+'emission'][()]
                
            # end for source
    
        # end for region
    
        # Close file
        nc_id.close()

        # Calculate total emissions
        total_emis = np.sum(sources_emis, axis=0)
        logger.debug("   total_emis.shape        = %s", total_emis.shape)
    
        # <<< Read netCDF file with TM5 emissions


        # Get a selection of emissions corresponding to "dates".
        date_idx = np.nonzero(date[0] == nc_time_start)[0]
        if date_idx.size != 1:
            logger.critical("")
            logger.critical("*"*30)
            logger.critical(PROG_NAME+" => ERROR: date not in emission file!")
            logger.critical("   time_start = %s", time_start)
            logger.critical("   time_mid   = %s", time_mid  )
            logger.critical("   time_end   = %s", time_end  )
            logger.critical("   dates      = %s", dates     )
            logger.critical("   date       = %s", date      )
            logger.critical("*"*30)
            logger.critical("")
            raise CifRuntimeError
        # end if
    
        # Append the found emission field to the list. Note that using the
        # np.newaxis will result in a [1, n_lat, n_lon] array, where the
        # first dimension is the level, even though the emissions are always
        # at the surface.
        total_emis_sel.append(total_emis[date_idx[0], np.newaxis, :, :])

    # end for
    logger.debug("   total_emis_sel          = %s", np.array2string(np.asarray(total_emis_sel), **print_ops))
    logger.debug("   len(total_emis_sel)     = %s", len(total_emis_sel)                         )
    logger.debug("   total_emis_sel[0].shape = %s", total_emis_sel[0].shape                     )

    # ... final debug statements ...
    logger.debug("*"*30)
    logger.debug("")

    #logger.debug("")
    #logger.debug("*"*30)
    #logger.debug(PROG_NAME+" => Computer says no!")
    #logger.debug("*"*30)
    #logger.debug("")
    #try:
    #    raise RuntimeError
    #except RuntimeError as e:
    #    #logger.exception("OOPS!")
    #    logger.critical(e, exc_info=True)
    #    #raise     # => Will display the traceback on screen a second time
    #    sys.exit() # => Just exit.
    ## end try
    
    # Exponential conversion
    total_emis_sel = np.array(total_emis_sel)
    if tracer is not None:
        if getattr(tracer, "exp_conversion"):
            mask = total_emis_sel < 0
            np.exp(total_emis_sel, out=total_emis_sel, where=mask)
            np.add(total_emis_sel, 1.0, out=total_emis_sel, where=~mask)
            np.multiply(total_emis_sel, total_emis_sel, out=total_emis_sel)

    # Transfrom total_emis_sel into an xarray and return it to the calling
    # function.
    xmod = xr.DataArray(
        total_emis_sel, coords={"time": emis_dates}, dims=("time", "lev", "lat", "lon")
    )
    logger.debug("")
    logger.debug("   xmod = %s", xmod)
    
    return xmod


# end function read