Source code for pycif.plugins.datastreams.fluxes.tm5.read_adj

import datetime
import os

import numpy as np
import xarray as xr
from logging import debug
from .....utils.netcdf import readnc


# Extra import statements, MODULE_NAME module level logger
import logging
import sys
import netCDF4 as nc4
import traceback
from .....utils.check.errclass import CifRuntimeError
MODULE_NAME = __name__[__name__.index('TM5'):] if 'TM5' in __name__ else __name__
logger = logging.getLogger(MODULE_NAME)


# Function to read the adj_emissions.nc4 file from an adjoint TM5 run.
# NOTE: NO self AS FIRST ARGUMENT AS IN read.py. IF YOU WOULD WANT THAT,
# YOU SHOULD PROBABLY HAVE TO UPDATE SOME CIF SPECIFIC CODE TO MAKE
# read_adj PART OF THE FLUXES PLUGIN

[docs]
def read_adj( filename ):
    """
    PURPOSE
    Standalone helper (not part of the plugin's ``read``/``write``
    interface, and not called with ``self``). Read adjoint emission-factor
    fluxes from an adjoint TM5 run, generally saved to a file called
    adj_emissions.nc4: opens the ``glb600x400`` region group, validates the
    grid shape and the ``units``/``long_name`` attributes of the
    ``adj_emis`` variable under the ``total`` source, and returns the raw
    array.

    ARGS
    filename     = the full name of the file, including path

    KWARGS
    None

    RETURNS
    numpy.ndarray with shape (nt, n_lat, n_lon): the raw ``adj_emis``
    values read from the ``total`` source group.

    RAISES
    CifRuntimeError: if the file does not have exactly one NetCDF-4 group,
        the region is unknown, the emission array shape is inconsistent
        with the grid, more than one source is present, or the
        ``units``/``long_name`` attributes of ``adj_emis`` are unexpected.

    VERSION HISTORY
    1.0 26-10-2021 by J.C.A. van Peet
        Original code
    """

    # Set the name of this function
    PROG_NAME = MODULE_NAME+".read_adj"


    # Local logger
    logger = logging.getLogger(PROG_NAME)
    logger.setLevel(logging.DEBUG)

    # Printing options for arrays using np.array2string
    # https://docs.scipy.org/doc/numpy/reference/generated/numpy.array2string.html
    print_ops = {'max_line_width':140, 'prefix':' '*26+' = ', \
                 'formatter':{'float':lambda x: f' {x:7.2f}',
                                'int':lambda x: ' %7i' % x,
                               'bool':lambda x: 'T' if x else 'F'}}

    # Get the name of the function calling this current function.
    # https://stackoverflow.com/a/2529884
    parent = traceback.extract_stack(limit=2)[-2]

    # Some debug statements...
    logger.debug("")
    logger.debug("*"*30)
    logger.debug(PROG_NAME+" => DEBUG:"                        )
    logger.debug("   filename           = %s", filename        )
    logger.debug("   traceback info:        "                  )
    logger.debug(f"      parent filename = {parent.filename}")
    logger.debug(f"      parent lineno   = {parent.lineno}"  )
    logger.debug(f"      parent name     = {parent.name}"    )
    logger.debug(f"      parent line     = {parent.line}"    )



    # >>> Read netCDF file with TM5 adjoint emissions (adj_emissions.nc4)

    # List the sources present in the file
    sources = ['total']

    # The start times in the file has not yet been read
    time_start = None

    # Open file
    # Starting with version > 1.4, netcdf4-python returns masked arrays by
    # default. Previous versions only returned masked arrays if there were
    # actually missing of fill values in the data. Not all matplotlib/cartopy
    # plot functions can handle masked arrays very well, so I added
    # nc_id.set_auto_maskandscale(False). Note that you could also use
    # set_auto_mask() only
    nc_id = nc4.Dataset( filename, 'r' )
    nc_id.set_auto_maskandscale(False)


    # Check if there's only one group in the netcdf-4 file.
    if( len(nc_id.groups) != 1 ):
        logger.critical("")
        logger.critical("*"*30)
        logger.critical(PROG_NAME+" => ERROR: unknown number of groups in netcdf-4 file!")
        logger.critical(f"   filename     = {filename}"   )
        logger.critical("   nc_id.groups = %s", nc_id.groups)
        logger.critical("*"*30)
        logger.critical("")
        raise CifRuntimeError
    # end if( len(nc_id.groups) )

    # Check the group
    for region in ['glb600x400']:
        logger.debug("   region                  = %s", region)

        # The region name should be something like 'glb600x400', which means
        # 'global grid, cell size is 6 degrees in longitude by 4 degrees in
        # latitude'. So now you "just" have to split the group name into its
        # components.
        if( region == 'glb600x400' ):
            d_lon = 6.0
            d_lat = 4.0
            n_lon = 60
            n_lat = 45
        elif( region == 'glb300x200' ):
            d_lon = 3.0
            d_lat = 2.0
            n_lon = 120
            n_lat = 90
        else:
            logger.critical("")
            logger.critical("*"*30)
            logger.critical(PROG_NAME+" => ERROR: Unknown group!")
            logger.critical("   region = %s", region)
            logger.critical("*"*30)
            logger.critical("")
            raise CifRuntimeError
        # end if(group)

        # Since the longitude and latitude dimensions are present in the
        # emission files, but do not contain the actual value, you have to
        # generate them yourself. The big assumption here is that the TM5
        # grid runs from -180 <= lon <= 180 and -90 <= lat <= 90.
        lons = np.linspace( -180.0, 180.0, num=n_lon+1 )
        lats = np.linspace(  -90.0,  90.0, num=n_lat+1 )
        lon  = 0.5*(lons[0:-1]+lons[1:])
        lat  = 0.5*(lats[0:-1]+lats[1:])
        logger.debug("   lon                     = %s", np.array2string(lon, **print_ops) )
        logger.debug("   lat                     = %s", np.array2string(lat, **print_ops) )

        # Loop over the emission categories
        for i_source, source in enumerate(sources):

            # Get the path to the current source and print it
            path = region+'/'+source+'/'
            logger.debug("   source                  = %s", source)
            logger.debug("   path                    = %s", path  )

            # Check the array sizes
            if( (nc_id[path+'adj_emis'].shape[1] != n_lat) or (nc_id[path+'adj_emis'].shape[2] != n_lon) ):
                logger.critical("")
                logger.critical("*"*30)
                logger.critical(PROG_NAME+" => ERROR: shape of emission category does not comply with TM5 grid!")
                logger.critical("   n_lon          = %s", n_lon                        )
                logger.critical("   n_lat          = %s", n_lat                        )
                logger.critical("   source         = %s", source                       )
                logger.critical("   path           = %s", path                         )
                logger.critical("   adj_emis.shape = %s", nc_id[path+'adj_emis'].shape )
                logger.critical("*"*30)
                logger.critical("")
                raise CifRuntimeError
            # end if

            # Read the adjoint emissions. Units = 1, long_name = 'adjoint emission factors'.
            # Note that in adj_emissions.nc4, only the "total" source should be present,
            # and therefore that i_source can only be 0. If that is not the case, adj_emis
            # should be defined previously and filled here. The way it is coded now adj_emis
            # will be overwritten for each source. See read.py for an example.
            if( i_source != 0 ):
                logger.critical("")
                logger.critical("*"*30)
                logger.critical(PROG_NAME+" => ERROR: multiple sources present!"       )
                logger.critical("   i_source       = %s", i_source                     )
                logger.critical("   source         = %s", source                       )
                logger.critical("   path           = %s", path                         )
                logger.critical("   adj_emis.shape = %s", nc_id[path+'adj_emis'].shape )
                logger.critical("*"*30)
                logger.critical("")
                raise CifRuntimeError
            # end if
            if( (nc_id[path+'adj_emis'].units     != "1"                       ) or \
                (nc_id[path+'adj_emis'].long_name != "adjoint emission factors") ):
                logger.critical("")
                logger.critical("*"*30)
                logger.critical(PROG_NAME+" => ERROR: unknown units or long_name attribute!"   )
                logger.critical("   i_source           = %s", i_source                         )
                logger.critical("   source             = %s", source                           )
                logger.critical("   path               = %s", path                             )
                logger.critical("   adj_emis.shape     = %s", nc_id[path+'adj_emis'].shape     )
                logger.critical("   adj_emis.units     = %s", nc_id[path+'adj_emis'].units     )
                logger.critical("   adj_emis.long_name = %s", nc_id[path+'adj_emis'].long_name )
                logger.critical("*"*30)
                logger.critical("")
                raise CifRuntimeError
            #
            adj_emis = nc_id[path+'adj_emis'][()]

        # end for source

    # end for region

    # Close file
    nc_id.close()

    # Log debug message
    logger.debug("   adj_emis.shape        = %s", adj_emis.shape)

    # <<< Read netCDF file with TM5 emissions



    # ... final debug statements ...
    logger.debug("*"*30)
    logger.debug("")

    #logger.debug("")
    #logger.debug("*"*30)
    #logger.debug(PROG_NAME+" => Computer says no!")
    #logger.debug("*"*30)
    #logger.debug("")
    #try:
    #    raise RuntimeError
    #except RuntimeError as e:
    #    #logger.exception("OOPS!")
    #    logger.critical(e, exc_info=True)
    #    #raise     # => Will display the traceback on screen a second time
    #    sys.exit() # => Just exit.
    ## end try



    # Return the adj_emis array to the calling function.
    return adj_emis


# end function read_adj