# Source code for pycif.plugins.datastreams.fluxes.wrfchem.read

import numpy as np
import xarray as xr
import netCDF4 as nc

from .times_in_wrf_file import times_in_wrf_file
from .....utils.check.errclass import CifValueError
        

# [docs]
def read(
        self,
        trcr,
        varname,
        in_dates,
        in_files,
        comp_type,
        tracer,
        model,
        ddi,
        **kwargs
):
    """Read fluxes from files and load them into a xarray.DataArray

    Inputs:
    ---------
    Arguments as in plugins/transforms/system/fromcontrol/forward.py.
    trcr: string
        Name in the yaml
    varname: string
        Variable name in file. In yaml it's "varname". If it's not
        defined in the yaml it's empty here.
    in_dates: list
        All input periods. Each entry is indexed with ``[0]`` to obtain
        the datetime.datetime to look up in the corresponding file.
    in_files: list of strings
        For each in_dates, the file where the flux is found
    comp_type: string
        Name of component in yaml, i.e. level above parameter.
        Currently "flux" in my config_wrfchem_explanations.yml.
        Not used here.
    tracer : Plugin flux
        Full flux object, with everything in the yaml. Use this
        instead of the self in read.py because that self can be
        modified. Same with write. Not used here.
    model:  model plugin
        Only its ``domain`` attribute is used (``nlat``, ``nlon`` and
        the optional ``unstructured_domain`` flag).
    ddi: datetime.datetime?
        Start of model period. Not used here.
    **kwargs:
        Ignore, will be removed.

    Outputs:
    ---------
    xmod: xarray.DataArray
        Flux data.
        Dimensions: (time, lev, lat, lon)
        Only the time coordinate is set; it holds the first element of
        each entry of in_dates, in input order.

    Raises:
    ---------
    CifValueError
        If a requested date is not found exactly once in its file, or
        if the horizontal shape of the data read does not match the
        model domain.

    VERSION HISTORY (before incremental git commit messages)
    2021-09-20   freum   Wrote it.
    2021-09-17   freum   Changed arguments to the ones in
                         pycif/plugins/transforms/system/fromcontrol/forward.py
    2021-08-20   freum   Original code from flux_plugin_template, put a
                         NotImplementedError at the beginning.


    """

    fmt = "%Y-%m-%d %H:%M:%S"
    domain = model.domain
    # Levels are deliberately left out of the shape check below.
    required_shape = [1, domain.nlat, domain.nlon]

    fluxes = []
    dates = []
    # Loop over each input date
    for in_period, in_file in zip(in_dates, in_files):
        in_date = in_period[0]

        # Get time index in file to read
        times = times_in_wrf_file(in_file)
        ind = [n for n, t in enumerate(times) if t == in_date]

        # Complain if requested date not found exactly once in file
        if not ind:
            # This case is probably impossible by design
            times_str = ", ".join(time.strftime(fmt) for time in times)
            msg = "in_date {} not found in {}. Dates in file: {}."
            raise CifValueError(
                msg.format(in_date.strftime(fmt), in_file, times_str)
            )
        if len(ind) > 1:
            msg = "in_date {} found more than once in file {}."
            raise CifValueError(msg.format(in_date.strftime(fmt), in_file))

        # Read data. The context manager closes the file even when the
        # variable lookup or the indexing fails.
        # Indexing with the one-element list keeps the time axis.
        with nc.Dataset(in_file, "r") as ncf:
            dat = ncf[varname][ind, ...]

        # In the case of the unstructured wrf domain, flatten the lat/lon
        # dimensions of the input data
        if getattr(domain, "unstructured_domain", False):
            dat = dat.reshape(dat.shape[:2] + (1, np.prod(dat.shape[2:])))

        # Check dimensions. Ignore levels for now (if I recall
        # correctly, the files don't have to match kemit exactly).
        dat_shape = [dat.shape[n] for n in [0, 2, 3]]
        if dat_shape != required_shape:
            msg_fmt = "File {}, variable {}: Expected shape {}, got " + \
                "{} (levels are ignored here)."
            msg = msg_fmt.format(in_file, varname, str(required_shape),
                                 str(dat_shape))
            raise CifValueError(msg)

        # Collect data into list
        fluxes.append(dat)
        dates.append(in_date)

    # Fill datastore
    xmod = xr.DataArray(
        np.concatenate(fluxes, axis=0),
        coords={"time": dates},
        dims=("time", "lev", "lat", "lon"),
    )

    return xmod