Source code for pycif.plugins.datastreams.fluxes.GFEDv4.read

import numpy as np
from logging import debug
import xarray as xr
import pandas as pd
import netCDF4
import h5py
from .....utils.check.errclass import CifError, CifKeyError



[docs]
def read(
        self,
        name,
        varnames,
        dates,
        files,
        interpol_flx=False,
        tracer=None,
        model=None,
        ddi=None,
        **kwargs
):
    """Get GFED4 fluxes, converted to trace-gas emissions, into a pyCIF array.

    Loads the per-ecosystem emission-factor table from
    ``tracer.file_emisfactors`` for the requested species (defaulting to
    1000 g/kgDM per ecosystem if not specified). For each requested
    date/file: reads the monthly dry-matter emissions
    (``/emissions/MM/DM``), converts them to a per-hour rate, computes a
    per-ecosystem contribution-weighted conversion factor from
    ``tracer.ecosystems`` and the per-source partitioning variables in the
    file, then applies daily and/or diurnal fractional scaling (depending on
    ``tracer.temporal_fraction``) read from the HDF file. Latitude is
    flipped to match the domain built by :mod:`get_domain`.

    Args:
        self: the fluxes Plugin.
        name (str): name of the component.
        varnames (str): original name of the variable to read; ``name`` is
            used if ``varnames`` is empty.
        dates (list): list of the date intervals to extract.
        files (list): list of the files matching ``dates``.
        interpol_flx (bool): unused, kept for interface compatibility.
        tracer: the tracer Plugin, giving access to ``file_emisfactors``,
            ``ecosystems`` and ``temporal_fraction``.
        model: unused, kept for interface compatibility.
        ddi: unused, kept for interface compatibility.
        **kwargs: unused, kept for interface compatibility.

    Returns:
        xr.DataArray: the flux data with dimensions
        ``(time, lev, lat, lon)``, in g-species/hour.

    Raises:
        CifError: if the requested species is not found in the emission
            factor file, or if a required ecosystem in ``tracer.ecosystems``
            has no matching emission factor.
        CifKeyError: if a daily or diurnal fraction variable required by
            ``tracer.temporal_fraction`` is missing from the input file.
    """

    var2extract = varnames if varnames != "" else name

    # Read emission factors
    ecosystems = ["SAVA", "BORF", "TEMF", "DEFO", "PEAT", "AGRI"]
    emis_factors = {k: 1000 for k in ecosystems}
    if hasattr(tracer, "file_emisfactors"):
        emis_factors = pd.read_csv(tracer.file_emisfactors, comment="#",
                                   header=None, sep=r"\s+", index_col=0,
                                   names=ecosystems)

        # Raise exception
        if var2extract not in emis_factors.index:
            raise CifError(
                f"Could not find the species {var2extract} in the emission factor file: {tracer.file_emisfactors}\nPlease consider specifying the variable name explicitly using 'varname' in your yaml if the parameter name is not an existing species"
            )

        emis_factors = emis_factors.loc[var2extract].to_dict()

    # Loop over dates/files and import data
    data = []
    out_dates = []
    ref_file = ""
    ref_month = -1
    ref_day = -1
    for dd, ff in zip(dates, files):
        debug(
            f"Reading the file {ff} for the date interval {dd}"
        )

        # Open HDF file
        if ff != ref_file:
            f = h5py.File(ff, "r")
            ref_file = ff

        # read in dry matter emissions
        month = dd[0].month
        year = dd[0].year
        month_str = f'{month:02d}'
        string = '/emissions/' + month_str + '/DM'

        if month != ref_month:
            ref_month = month

            # Dry matter emissions in kgDM/m2/month
            DM_emissions = f[string][:]

            # Convert to emissions per hour
            DM_emissions /= 24 * pd.DatetimeIndex([dd[0]]).days_in_month[0]

            # Compute conversion factor depending on contributions
            contrib_correction = 0
            for source in tracer.ecosystems:
                # Exception if required ecosystems not in emission factors
                if source not in emis_factors:
                    raise CifError(f"According to the yaml, I am expected to extract the contribution from the ecosystem {source}, whereas I have only the following ecosystems available: {emis_factors.keys()}\nPlease check your yaml")

                # read in the fractional contribution of each source
                string = f'/emissions/{month_str}/partitioning/DM_{source}'
                contribution = f[string][:]

                # Apply the emission factor per ecosystem from kgDM to g-species
                contrib_correction += contribution * emis_factors[source]

            DM_emissions *= contrib_correction

        # Load finer temporal resolution if
        tfrac = tracer.temporal_fraction
        if tfrac in ["daily", "diurnal"]:
            if ref_day != dd[0].day:
                daily_frac_str = f'emissions/{month_str}/daily_fraction/day_{dd[0].day}'
                if daily_frac_str not in f:
                    raise CifKeyError(
                        f"The variable {daily_frac_str} is not available in {ff} "
                        "whereas it is needed to extract daily fractions. "
                        "Use the parameter `temporal_fraction = monthly` to use "
                        "monthly values only, or fix your input file."
                    )

                daily_frac = \
                    f[f'emissions/{month_str}/daily_fraction/day_{dd[0].day}'][:]
                DM_emissions *= daily_frac
                ref_day = dd[0].day

        if tfrac == "diurnal":
            dirunal_frac_str = f'emissions/{month_str}/diurnal_cycle/UTC_{dd[0].hour}-{dd[0].hour + 3}h'
            if dirunal_frac_str not in f:
                raise CifKeyError(
                    f"The variable {dirunal_frac_str} is not available in {ff} "
                    "whereas it is needed to extract diurnal fractions. "
                    "Use the parameter `temporal_fraction = monthly` to use "
                    "monthly values only, or fix your input file."
                )

            diurnal_frac = \
                f[f'emissions/{month_str}/diurnal_cycle/UTC_{dd[0].hour}-{dd[0].hour + 3}h'][:]
            DM_emissions *= diurnal_frac

        # Save data and swap latitudes
        data.append(DM_emissions[::-1])
        out_dates.append(dd[0])

    # if only one level for emissions, create the axis
    dataout = np.array(data)[:, np.newaxis]
    dataout[np.isnan(dataout)] = 0
    xmod = xr.DataArray(
        dataout,
        coords={"time": out_dates},
        dims=("time", "lev", "lat", "lon"),
    )

    return xmod