Source code for pycif.plugins.datastreams.fluxes.GFEDv4.fetch

import datetime
import glob
import os
from netCDF4 import Dataset
import pandas as pd
import xarray as xr
import numpy as np

from .....utils import path
from .....utils.dates import date_range
from logging import debug
from .....utils.check.errclass import CifError



[docs]
def fetch(ref_dir, ref_file, input_interval, target_dir,
          tracer=None, component=None, **kwargs):
    """
    Locate GFED4 input files for a simulation window and link them.

    Candidate period dates are generated over ``input_interval`` with
    ``date_range`` at ``tracer.file_freq``. Each candidate is expanded through
    ``strftime`` on ``"{ref_dir}/{ref_file}"``; candidates whose file does not
    exist, or whose file was already handled for an earlier candidate, are
    skipped. For every retained file, consecutive date pairs spanning the
    whole calendar year of the candidate date (1 January to 1 January of the
    next year) are generated at the frequency selected by
    ``tracer.temporal_fraction``, and the file is passed to ``path.link``
    towards ``target_dir`` under its base name.

    Args:
        ref_dir (str): directory of the input files; expanded by ``strftime``
            together with ``ref_file``, so it may hold date format codes
        ref_file (str): file name pattern of the input files
        input_interval (list): start and end dates of the simulation window
        target_dir (str): directory in which the files are linked
        tracer: the tracer Plugin, corresponding to the paragraph
            :bash:`datavect/components/fluxes/parameters/my_species` in the
            configuration yaml; its ``file_freq`` and ``temporal_fraction``
            attributes are read here, so the ``None`` default is not usable
        component: the component Plugin, corresponding to the paragraph
            :bash:`datavect/components/fluxes` in the configuration yaml;
            not used by this function
        **kwargs: accepted and ignored

    Return:
        list_files: dictionary keyed by candidate period date; each value
            repeats the file path once per date pair of that year
        list_dates: dictionary keyed by candidate period date; each value is
            the list of ``[start, end]`` date pairs (``numpy.datetime64``)
            covering that year

    Raises:
        CifError: if a file is found and ``tracer.temporal_fraction`` is not
            one of ``"monthly"``, ``"daily"`` or ``"diurnal"``.
    """

    # Temporal fraction label -> pandas frequency alias. Kept as pairs so the
    # match below relies on plain equality only.
    fraction_freqs = (("monthly", "1MS"), ("daily", "D"), ("diurnal", "3h"))

    # Candidate dates, one per file period
    window_start, window_end = input_interval
    period_starts = date_range(
        window_start, window_end, period=tracer.file_freq, close="")

    list_files = {}
    list_dates = {}
    linked = set()
    for period_start in period_starts:
        file = period_start.strftime(f"{ref_dir}/{ref_file}")

        # Ignore files already processed and candidates without a file
        if file in linked or not os.path.isfile(file):
            continue

        # The file is taken to cover the full calendar year
        year_begin = datetime.datetime(period_start.year, 1, 1)
        year_end = datetime.datetime(period_start.year + 1, 1, 1)

        # Pick the sub-annual frequency requested in the yaml configuration
        tfrac = tracer.temporal_fraction
        for label, freq in fraction_freqs:
            if tfrac == label:
                break
        else:
            raise CifError(
                f"The specified temporal fraction {tfrac} is not accepted by GFED. Please check your yaml file")

        # Boundaries of the sub-periods, both year ends included
        stamps = pd.date_range(year_begin, year_end, freq=freq).values

        # Pair each boundary with the following one
        pairs = [[start, end] for start, end in zip(stamps[:-1], stamps[1:])]
        list_dates[period_start] = pairs
        list_files[period_start] = [file] * len(pairs)

        # Fetching
        target_file = f"{target_dir}/{os.path.basename(file)}"
        path.link(file, target_file)

        linked.add(file)

    return list_files, list_dates