# Source code for pycif.plugins.datastreams.fluxes.CAMSREG_nc.fetch

import datetime
import glob
import os
import pandas as pd

import numpy as np

from pycif.utils import path
from .utils import find_valid_file



# [docs]
def fetch(ref_dir, ref_file, input_interval, target_dir, tracer=None, **kwargs):
    """Link the input files covering the simulation interval, day by day.

    The interval is walked with a daily step. For each day ``dd``:

    - ``ref_dir`` is expanded with ``strftime`` for ``dd``, ``dd + 1 hour``
      and ``dd - 1 hour``, giving the current, "next" and "previous"
      directories;
    - these are passed, together with ``ref_file`` and ``dd``, to
      :func:`~pycif.plugins.datastreams.fluxes.CAMSREG_nc.utils.find_valid_file`,
      which returns the candidate files (its second return value is not
      used here);
    - if the first candidate exists on disk, it is linked to
      ``target_dir/<dd.strftime(ref_file)>`` and the day is registered with
      24 one-hour sub-intervals; otherwise the day is silently skipped and
      does not appear in the returned dictionaries.

    Args:
        ref_dir (str): directory where the original files are found; may
            contain ``strftime`` directives.
        ref_file (str): (template) name of the original files; may contain
            ``strftime`` directives.
        input_interval (list): simulation interval, as a list of the two
            bounding dates.
        target_dir (str): directory where links to the original files are
            created.
        tracer: the tracer Plugin, corresponding to the paragraph
            :bash:`datavect/components/fluxes/parameters/my_species` in the
            configuration yaml. Not used by this function.
        **kwargs: unused, kept for interface compatibility.

    Returns:
        (dict, dict): ``list_files`` and ``list_dates``, both keyed by the
        daily dates for which a file was found.

        list_files: for each such day, a list with one entry per hourly
            sub-interval; every entry is the full list of files returned
            by ``find_valid_file`` for that day.
        list_dates: for each such day, the list of ``[start, end]`` hourly
            sub-intervals (24 per day).
    """
    one_hour = datetime.timedelta(hours=1)

    list_dates = {}
    list_files = {}
    for dd in pd.date_range(input_interval[0], input_interval[1], freq="1D"):
        # Expand the directory template for the day itself and for the
        # instants one hour after/before it, so that find_valid_file can
        # also look in neighbouring directories.
        dir_dd = dd.strftime(ref_dir)
        dir_dd_next = (dd + one_hour).strftime(ref_dir)
        dir_dd_previous = (dd - one_hour).strftime(ref_dir)
        files_3d, _ = find_valid_file(
            dir_dd, ref_file, dd, dir_dd_next,
            ref_dir_previous=dir_dd_previous,
        )

        # Days without an existing file are left out of the outputs.
        if not os.path.isfile(files_3d[0]):
            continue

        # One [start, end] pair per hour of the day, each associated with
        # the same list of candidate files.
        list_hours = pd.date_range(
            dd, dd + datetime.timedelta(hours=23), freq="1h"
        )
        list_dates[dd] = [[hh, hh + one_hour] for hh in list_hours]
        list_files[dd] = len(list_hours) * [files_3d]

        # Make the file available locally under the name expected for dd.
        path.link(files_3d[0], f"{target_dir}/{dd.strftime(ref_file)}")

    return list_files, list_dates