# Source code for pycif.plugins.datastreams.fields.netcdf_cams.fetch

import os
import numpy as np
import pandas as pd
import datetime
from netCDF4 import Dataset
from logging import debug
from .....utils import path
from .....utils.hdf5 import _hdf5_lock



# [docs]
def fetch(ref_dir, ref_file, input_interval, target_dir, tracer=None,
          component=None):
    """Fetch monthly CAMS NetCDF files and their sub-interval time steps.

    Forces the requested period to full months, then for each period start
    date (stepped at ``tracer.file_freq``) reads the size of the ``time``
    dimension of the corresponding file, splits the month starting at that
    date into that many equal sub-intervals, and links the file into
    ``target_dir`` through ``path.link``.

    Args:
        ref_dir: directory where the original files are found. It is
            passed through ``strftime`` together with ``ref_file``, so
            date format codes in it are expanded as well.
        ref_file: (template) name of the original files, expanded with
            ``strftime`` for each period start date.
        input_interval: list of two dates, the beginning and end of the
            simulation.
        target_dir: directory where links to the original files are
            created.
        tracer: the fields Plugin, giving access to ``file_freq``. Although
            it defaults to ``None``, it is required: ``tracer.file_freq``
            is read unconditionally.
        component: unused, accepted for interface compatibility.

    Returns:
        list_files: dict mapping each period start date to a list holding
            the file path repeated once per sub-interval, so that it has
            the same length as the matching entry of ``list_dates``.
        list_dates: dict mapping each period start date to the list of
            ``[start, end]`` sub-intervals covered by that file's time
            steps.

    Raises:
        AttributeError: if ``tracer`` is ``None``.
    """

    # Force the dates to include full months: start on the first day of the
    # first month and stop on the first day of the month following the last
    datei, datef = input_interval
    datei = datetime.datetime(year=datei.year, month=datei.month, day=1)
    datef = datetime.datetime(year=datef.year, month=datef.month, day=1)
    datef += datetime.timedelta(days=int(pd.Timestamp(datef).days_in_month))
    list_period_dates = pd.date_range(
        datei, datef, freq=tracer.file_freq, inclusive="left")

    list_files = {}
    list_dates = {}
    for dd in list_period_dates:
        file = dd.strftime(f"{ref_dir}/{ref_file}")
        debug(f"Reading CAMS data for {dd} in file {file}")

        # Fetch the number of time steps; the file is only opened while
        # holding _hdf5_lock
        with _hdf5_lock:
            with Dataset(file, "r") as f:
                ntimes = f.dimensions["time"].size

        # to_timedelta does not work with all frequencies, so the end of
        # the period is derived from the length of the month of ``dd``.
        # A dedicated name is used so that the overall end date ``datef``
        # is not overwritten.
        period_end = dd + datetime.timedelta(days=int(dd.days_in_month))

        # ntimes + 1 evenly spaced bounds delimit ntimes sub-intervals
        list_hours = pd.date_range(dd, period_end, periods=ntimes + 1)
        list_dates[dd] = [[hh0, hh1]
                          for hh0, hh1 in zip(list_hours[:-1], list_hours[1:])]

        # One file entry per sub-interval (not per bound), so that
        # list_files[dd] and list_dates[dd] have the same length
        list_files[dd] = len(list_dates[dd]) * [file]

        target_file = f"{target_dir}/{os.path.basename(file)}"
        path.link(file, target_file)

    return list_files, list_dates