Source code for pycif.plugins.datastreams.fields.lmdz_chemfield_reg.fetch

from __future__ import annotations

import datetime
import os
from os import PathLike
from pathlib import Path

import pandas as pd

from .....utils import path


# pylint: disable=unused-argument
def fetch(
    ref_dir: str | PathLike,
    ref_file: str | PathLike,
    input_interval: tuple[datetime.datetime, datetime.datetime],
    target_dir: str | PathLike,
    tracer: object | None = None,
    **kwargs,
) -> tuple[
    dict[datetime.datetime, list[str | PathLike]],
    dict[datetime.datetime, list[tuple[datetime.datetime, datetime.datetime]]],
]:
    """Link the reference files covering an interval and list their periods.

    Args:
        ref_dir: Directory holding the reference files.
        ref_file: File name pattern; it is expanded with ``strftime`` for
            every file date.
        input_interval: ``(start, end)`` of the period to cover.
        target_dir: Directory in which the links are created.
        tracer: Object whose ``file_freq`` attribute is passed to
            ``pd.date_range`` as the frequency separating two files.
            NOTE(review): the default is ``None``, yet ``tracer.file_freq``
            is read unconditionally once the early return is passed.
        **kwargs: Accepted and ignored.

    Returns:
        ``(list_files, list_dates)``, two dicts keyed by the file date as a
        ``datetime.datetime``. ``list_files`` maps to the linked target path
        (as ``str``) repeated once per period; ``list_dates`` maps to the
        list of ``(period_start, period_end)`` tuples. Both are empty when
        ``ref_dir`` and ``ref_file`` are both falsy.

    Raises:
        FileNotFoundError: If an expected source file is not a regular file.
    """
    # Nothing to fetch when neither a directory nor a file pattern is given
    if not (ref_dir or ref_file):
        return {}, {}

    start, end = input_interval
    freq = tracer.file_freq  # type: ignore

    # File dates: left-inclusive range over the interval, falling back to
    # the interval start when the range is empty, and prepending the start
    # when the first generated date lies after it
    stamps = pd.date_range(start, end, freq=freq, inclusive="left")
    if stamps.empty:
        stamps = pd.to_datetime([start])
    if stamps[0] > start:
        stamps = pd.to_datetime([start, *stamps.to_list()])

    # One source file per file date
    sources = [Path(ref_dir, stamp.strftime(ref_file)) for stamp in stamps]

    list_dates = {}
    list_files = {}
    for stamp, source in zip(stamps, sources):
        if not source.is_file():
            raise FileNotFoundError(f"file '{source}' not found")

        # Hand the file over to the project `path.link` helper
        # (presumably creates a link in target_dir; verify in utils.path)
        target_path = os.path.join(target_dir, os.path.basename(source))
        path.link(source, target_path)

        # Timestamps (assume monthly files with daily resolution): one 24 h
        # period per day, `days_in_month` periods starting at the file date.
        # NOTE(review): when the file date is the prepended interval start
        # and not the first day of a month, the periods run past the end of
        # that month; confirm this is intended.
        day_starts = pd.date_range(stamp, periods=stamp.days_in_month, freq="1D")
        day_ends = day_starts + pd.offsets.Hour(24)  # pylint: disable=no-member

        key = stamp.to_pydatetime()
        periods = list(
            zip(
                day_starts.to_pydatetime(),  # type: ignore
                day_ends.to_pydatetime(),
            )
        )

        list_dates[key] = periods
        list_files[key] = [str(target_path)] * len(periods)

    return list_files, list_dates