Source code for pycif.plugins.datastreams.fields.lmdz_chemfield_reg.fetch
from __future__ import annotations
import datetime
import os
from os import PathLike
from pathlib import Path
import pandas as pd
from .....utils import path
# pylint: disable=unused-argument
[docs]
def fetch(
    ref_dir: str | PathLike,
    ref_file: str | PathLike,
    input_interval: tuple[datetime.datetime, datetime.datetime],
    target_dir: str | PathLike,
    tracer: object | None = None,
    **kwargs,
) -> tuple[
    dict[datetime.datetime, list[str | PathLike]],
    dict[datetime.datetime, list[tuple[datetime.datetime, datetime.datetime]]],
]:
    """Link the input files covering ``input_interval`` into ``target_dir``.

    One file is expected per step of ``tracer.file_freq``; its name is
    obtained by formatting ``ref_file`` with ``strftime`` for the file date
    and joining it to ``ref_dir``. Each file is assumed to be a monthly file
    with daily resolution.

    Args:
        ref_dir: Directory holding the reference files.
        ref_file: File name pattern, expanded with ``strftime``.
        input_interval: ``(start, end)`` of the period to cover.
        target_dir: Directory in which the links are created.
        tracer: Object exposing a ``file_freq`` attribute (a pandas
            frequency) giving the spacing between two consecutive files.
        **kwargs: Ignored.

    Returns:
        A ``(list_files, list_dates)`` pair of dictionaries sharing the same
        keys (one per file date). ``list_dates[key]`` holds the daily
        ``(start, end)`` intervals of the month the file belongs to, and
        ``list_files[key]`` repeats the linked file path once per interval.
        Both are empty when neither ``ref_dir`` nor ``ref_file`` is given.

    Raises:
        FileNotFoundError: If one of the expected source files is missing.
        AttributeError: If ``tracer`` does not provide ``file_freq``
            (including the default ``tracer=None``).
    """
    if not ref_dir and not ref_file:
        return {}, {}

    date_i, date_f = input_interval
    file_freq = tracer.file_freq  # type: ignore

    # One date per file; the end of the interval is excluded.
    file_dates = pd.date_range(date_i, date_f, freq=file_freq, inclusive="left")
    if file_dates.empty:
        file_dates = pd.to_datetime([date_i])
    # date_range starts on the first frequency anchor at or after date_i, so
    # the file containing date_i itself has to be added explicitly.
    if file_dates[0] > date_i:
        file_dates = pd.to_datetime([date_i] + file_dates.to_list())

    list_dates = {}
    list_files = {}
    for date in file_dates:
        source_path = Path(ref_dir, date.strftime(ref_file))
        if not source_path.is_file():
            raise FileNotFoundError(f"file '{source_path}' not found")

        # Fetching
        target_path = os.path.join(target_dir, os.path.basename(source_path))
        path.link(source_path, target_path)

        # Timestamps (assume monthly files with daily resolution).
        # The daily intervals must describe the whole month held by the file,
        # so they are anchored on the first day of the month even when the
        # file date (e.g. a mid-month start of the interval) is not.
        month_start = date.replace(day=1)
        period_start = pd.date_range(
            month_start, periods=month_start.days_in_month, freq="1D"
        )
        period_end = period_start + pd.offsets.Hour(24)

        # pylint: disable=no-member
        key = date.to_pydatetime()
        starts = period_start.to_pydatetime()  # type: ignore
        ends = period_end.to_pydatetime()
        list_dates[key] = list(zip(starts, ends))
        list_files[key] = len(starts) * [str(target_path)]

    return list_files, list_dates