# Source code for pycif.plugins.datavects.standard.fetch
import glob
import os
from logging import info
import pandas as pd
from ....utils import path
# [docs]
def default_fetch(ref_dir, ref_file, input_dates, target_dir, tracer=None, **kwargs):
    """Fetch input files for a datastream when no dedicated plugin is given.

    The pattern ``ref_dir/ref_file`` is expanded with ``strftime`` for every
    date of a regular range spanning ``input_dates`` and then resolved with
    ``glob``. Every matching regular file is linked into ``target_dir`` under
    its base name.

    Args:
        ref_dir (str): Path to the data
        ref_file (str): File name pattern of the data; it is passed through
            ``strftime`` and then ``glob``
        input_dates (list[datetime.datetime]): Two-element sequence
            ``(start, end)`` delimiting the period to fetch
        target_dir (str): Where to link the data
        tracer (optional): Object whose ``file_freq`` attribute, when set to
            a non-empty value, is used as the pandas frequency of the date
            range. Defaults to None, in which case ``"1D"`` is used.
        **kwargs: Extra keyword arguments are accepted and ignored.

    Returns:
        (list_files, list_dates): tuple of dictionaries keyed by date.
        ``list_files[date]`` is a list of ``[path]`` entries and
        ``list_dates[date]`` the parallel list of ``[date, date]`` intervals.
        Dates without any matching regular file are absent from both.

    Raises:
        ValueError: If ``input_dates`` does not unpack into exactly two items.
    """
    ref_path = os.path.join(ref_dir, ref_file)
    info(f"Default fetching of input files from:\n{ref_path}")

    date_start, date_end = input_dates

    # Fall back to daily files when there is no tracer, or when its
    # frequency is missing or empty.
    file_freq = getattr(tracer, "file_freq", None) or "1D"

    list_dates = {}
    list_files = {}
    for date in pd.date_range(date_start, date_end, freq=file_freq):
        for path_str in glob.glob(date.strftime(ref_path)):
            # Only regular files are fetched; matching directories are skipped
            if not os.path.isfile(path_str):
                continue

            files = list_files.setdefault(date, [])
            dates = list_dates.setdefault(date, [])

            # Entries are stored as one-element lists, so the duplicate check
            # has to compare against [path_str]; a bare string never matches.
            entry = [path_str]
            if entry not in files:
                dates.append([date, date])
                files.append(entry)

            # Fetching
            target = os.path.join(target_dir, os.path.basename(path_str))
            path.link(path_str, target)

    return list_files, list_dates