# Source code for pycif.plugins.datavects.standard.fetch

import glob
import os
from logging import info

import pandas as pd
from ....utils import path


def default_fetch(ref_dir, ref_file, input_dates, target_dir, tracer=None, **kwargs):
    """Default fetch function, used when no plugin is given for a datastream.

    Steps through the period ``input_dates`` at the tracer's file frequency,
    expands the reference path for every step, and links each matching
    regular file into ``target_dir``.

    Args:
        ref_dir (str): Path to the data.
        ref_file (str): File name pattern of the data. The joined
            ``ref_dir``/``ref_file`` path is passed through
            ``strftime`` for every date and then through ``glob``, so both
            date directives (e.g. ``%Y%m%d``) and shell wildcards are
            interpreted, in the directory part as well as the file part.
        input_dates (list[datetime.datetime]): Two-element sequence
            ``(date_start, date_end)`` bounding the period to fetch.
        target_dir (str): Where to link the data.
        tracer (object, optional): Object whose optional ``file_freq``
            attribute is used as the pandas frequency string to step through
            the period. A missing or falsy ``file_freq`` (including
            ``tracer=None``) falls back to ``"1D"``.
        **kwargs: Accepted for call compatibility; not used here.

    Returns:
        (list_files, list_dates): tuple of dictionaries keyed by every
        period date for which at least one regular file was found.
        ``list_files[date]`` is a list of one-element lists ``[path]`` and
        ``list_dates[date]`` is the parallel list of ``[date, date]`` pairs.
    """
    ref_path = os.path.join(ref_dir, ref_file)
    info(f"Default fetching of input files from:\n{ref_path}")

    date_start, date_end = input_dates

    # A tracer without file_freq, or with an empty one, means daily files.
    file_freq = getattr(tracer, "file_freq", None) or "1D"
    list_period_dates = pd.date_range(date_start, date_end, freq=file_freq)

    list_dates = {}
    list_files = {}
    for date in list_period_dates:
        for path_str in glob.glob(date.strftime(ref_path)):
            # Directories and other non-regular entries are ignored.
            if not os.path.isfile(path_str):
                continue

            files_for_date = list_files.setdefault(date, [])
            dates_for_date = list_dates.setdefault(date, [])

            # Entries are stored as one-element lists, so the duplicate check
            # has to compare against that same shape; testing the bare string
            # could never match a stored entry.
            entry = [path_str]
            if entry in files_for_date:
                continue

            dates_for_date.append([date, date])
            files_for_date.append(entry)

            # Fetching
            # NOTE(review): path.link is a project helper; presumably it
            # creates a link at `target` pointing to `path_str` - confirm.
            target = os.path.join(target_dir, os.path.basename(path_str))
            path.link(path_str, target)

    return list_files, list_dates