Source code for pycif.plugins.datastreams.fields.netcdf_cams.fetch

import os
import numpy as np
import pandas as pd
import datetime
from netCDF4 import Dataset
from logging import debug
from .....utils import path


def fetch(ref_dir, ref_file, input_interval, target_dir,
          tracer=None, component=None):
    """
    Reads netcdf files from CAMS for a given species. One file per month.

    The requested interval is first widened to whole calendar months. For
    every period start generated with ``tracer.file_freq``, the matching
    file is opened to read the size of its ``time`` dimension, and the month
    starting at that date is split into that many equal sub-intervals.

    Args
    -----
    - ref_dir: directory where the original files are found
    - ref_file: (template) name of the original files; together with
      ``ref_dir`` it is expanded with ``strftime`` for each period start
    - input_interval: list of two dates: the beginning and end of the
      simulation
    - target_dir: directory where the links to the original files are
      created
    - tracer: object exposing ``file_freq``, which is passed as ``freq`` to
      ``pandas.date_range``. Although it defaults to None, it is required:
      passing None raises an AttributeError.
    - component: not used by this function

    Returns
    ---------
    - list_files: dict mapping each period start date to a list holding the
      file path once per sub-interval of ``list_dates``
    - list_dates: dict mapping each period start date to a list of
      ``[start, end]`` sub-intervals

    Note that the tuple is returned in the order (list_files, list_dates).
    """

    # Force the dates to include full months
    datei, datef = input_interval
    datei = datetime.datetime(year=datei.year, month=datei.month, day=1)
    datef = datetime.datetime(year=datef.year, month=datef.month, day=1)
    # Push the end date to the first day of the following month
    datef += datetime.timedelta(
        days=int(pd.DatetimeIndex([datef]).days_in_month[0]))
    list_period_dates = pd.date_range(
        datei, datef, freq=tracer.file_freq, inclusive="left")

    list_files = {}
    list_dates = {}
    for dd in list_period_dates:
        # The directory is part of the strftime template as well, so date
        # directives in ref_dir are expanded too.
        ref_path = dd.strftime("{}/{}".format(ref_dir, ref_file))
        debug(f"Reading CAMS data for {dd} in file {ref_path}")

        # Fetch date frequency
        with Dataset(ref_path, "r") as nc:
            ntimes = nc.dimensions["time"].size

        # to_timedelta does not work with all frequencies!
        # The period is therefore taken as the calendar month starting at dd.
        period_end = dd + datetime.timedelta(
            days=int(pd.DatetimeIndex([dd]).days_in_month[0]))

        # ntimes + 1 boundaries delimit ntimes sub-intervals
        boundaries = pd.date_range(dd, period_end, periods=ntimes + 1)
        intervals = [
            [start, end]
            for start, end in zip(boundaries[:-1], boundaries[1:])
        ]
        list_dates[dd] = intervals
        # One file entry per sub-interval (not per boundary) so that both
        # dictionaries stay aligned.
        list_files[dd] = len(intervals) * [ref_path]

        target_file = "{}/{}".format(target_dir, os.path.basename(ref_path))
        path.link(ref_path, target_file)

    return list_files, list_dates