Source code for pycif.plugins.datastreams.fluxes.edgar_v5.fetch
import datetime
import glob
import os
import pandas as pd
import xarray as xr
import numpy as np
import glob
from netCDF4 import Dataset, num2date
from .....utils import path
from logging import info, debug
[docs]
def fetch(ref_dir, ref_file, date_interval, target_dir,
          tracer=None, **kwargs):
    """Pick the yearly file to use for each year of an interval and link it.

    The requested interval is widened to whole years. For every 1st of
    January in that widened range (both ends included), the most recent
    available file dated on or before it is selected; when every available
    file is later, the earliest available file is used instead.

    Args:
        ref_dir: directory holding the reference files.
        ref_file: file name pattern; it is expanded with ``strftime`` for
            every 1st of January from 1900 to 2100 to discover the files
            that exist on disk.
        date_interval: pair ``(start, end)`` of objects exposing ``.year``.
        target_dir: directory into which the selected files are linked
            (through the project helper ``path.link``).
        tracer: not used by this function.
        **kwargs: not used by this function.

    Returns:
        A tuple ``(tmp_files, tmp_dates)`` of dictionaries keyed by the
        yearly ``pandas.Timestamp``:

        * ``tmp_files[dd]`` is a list of file paths, one per sub-period;
        * ``tmp_dates[dd]`` is the list of matching ``[start, end]`` pairs.

        If the file has a ``time`` variable there is one pair
        ``[t - dt, t]`` per time step, ``dt`` being the smallest spacing
        between consecutive steps. Otherwise a single pair spans the year.

    Raises:
        FileNotFoundError: if no file matching the pattern exists for any
            year between 1900 and 2100.
    """
    # Reshape input interval to include full years
    datei, datef = date_interval
    datei = datetime.datetime(year=datei.year, month=1, day=1)
    datef = datetime.datetime(year=datef.year + 1, month=1, day=1)
    list_dates = pd.date_range(datei, datef, freq="1YS")

    # Find all available dates matching the format provided by the user.
    # A single pass keeps dates and file names aligned and checks the disk
    # only once per candidate year.
    file_pattern = "{}/{}".format(ref_dir, ref_file)
    available = []
    for candidate in pd.date_range("1900", "2100", freq="1YS"):
        candidate_file = candidate.strftime(file_pattern)
        if os.path.isfile(candidate_file):
            available.append((candidate.to_pydatetime(), candidate_file))

    if not available:
        raise FileNotFoundError(
            "No file matching '{}' was found for any year "
            "between 1900 and 2100".format(file_pattern)
        )

    # The candidates are generated in chronological order, so both arrays
    # are already sorted by date.
    list_dates_avail = np.array([date for date, _ in available])
    list_files_avail = np.array([name for _, name in available])

    # Loop over years to find correct file
    tmp_files = {}
    tmp_dates = {}
    no_delay = datetime.timedelta(0)
    for dd in list_dates:
        # Closest available date that is not later than the current year;
        # fall back to the first available file when all of them are later
        deltas = dd - list_dates_avail
        not_later = deltas >= no_delay
        if np.any(not_later):
            delta_max = np.min(deltas[not_later])
            indout = np.where(deltas == delta_max)[0][0]
        else:
            indout = 0

        src_file = list_files_avail[indout]

        # Check if "time" is in variables
        # Do not do it with open_dataset which is slow with big files...
        dates = None
        with Dataset(src_file, "r") as nc:
            if "time" in nc.variables:
                debug("Fetching times for date {} from {}".format(
                    dd, src_file))
                times = nc.variables["time"]
                dates = num2date(times[:], times.units,
                                 only_use_python_datetimes=True,
                                 only_use_cftime_datetimes=False)

        if dates is not None:
            # NOTE(review): with a single time step np.diff is empty and the
            # indexing below raises IndexError - confirm whether such files
            # can occur and what period they should cover.
            dt = np.unique(np.diff(dates))[0]
            tmp_dates[dd] = [[d - dt, d] for d in dates]
            tmp_files[dd] = len(tmp_dates[dd]) * [src_file]
        else:
            # No time axis: the file stands for the whole year
            tmp_dates[dd] = [[dd.to_pydatetime(),
                              datetime.datetime(dd.year + 1, 1, 1)]]
            tmp_files[dd] = [src_file]

        # Fetch to datavect
        target_file = "{}/{}".format(target_dir, os.path.basename(src_file))
        path.link(src_file, target_file)

    return tmp_files, tmp_dates