Source code for pycif.plugins.datastreams.fluxes.orchidee.fetch
import datetime
import glob
import os
from netCDF4 import Dataset
import pandas as pd
import xarray as xr
import numpy as np
from .....utils import path
from .....utils.dates import date_range
from logging import debug
[docs]
def fetch(ref_dir, ref_file, input_interval, target_dir,
          tracer=None, component=None, **kwargs):
    """
    Fetch files and dates for ORCHIDEE.

    For every period start date generated from ``input_interval`` with the
    period ``tracer.file_freq``, the corresponding input file name is built
    by formatting ``ref_dir/ref_file`` with ``strftime``. Existing files are
    opened to read their ``time`` axis, the sub-periods overlapping the
    requested interval are listed, and the file is linked into
    ``target_dir``. A given file is processed only once.

    Args:
        ref_dir (str): the path to the input files
        ref_file (str): format of the input files (``strftime`` pattern)
        input_interval (list): simulation interval (start and end dates)
        target_dir (str): where to link the input files
        tracer: the tracer Plugin, corresponding to the paragraph
            :bash:`datavect/components/fluxes/parameters/my_species` in the
            configuration yaml; this function reads its ``file_freq``
            attribute, ``timeresol`` when the file has no ``time``
            variable, and the optional ``interpol_resolution``
        component: the component Plugin, same as tracer; corresponds to the
            paragraph :bash:`datavect/components/fluxes` in the
            configuration yaml (not used in this function)

    Return:
        tuple: ``(list_files, list_dates)``, two dictionaries keyed by the
        period start dates for which a file was found:

        - list_files: for each key, a list repeating the source file path
          once per sub-period
        - list_dates: for each key, a list of ``[start, end]`` pairs, one
          per sub-period

    Raises:
        Exception: if the time axis of a file is not regularly spaced
            (the set of differences between consecutive time steps does
            not contain exactly one value)
    """
    # List of possible dates
    datei, datef = input_interval
    list_period_dates = \
        date_range(datei, datef, period=tracer.file_freq, close="")

    list_files = {}
    list_dates = {}

    # Source files already processed; a set gives O(1) membership tests
    valid_files = set()

    # Loop over dates
    for dd in list_period_dates:
        file_path = dd.strftime("{}/{}".format(ref_dir, ref_file))
        if not os.path.isfile(file_path) or file_path in valid_files:
            continue

        # Check whether the file carries an explicit "time" variable
        with Dataset(file_path, "r") as f:
            isvar = "time" in f.variables

        # Read the time axis; the dataset is closed as soon as the values
        # are extracted, and copied so that later in-place arithmetic never
        # touches a buffer owned by the closed dataset
        with xr.open_dataset(file_path) as ds:
            dates = np.array(ds["time"].values)[:, np.newaxis]

        # Define dates if not a variable
        if not isvar:
            dates = pd.date_range(
                dd, periods=len(dates),
                freq=tracer.timeresol).values[:, np.newaxis]

        freq = np.unique(np.diff(dates.flatten()))
        if freq.size != 1:
            raise Exception("Couldn't extract a fixed frequency from {}. "
                            "Please check the file 'time' variable"
                            .format(file_path))
        step = freq[0]

        # Shift dates if in variables as the middle of periods is specified
        if isvar:
            dates -= step / 2

        # Keep only dates needed for the period
        mask = (dates >= np.datetime64(datei)) \
            & (dates <= np.datetime64(datef) + step)
        dates = dates[mask.flatten()]

        # Build the [start, end] pairs of every sub-period
        out_dates = np.concatenate([dates, dates + step], axis=1)

        # Interpolate to new resolution; skipped when nothing was selected
        # because min()/max() are undefined on an empty array
        if hasattr(tracer, "interpol_resolution") and out_dates.size > 0:
            new_bounds = pd.date_range(
                out_dates.min(), out_dates.max(),
                freq=tracer.interpol_resolution)
            out_dates = np.array(
                [[d0, d1]
                 for d0, d1 in zip(new_bounds[:-1], new_bounds[1:])])

        list_dates[dd] = [list(d) for d in out_dates]
        list_files[dd] = len(out_dates) * [file_path]

        # Fetching
        target_file = "{}/{}".format(target_dir, os.path.basename(file_path))
        path.link(file_path, target_file)
        valid_files.add(file_path)

    return list_files, list_dates