# Source code for pycif.plugins.datastreams.fields.netcdf_cams.fetch
import os
import numpy as np
import pandas as pd
import datetime
from netCDF4 import Dataset
from logging import debug
from .....utils import path
# [docs] (Sphinx viewcode link marker left over from the rendered page)
def fetch(ref_dir, ref_file, input_interval, target_dir, tracer=None,
          component=None):
    """
    Locate the CAMS netCDF files covering a simulation window and link them
    into the target directory.

    One file per month is expected: the time axis of each file is assumed to
    split the calendar month of the file into equal-length intervals.

    Args
    -----
    - ref_dir: directory where the original files are found
    - ref_file: (template) name of the original files; "ref_dir/ref_file" is
      expanded with ``strftime`` for every file period
    - input_interval: list of two dates: the beginning and end of the
      simulation; both are widened to full calendar months
    - target_dir: directory where the links to the original files are created
    - tracer: object providing ``file_freq``, the pandas frequency used to
      enumerate the file periods. Despite the ``None`` default it is
      required, since ``tracer.file_freq`` is read unconditionally
    - component: not used by this function

    Returns
    ---------
    - list_files: dictionary mapping each file period start to a list holding
      the file path once per time interval of that file
    - list_dates: dictionary mapping each file period start to the list of
      ``[start, end]`` intervals of that file
    """
    # Force the dates to include full months
    datei, datef = input_interval
    datei = datetime.datetime(year=datei.year, month=datei.month, day=1)
    datef = datetime.datetime(year=datef.year, month=datef.month, day=1)
    datef = datef + datetime.timedelta(
        days=int(pd.DatetimeIndex([datef]).days_in_month[0]))

    list_period_dates = pd.date_range(
        datei, datef, freq=tracer.file_freq, inclusive="left")

    list_files = {}
    list_dates = {}
    for dd in list_period_dates:
        file = dd.strftime("{}/{}".format(ref_dir, ref_file))
        debug(f"Reading CAMS data for {dd} in file {file}")

        # Only the number of time steps is needed from the file itself
        with Dataset(file, "r") as f:
            ntimes = f.dimensions["time"].size

        # to_timedelta does not work with all frequencies, so the end of the
        # month is computed explicitly from the number of days in the month
        period_end = dd + datetime.timedelta(days=int(dd.days_in_month))

        # ntimes intervals are delimited by ntimes + 1 evenly spaced bounds
        bounds = pd.date_range(dd, period_end, periods=ntimes + 1)
        list_dates[dd] = [
            [hh0, hh1] for hh0, hh1 in zip(bounds[:-1], bounds[1:])]

        # One file entry per interval, so that list_files[dd] and
        # list_dates[dd] have matching lengths (counting the bounds instead
        # would yield one entry too many)
        list_files[dd] = len(list_dates[dd]) * [file]

        # Make the original file available in the target directory
        target_file = "{}/{}".format(target_dir, os.path.basename(file))
        path.link(file, target_file)

    return list_files, list_dates