"""
This is a template to implement a new observation parser plugin.
It includes all the functions required to run properly.
It loops over existing files and generates pseudo data as if they were parsed.
The files can be any format, they are just used as a basis for looping.
.. warning::
Please gradually document your plugin properly when starting from the template.
The documentation should include :bash:`input_arguments` (see :doc:`here</contrib_doc>` for details),
as well as all information about the original data the plugin is supposed to
accommodate.
Please include licensing information, permanent link to download the data (or a
contact person if no link is publicly available), as well as data format (temporal
and horizontal resolution, names and shape of the data files), and any specific treatment
that prevents the plugin from working with another type of files.
"""
from logging import info
import numpy as np
import pandas as pd
# Short and human-readable identifiers of this plugin.
# NOTE(review): presumably read by the plugin registry to look this parser
# up by name -- confirm against the framework before renaming.
_name = "template"
_fullname = "Template plugin for observation parsers"
# Declaration of the arguments accepted by this plugin. Each entry gives a
# help text ("doc"), the value used when the argument is not provided
# ("default") and the expected type ("accepted"). The parse function reads
# these values as attributes of the plugin instance.
input_arguments = {
    # Species name written in the "parameter" column of the generated data
    "parameter": {
        "doc": "Name of the species for which to generate observations",
        "default": "CO2",
        "accepted": str,
    },
    # Bounds of the uniform distribution used to draw observation values
    "obs_min": {
        "doc": "Lower range for generating random observations",
        "default": 380,
        "accepted": float,
    },
    "obs_max": {
        "doc": "Upper range for generating random observations",
        "default": 450,
        "accepted": float,
    },
    # Number of pseudo stations to generate
    "nstations": {
        "doc": "Number of stations to randomly distribute",
        "default": 5,
        "accepted": int,
    },
    # Horizontal extent of the domain in which stations are drawn
    "xmin": {
        "doc": "West border of the domain on which to distribute stations",
        "default": -180,
        "accepted": float,
    },
    "xmax": {
        "doc": "East border of the domain on which to distribute stations",
        "default": 180,
        "accepted": float,
    },
    "ymin": {
        "doc": "South border of the domain on which to distribute stations",
        "default": -90,
        "accepted": float,
    },
    "ymax": {
        "doc": "North border of the domain on which to distribute stations",
        "default": 90,
        "accepted": float,
    },
}
[docs]
def do_parse(self, obs_file, **kwargs):
    """Generate pseudo observations as if ``obs_file`` had been parsed.

    The content of ``obs_file`` is never read: its path is only reported in
    the log message. Random station locations are drawn once per call and
    random observations are generated for every station at every hourly
    time step between ``self.datei`` and ``self.datef``.

    The following attributes are read from ``self``: ``xmin``, ``xmax``,
    ``ymin``, ``ymax``, ``obs_min``, ``obs_max``, ``parameter``, ``datei``,
    ``datef`` and ``nstations``.

    Args:
        obs_file (str) :
            Path to input file (used for logging only)
        **kwargs :
            Accepted for interface compatibility; not used here

    Returns:
        pandas.DataFrame :
            One row per (date, station) pair, sorted by date then station,
            with columns ``alt``, ``lat``, ``lon``, ``obs``, ``obserror``,
            ``station``, ``network``, ``parameter``, ``duration`` and
            ``date``
    """
    info("Generating random values from observation file: {}".format(obs_file))

    zmax = 100
    seconds_duration = 3600

    # Use the number of stations requested through the ``nstations`` input
    # argument; fall back to the historical hard-coded value when the
    # attribute is not available on the plugin
    nstat = getattr(self, "nstations", None)
    nstat = 5 if nstat is None else int(nstat)

    # Pick random locations for x and y within the configured domain,
    # and a random altitude between 1 and zmax
    statx = np.random.uniform(low=self.xmin, high=self.xmax, size=nstat)
    staty = np.random.uniform(low=self.ymin, high=self.ymax, size=nstat)
    statz = np.random.uniform(low=1, high=zmax, size=nstat)

    drange = pd.date_range(self.datei, self.datef, freq="1h")
    ndates = drange.size
    nobs = ndates * nstat

    # Generate a pandas.DataFrame with expected data.
    # Station-wise values are tiled (s0, s1, ..., s0, s1, ...) while dates
    # are repeated (d0, d0, ..., d1, d1, ...), so that every date is
    # associated with the full list of stations
    df = pd.DataFrame(
        {
            "alt": np.tile(statz, ndates),
            "lat": np.tile(staty, ndates),
            "lon": np.tile(statx, ndates),
            "obs": np.random.uniform(self.obs_min, self.obs_max, size=nobs),
            "obserror": np.random.uniform(
                0, 0.1 * abs(self.obs_max - self.obs_min), size=nobs
            ),
            "station": np.tile(np.arange(nstat), ndates),
            "network": nobs * ["random"],
            "parameter": self.parameter,
            "duration": seconds_duration,
            "date": drange.repeat(nstat),
        }
    )
    return df