Source code for pycif.plugins.obsparsers.template

"""
This is a template to implement a new observation parser plugin.
It includes all the functions required to run properly.

It loops over existing files and generates pseudo data as if they were parsed.
The files can be any format, they are just used as a basis for looping.

.. warning::

    Please gradually document your plugin properly when starting from the template.
    This includes :bash:`input_arguments` (see :doc:`here</contrib_doc>` for details),
    as well as all information about the original data the plugin is supposed to
    accommodate.

    Please include licensing information, permanent link to download the data (or a
    contact person if no link is publicly available), as well as data format (temporal
    and horizontal resolution, names and shape of the data files), and any specific treatment
    that prevents the plugin from working with another type of files.


"""

from logging import info

import numpy as np
import pandas as pd

# Identifiers of the plugin
_name = "template"
_fullname = "Template plugin for observation parsers"

# Arguments accepted by the plugin; each entry gives a description ("doc"),
# a default value ("default") and the accepted type ("accepted")
input_arguments = {
    "parameter": {
        "doc": "Name of the species for which to generate observations",
        "default": "CO2",
        "accepted": str,
    },
    # Bounds of the uniform distribution used to draw pseudo-observations
    "obs_min": {
        "doc": "Lower range for generating random observations",
        "default": 380,
        "accepted": float,
    },
    "obs_max": {
        "doc": "Upper range for generating random observations",
        "default": 450,
        "accepted": float,
    },
    "nstations": {
        "doc": "Number of stations to randomly distribute",
        "default": 5,
        "accepted": int,
    },
    # Bounding box inside which the stations are randomly located
    "xmin": {
        "doc": "West border of the domain on which to distribute stations",
        "default": -180,
        "accepted": float,
    },
    "xmax": {
        "doc": "East border of the domain on which to distribute stations",
        "default": 180,
        "accepted": float,
    },
    "ymin": {
        "doc": "South border of the domain on which to distribute stations",
        "default": -90,
        "accepted": float,
    },
    "ymax": {
        "doc": "North border of the domain on which to distribute stations",
        "default": 90,
        "accepted": float,
    },
}



[docs]
def do_parse(self, obs_file, **kwargs):
    """Generate pseudo-observations as if they were parsed from a file

    The input file is never opened: its path is only reported in the log.
    Stations are located at random inside the configured domain and each
    station gets one random observation per hour between ``self.datei`` and
    ``self.datef``.

    Args:
        self :
            Plugin object. The attributes ``xmin``, ``xmax``, ``ymin``,
            ``ymax``, ``obs_min``, ``obs_max``, ``parameter``, ``datei`` and
            ``datef`` are read, as well as ``nstations`` (5 stations are
            generated when this attribute is not defined)
        obs_file (str) :
            Path to input file
        **kwargs :
            Not used

    Returns:
        pandas.DataFrame :
            One row per (date, station) pair, ordered by date first and
            station second, with the columns ``alt``, ``lat``, ``lon``,
            ``obs``, ``obserror``, ``station``, ``network``, ``parameter``,
            ``duration`` and ``date``

    """

    info("Generating random values from observation file: {}".format(obs_file))

    # Upper bound for the random station altitudes (the lower bound is 1)
    zmax = 100

    # Honour the ``nstations`` argument of the plugin; fall back to the
    # historical value of 5 when the attribute is not available
    nstat = getattr(self, "nstations", 5)

    # Pick random locations for the stations within the configured domain
    statx = np.random.uniform(low=self.xmin, high=self.xmax, size=nstat)
    staty = np.random.uniform(low=self.ymin, high=self.ymax, size=nstat)
    statz = np.random.uniform(low=1, high=zmax, size=nstat)

    # Hourly time steps over the requested period
    drange = pd.date_range(self.datei, self.datef, freq="1h")
    ndates = drange.size
    seconds_duration = 3600

    nobs = ndates * nstat

    # Generate a pandas.DataFrame with expected data.
    # Rows are ordered by date first: the station-dependent columns are tiled
    # once per date, while every date is repeated once per station.
    df = pd.DataFrame(
        {
            "alt": np.tile(statz, ndates),
            "lat": np.tile(staty, ndates),
            "lon": np.tile(statx, ndates),
            "obs": np.random.uniform(self.obs_min, self.obs_max, size=nobs),
            # Errors are drawn between 0 and 10% of the observation range
            "obserror": np.random.uniform(
                0, 0.1 * abs(self.obs_max - self.obs_min), size=nobs
            ),
            "station": np.tile(np.arange(nstat), ndates),
            "network": nobs * ["random"],
            # Scalars are broadcast to all rows by pandas
            "parameter": self.parameter,
            "duration": seconds_duration,
            "date": drange.repeat(nstat),
        }
    )

    return df