Source code for pycif.plugins.obsparsers.wdcgg.headers

# -*- coding: utf-8 -*-

from logging import info
from .utils import remap_extract, find_header


def get_header(obs_file, maxlen=300):
    """Extract the header from a WDCGG File

    The end of the header is detected in one of three ways, tried in
    this order:

    1. a line containing ``CXX`` terminates the header (that line is
       included in the result);
    2. a line containing ``HEADER LINES`` gives the number of header
       lines after a colon, e.g. ``HEADER LINES: 32``;
    3. otherwise, the second whitespace-separated token of the first
       line is read as the number of header lines.

    Args:
        obs_file (str): path to input file
        maxlen (int): abort after this amount of lines when reading
            header. Default 300

    Returns:
        List[str]: List with all Lines of the Header (stripped of
        surrounding whitespace). Empty list if the file is empty.

    Raises:
        ValueError: if no ``CXX`` line and no header line count could be
            found within the lines that were read.

    """
    lines = []
    nheader = 0

    with open(obs_file, "r") as input_file:
        # Accepts formats with 'CXX' terminating the header
        # or with the number of HEADER LINES specified explicitly
        # in the header
        for line in input_file:
            lines.append(line.strip())

            if "CXX" in line:
                return lines

            if "HEADER LINES" in line:
                nheader = int(line.split(":")[1])

            # Stop scanning once more than maxlen lines have been read
            if len(lines) > maxlen:
                break

    if not lines:
        return []

    # If the number of lines was not found, try to find it
    # on the first line of the document
    if nheader == 0:
        try:
            nheader = int(lines[0].split()[1])
        except (IndexError, ValueError):
            # IndexError: the first line has fewer than two tokens
            # ValueError: the second token is not an integer
            raise ValueError(
                "More than {} Header Lines in WDCGG File. "
                "Is it a WDCGG file?".format(maxlen)
            )

    return lines[:nheader]
def parse_header(
    header, spec, list_extract, default_unit="ppm", default_tz="utc"
):
    """Extract information from the header

    Column names are read from the last line of the header: the line is
    split on whitespace, the first token is dropped and the remaining
    tokens are lower-cased.

    Args:
        header (list[str]): extracted header
        spec (str): species to extract (not used by this function)
        list_extract (list[str]): list of parameters to return
            'flag' to extract flag
            'error' to extract observation error
            any other parameter appearing in the columns
        default_unit (str): default unit generally used to report
            this species
        default_tz (str): default time zone for this file

    Returns:
        a 4-element tuple containing

        - names (list[str]): list of columns names to extract
        - columns (list[int]): list of column index to extract
        - date_ids (list[int]): list of column ids for date information
        - extra (dict): extra information contained in the header and
          not in the body of the file, e.g., altitude, coordinates,
          unit, etc. Keys are the lower-cased requested parameters;
          parameters that could not be found get ``default_unit`` for
          'unit', ``default_tz`` for 'tz' and ``None`` otherwise.

    Raises:
        ValueError: if the last header line has no 'date' column.

    """
    # Minimize all characters to facilitate comparisons
    head = [s.lower() for s in header[-1].split()[1:]]

    # Parsing time information
    try:
        date_ids = [head.index("date")]
    except ValueError:
        info(header)
        info(head)
        raise ValueError(
            "Cant find a date in this WDCGG file. \n"
            "Please check format"
        )

    if "time" in head:
        date_ids.append(head.index("time"))

    # Getting other parameters using the utils.remap_extract function
    columns = []
    names = []
    extra = {}
    for id_extract in list_extract:
        # Every output (names and extra keys) uses the lower-cased name
        key = id_extract.lower()

        # First look into columns names; some files have a name with
        # CH4_Air instead of CH4, hence the second attempt with "_air".
        # Exception (not BaseException) so that KeyboardInterrupt and
        # SystemExit are never swallowed here.
        column = None
        for suffix in ("", "_air"):
            try:
                column = head.index(remap_extract(id_extract) + suffix)
            except Exception:
                continue
            break

        if column is not None:
            columns.append(column)
            names.append(key)
            continue

        try:
            # Look into the header
            extra[key] = find_header(id_extract, header)
        except Exception:
            # If cannot find,
            # assume default values for unit and timezone
            info("Cant extract " + id_extract)
            if key == "unit":
                extra[key] = default_unit
            elif key == "tz":
                extra[key] = default_tz
            else:
                extra[key] = None

    return names, columns, date_ids, extra