# Source code for pycif.plugins.obsparsers.verify.headers

# -*- coding: utf-8 -*-

from __future__ import print_function
from __future__ import absolute_import
from .utils import remap_extract, find_header
import os
import glob

from ....utils import check as check


def get_header(obs_file, maxlen=300):
    """Extract the header from a VERIFY format observation file.

    The header is taken to be the first ``nheader`` (8) lines of the file,
    each stripped of leading/trailing whitespace.

    NOTE(review): earlier comments in this function mentioned headers
    terminated by 'CXX' or declaring their own number of header lines;
    neither is implemented here, the header length is fixed.

    Args:
        obs_file (str): path to input file
        maxlen (int): upper bound on the number of lines read while
            extracting the header. Default 300

    Returns:
        List[str]: List with all lines of the header; empty if ``obs_file``
        is not an existing file or the file is empty. Shorter than 8 entries
        when the file has fewer lines or ``maxlen`` is smaller than 8.

    """

    # Return empty if not a file
    if not os.path.isfile(obs_file):
        return []

    # Fixed number of header lines in the supported format
    nheader = 8

    # Never read more than maxlen lines, and never fewer than zero
    limit = max(0, min(nheader, maxlen))

    lines = []
    with open(obs_file, "r") as input_file:
        for line in input_file:
            # Check before appending so that maxlen is a true upper bound
            if len(lines) >= limit:
                break
            lines.append(line.strip())

    return lines
def _find_column(head, id_extract):
    """Return the index in ``head`` of the column for ``id_extract``, or None.

    Tries the remapped name first, then the same name suffixed with '_air'.
    """
    try:
        # First look into columns names
        return head.index(remap_extract(id_extract))
    except Exception:
        # Deliberate best effort: fall through to the '_air' variant
        pass

    try:
        # Some files have a name with CH4_Air instead of CH4
        return head.index(remap_extract(id_extract) + '_air')
    except Exception:
        return None


def parse_header(header, spec, list_extract,
                 default_unit='ppm', default_tz='utc'):
    """Extract information from the header.

    The last line of ``header`` is read as the whitespace-separated list of
    column names; comparisons are done on lower-cased names.

    Args:
        header (list[str]): extracted header
        spec (str): species to extract (not used by this function)
        list_extract (list[str]): list of parameters to return
            'flag' to extract flag
            'error' to extract observation error
            any other parameter appearing in the columns
        default_unit (str): default unit generally used to report
            this species
        default_tz (str): default time zone for this file

    Returns:
        a 4-element tuple containing
            - names (list[str]): list of columns names to extract
            - columns (list[int]): list of column index to extract
            - date_ids (list[int]): list of column ids for date information
            - extra (dict): extra information contained in the header and
              not in the body of the file, e.g., altitude, coordinates,
              unit, etc.

    Raises:
        ValueError: if 'year', 'month' or 'day' is not among the column
            names, or if 'hour' is present without 'minute' and 'second'.

    """

    # Minimize all characters to facilitate comparisons
    head = [s.lower() for s in header[-1].split()]

    # Parsing time information
    try:
        date_ids = [head.index('year'),
                    head.index('month'),
                    head.index('day')]
    except ValueError:
        print(header)
        print(head)
        raise ValueError("Cant find a date in this VERIFY file. "
                         "Please check format")

    if 'hour' in head:
        date_ids += [head.index('hour'),
                     head.index('minute'),
                     head.index('second')]

    # Getting other parameters using the utils.remap_extract function
    columns = []
    names = []
    extra = {}
    for id_extract in list_extract:
        key = id_extract.lower()

        column = _find_column(head, id_extract)
        if column is not None:
            columns.append(column)
            names.append(key)
            continue

        try:
            # Look into the header
            extra[key] = find_header(id_extract, header)
        except Exception:
            # If cannot find,
            # assume default values for unit and timezone
            check.verbose("Cant extract " + id_extract)
            # Same lower-cased key as when the value is found in the header
            if key == 'units':
                extra[key] = default_unit
            elif key == 'time':
                extra[key] = default_tz

    return names, columns, date_ids, extra