Source code for pycif.plugins.chemistries.chimere.utils

import copy
import os
import re
from operator import xor
from os.path import exists, getsize

import numpy as np
import pandas as pd



[docs]
def create_mandchem(chemistry, mandatory_files):
    """Write the four mandatory CHIMERE chemistry text files from the YAML config.

    Writes (in order):

    1. ``REACTIONS.{schemeid}`` — one reaction string per line.
    2. ``PRESCRIBED_SPECIES.{schemeid}`` — prescribed species names.
    3. ``PRODLOSS_SPECIES.{schemeid}`` — production/loss species names.
    4. ``DEPO_SPEC.{schemeid}`` — depositing species names.

    Files are created empty when the corresponding species list is absent
    from the chemistry plugin.
from ....utils.check.errclass import CifError

    Args:
        chemistry: CHIMERE / TM5 chemistry plugin instance.
        mandatory_files (list[str]): four file paths in the order described
            above.
    """
    for mfile in mandatory_files:
        os.system(f"touch {mfile}")

    # Chemical reactions
    with open(mandatory_files[0], "w") as f:
        if chemistry.nreacs > 0:
            for attr in chemistry.reactions.attributes:
                reac = getattr(chemistry.reactions, attr)
                f.write(reac + "\n")

    if hasattr(chemistry, "prescrconcs"):
        with open(mandatory_files[1], "w") as f:
            for attr in chemistry.prescrconcs.attributes:
                f.write(attr + "\n")

    if hasattr(chemistry, "prodloss3d"):
        with open(mandatory_files[2], "w") as f:
            for attr in chemistry.prodloss3d.attributes:
                f.write(attr + "\n")

    if hasattr(chemistry, "deposition"):
        with open(mandatory_files[3], "w") as f:
            for attr in chemistry.deposition.attributes:
                f.write(attr + "\n")




[docs]
def create_optchem(chemistry, filer, fileps):
    """Build the derived CHIMERE scheme files from reactions and species lists.

    Reads ``REACTIONS`` and ``PRESCRIBED_SPECIES`` text files, parses
    stoichiometry and rate-coefficient strings, then writes:

    * ``STOICHIOMETRY`` — product stoichiometric coefficients.
    * ``CHEMISTRY`` — parsed reaction matrix (loss + product columns).
    * ``REACTION_RATES`` — reaction-rate type and parameter table.
    * ``PHOTO_RATES`` — photolysis reaction index ↔ J-rate name mapping.
    * ``PHOTO_PARAMETERS`` — photolysis altitude / zenith-angle table.
    * ``FAMILIES`` — empty (families not yet implemented).
    * ``DEPO_PARS`` / ``WETD_SPEC`` — empty deposition files.
    * ``ALL_SPECIES`` / ``ACTIVE_SPECIES`` / ``OUTPUT_SPECIES`` — species
      lists with type (``'ac'`` / ``'pr'``) and transport flags.
    * ``ANTHROPIC`` / ``BIOGENIC`` — emission species lists.

    Args:
        chemistry: CHIMERE chemistry plugin instance.
        filer (str): path to the ``REACTIONS`` text file.
        fileps (str): path to the ``PRESCRIBED_SPECIES`` text file.

    Returns:
        tuple[int, int]: ``(nallqmax, nphoto_rates)`` — total number of
        species and number of photolysis reactions.

    Raises:
        Exception: if any emitted / biogenic / output species is not in
            ``acspecies``, or if chemical families are requested.
    """

    workdir = chemistry.workdir
    dirchem_ref = chemistry.dirchem_ref
    mecachim = chemistry.schemeid

    files = f"{dirchem_ref}/STOICHIOMETRY.{mecachim}"
    filec = f"{dirchem_ref}/CHEMISTRY.{mecachim}"
    filerr = f"{dirchem_ref}/REACTION_RATES.{mecachim}"
    filej = f"{dirchem_ref}/PHOTO_RATES.{mecachim}"
    filephot = f"{dirchem_ref}/PHOTO_PARAMETERS.{mecachim}"
    filedpar = f"{dirchem_ref}/DEPO_PARS.{mecachim}"
    filewdep = f"{dirchem_ref}/WETD_SPEC.{mecachim}"
    filef = f"{dirchem_ref}/FAMILIES.{mecachim}"
    fileals = f"{dirchem_ref}/ALL_SPECIES.{mecachim}"
    fileas = f"{dirchem_ref}/ACTIVE_SPECIES.{mecachim}"
    fileout = f"{dirchem_ref}/OUTPUT_SPECIES.{mecachim}"
    fileanth = f"{dirchem_ref}/ANTHROPIC.{mecachim}"
    filebio = f"{dirchem_ref}/BIOGENIC.{mecachim}"

    os.system(f"rm -f {files} {filec} {filerr} {filef}")

    # Read prescribed species
    prescribed_species = np.array([])
    if exists(fileps) and getsize(fileps) > 0:
        df_prescr = pd.read_csv(
            fileps,
            index_col=False,
            header=None,
            comment="#",
            engine="python",
            sep="\s+",
        )
        prescribed_species = df_prescr[0]

    nonactive = np.append(prescribed_species, ["O2", "X", None])

    # Read reactions
    nphoto_rates = 0
    with open(filer, "r") as f:
        nlines = len(f.readlines())
    if nlines > 0:
        df_reac = pd.read_csv(
            filer,
            index_col=False,
            header=None,
            comment="#",
            engine="python",
            sep="\s+",
        )

        # Create stoechiometry and chemistry
        parts = df_reac[0].str.split("->", n=2, expand=True)
        left_hand_sides = parts[0]
        right_hand_sides = parts[1]
        losses = left_hand_sides.str.split("+", expand=True)
        nlosses = left_hand_sides.str.split("+").apply(len)
        losses.insert(0, -1, nlosses)
        prods = right_hand_sides.str.split("+", expand=True)
        nprods = pd.Series(np.sum(~pd.isnull(prods).values, axis=1))
        prods.insert(0, -1, nprods)

        prods_spec = np.copy(prods.values)
        stoichiometry = np.zeros((1, 6))
        for (i, j), value in np.ndenumerate(prods.values[:, 1:]):
            try:
                prods_list = prods.values[i, j + 1].split("*")
                if len(prods_list) > 1:
                    arr = np.array(
                        [
                            prods_list[1],
                            prods_list[0],
                            prods_list[0],
                            prods_list[0],
                            prods_list[0],
                            i + 1,
                        ]
                    )
                    stoichiometry = np.append(
                        stoichiometry, arr[np.newaxis, ...], axis=0
                    )

                if prods_list[-1] not in nonactive:
                    prods_spec[i, j + 1] = prods_list[-1]
                else:
                    prods_spec[i, j + 1] = None
                    prods_spec[i, 0] -= 1
            except AttributeError:
                pass

        file_chemistry = np.append(losses.values, prods_spec, axis=1)
        file_chemistry = pd.DataFrame(file_chemistry)
        stoichiometry = pd.DataFrame(stoichiometry[1:])

        # Create reactions
        reactions = df_reac[1].values
        reactions_rates, nphoto_rates, photo_rates = read_react(
            reactions, filej)
        reactions_rates = pd.DataFrame(reactions_rates)
        reactions_rates = reactions_rates.astype({0: 'int32'})
        chemistry.stoichiometry = stoichiometry
        chemistry.reactions_rates = reactions_rates
        chemistry.photo_rates = photo_rates

        # Create files to create
        stoichiometry.to_csv(files, sep=" ", header=False, index=False)
        file_chemistry.to_csv(filec, sep=" ", header=False, index=False)
        reactions_rates.to_csv(filerr, sep=" ", header=False, index=False)

    # If no reaction, make empty files
    else:
        open(files, "w").close()
        open(filec, "w").close()
        open(filerr, "w").close()
        open(filej, "w").close()

    # Make Families
    if chemistry.families.attributes != []:
        raise CifError("Families not implemented yet in pyCIF")

    open(filef, "w").close()

    # Make empty DEPO_PARS
    open(filedpar, "w").close()
    open(filewdep, "w").close()

    # Photolysis rates
    pparam = chemistry.photo_parameters
    pparam.nlevphot = len(pparam.altiphot)
    with open(filephot, "w") as f:
        f.write(
            " ".join(
                map(
                    str,
                    [pparam.ntabuzen, pparam.nphot, pparam.nwavel, pparam.nlevphot]
                    + pparam.altiphot,
                )
            )
        )

    # Active species
    chemistry.nspecies = len(chemistry.acspecies.attributes)

    # Create active_species (output_species) and all_species
    active_species = np.array(chemistry.acspecies.attributes)
    output_species = np.array(chemistry.outspecies.attributes)
    emis_species = np.array(chemistry.emis_species.attributes)
    bio_species = np.array(chemistry.bio_species.attributes)

    # Check that output/emis/bio species are active species
    if np.any(np.isin(emis_species, active_species, invert=True)):
        raise CifError(
            "Some species in emis_species are not active species. " "Check your yaml!"
        )

    if np.any(np.isin(bio_species, active_species, invert=True)):
        raise CifError(
            "Some species in bio_species are not active species. " "Check your yaml!"
        )

    if np.any(np.isin(output_species, active_species, invert=True)):
        raise CifError(
            "Some species in output_species are not active species. " "Check your yaml!"
        )

    # Fill active species with proper information
    all_species = np.append(active_species, prescribed_species, axis=0)
    type_spec = np.array([["type"]])
    acinfos = np.array([0, 0, 0])
    outinfos = np.array([0])
    type_spec = np.broadcast_to(type_spec, (all_species.shape[0], 1))
    acinfos = np.broadcast_to(acinfos, (active_species.shape[0], 3))
    outinfos = np.broadcast_to(outinfos, (output_species.shape[0], 1))
    all_species = np.append(all_species[:, np.newaxis], type_spec, axis=1)
    active_species = np.append(active_species[:, np.newaxis], acinfos, axis=1)
    output_species = np.append(output_species[:, np.newaxis], outinfos, axis=1)

    # Check whether LMDZ type or CHIMERE
    is_lmdz = False
    for i in range(active_species.shape[0]):
        acspec = getattr(chemistry.acspecies, active_species[i, 0])
        if xor(hasattr(active_species, "restart_id"), hasattr(active_species, "mass")):
            raise CifError(
                "WARNING: the active species {} has one of the arguments: "
                "restart_id / mass, but not the other, suggesting you want to "
                "set the chemistry for LMDZ, but not fully consistently. "
                "Please check your Yaml"
            )

        # For LMDZ
        if hasattr(acspec, "restart_id"):
            is_lmdz = True
            active_species[i, 1] = str(getattr(acspec, "restart_id"))
            active_species[i, 2] = str(getattr(acspec, "mass"))

        # For CHIMERE
        else:
            if is_lmdz:
                raise CifError(
                    "Another species was defined for LMDZ and this one is "
                    "for CHIMERE, please check your Yaml"
                )

            active_species[i, 1] = str(getattr(acspec, "htransport"))
            active_species[i, 2] = str(getattr(acspec, "vtransport"))
            active_species[i, 3] = str(int(getattr(acspec, "bound_dry")))

            if active_species[i, 0] in output_species[:, 0]:
                outspec = getattr(chemistry.outspecies, active_species[i, 0])
                output_species[i, 1] = str(getattr(outspec, "output_frac"))

    for i in range(all_species.shape[0]):
        if i < chemistry.nspecies:
            all_species[i, 1] = "ac"
        else:
            all_species[i, 1] = "pr"

    all_species = pd.DataFrame(all_species)
    output_species = pd.DataFrame(output_species)
    active_species = pd.DataFrame(active_species)
    emis_species = pd.DataFrame(emis_species)
    bio_species = pd.DataFrame(bio_species)

    # Create files to create
    all_species.to_csv(fileals, sep=" ", header=False, index=False)
    active_species.to_csv(
        fileas,
        sep=" ",
        header=False,
    )
    output_species.to_csv(fileout, sep=" ", header=False, index=False)
    emis_species.to_csv(fileanth, sep=" ", header=False)
    bio_species.to_csv(filebio, sep=" ", header=False)

    return all_species.shape[0], nphoto_rates




[docs]
def read_react(reactions, filej):
    """Parse reaction-rate strings and write the ``PHOTO_RATES`` file.

    Classifies each reaction into one of five CHIMERE rate types:

    1. ``k=<value>`` — constant rate.
    2. ``k(T)=Aexp(-B/T)`` — simplified Arrhenius (2 parameters).
    3. ``k(T)=Aexp(-B/T)(300/T)**N`` — full Arrhenius (3 parameters).
    4. ``k(P)=A(B+C*P/Pref)`` — pressure-dependent (3 parameters).
    5. ``J=<name>`` — photolysis (index written to ``filej``).

    Args:
        reactions (np.ndarray): 1-D array of rate-formula strings, one
            per reaction.
        filej (str): path to the ``PHOTO_RATES`` output file.

    Returns:
        tuple: ``(reactions_rates, nphoto_rates, photo_rates)`` where
        *reactions_rates* is a ``(nreac, 10)`` float array with type code
        in column 0 and parameters in columns 1–9, *nphoto_rates* is the
        number of photolysis reactions, and *photo_rates* is a DataFrame
        of (reaction_index, J-rate_name) pairs.
    """
    nreacts = reactions.shape[0]
    reactions_rates = np.zeros((nreacts, 10))
    reactions_rates.fill(None)
    photo_rates = np.zeros((0, 2))
    nphoto_rates = 0

    for i, value in np.ndenumerate(reactions):
        # Type 1 : Constant rate
        if re.search(r"k=", value):
            val_list = re.split(r"=", value)
            reactions_rates[i, 0] = 1
            reactions_rates[i, 1] = val_list[1]

        # Type 2 : Simplified Arrhenius
        if re.search(r"k\(T\)=Aexp\(-B\/T\)", value):
            val_list = re.split(r"=|,", value)
            reactions_rates[i, 0] = 2
            reactions_rates[i, 1] = val_list[3]
            reactions_rates[i, 2] = val_list[5]

        # Type 3 : Complete Arrhenius
        if re.search(r"k\(T\)=Aexp\(-B\/T\)\(300\/T\)\*\*N", value):
            val_list = re.split(r"=|,", value)
            reactions_rates[i, 0] = 3
            reactions_rates[i, 1] = val_list[3]
            reactions_rates[i, 2] = val_list[5]
            reactions_rates[i, 3] = val_list[7]

        # Type 4 : Relative pressure
        if re.search(r"k\(P\)=A\(B\+C\*P/Pref\)", value):
            val_list = re.split(r"=|,", value)
            reactions_rates[i, 0] = 4
            reactions_rates[i, 1] = val_list[3]
            reactions_rates[i, 2] = val_list[5]
            reactions_rates[i, 3] = val_list[7]

        # Type 5 : Simple Photolysis
        if re.search(r"J=", value):
            val_list = re.split(r"=|,", value)
            reactions_rates[i, 0] = 5
            reactions_rates[i, 1] = val_list[1]
            photo_rates = np.append(
                photo_rates, [[str(i[0] + 1), "j" + val_list[1]]], axis=0
            )
            nphoto_rates += 1

    photo_rates = pd.DataFrame(photo_rates[:])
    photo_rates.to_csv(filej, sep=" ", header=False, index=False)

    return reactions_rates, nphoto_rates, photo_rates




[docs]
def make_inout_react_graph(filer):
    """Build a transitively-closed reactant → product graph from a CHEMISTRY file.

    Reads the CHIMERE ``CHEMISTRY.{schemeid}`` text file (columns:
    ``nloss, loss_1, …, loss_nloss, nprod, prod_1, …``), constructs a
    direct reactant → product mapping, then propagates the graph
    recursively until no new connections are added.

    The result maps each reactant to every species it can *eventually*
    produce through any chain of reactions in the scheme.  This graph is
    used by the adjoint mapper to route sensitivities from outputs back to
    all relevant emission inputs.

    Args:
        filer (str): path to the ``CHEMISTRY.{schemeid}`` text file.

    Returns:
        dict[str, list[str]]: mapping of reactant name → sorted unique
        list of all (direct and indirect) product names.  Returns ``{}``
        when the file is empty.
    """

    # Check if REACTIONS files is empty
    with open(filer, "r") as f:
        nlines = len(f.readlines())
    if nlines == 0:
        return {}

    # Otherwhise, start building the graph
    inout_reaction_graph = {}
    with open(filer, "r") as f:
        lines = f.readlines()

    for ln in lines:
        ln = ln.split()
        nloss = int(ln[0])
        losses = [s for s in ln[1:nloss + 1] if s not in ["X", "M"]]
        nprods = int(ln[nloss + 1])
        prods = [s for s in ln[nloss + 2:] if s not in ["X", "M"]]

        # Remove smaller 'o' in front of species if any
        losses = [s[1:] if s[0] == 'o' else s for s in losses]
        prods = [s[1:] if s[0] == 'o' else s for s in prods]

        # Now link prods and losses
        for trin in losses:
            if trin not in inout_reaction_graph:
                inout_reaction_graph[trin] = []
            inout_reaction_graph[trin].extend(prods)

    inout_reaction_graph = {
        tr: list(set(inout_reaction_graph[tr]))
        for tr in inout_reaction_graph
    }

    # Propagate recursively influence from inputs to outputs
    def propagate_chemistry(inout_reaction_graph):
        inout_reaction_graph_out = copy.deepcopy(inout_reaction_graph)
        for trin in inout_reaction_graph_out:
            for trout in inout_reaction_graph_out[trin]:
                inout_reaction_graph_out[trin].extend(
                    inout_reaction_graph_out.get(trout, []))
                inout_reaction_graph_out[trin] = list(
                    set(inout_reaction_graph_out[trin])
                )

        # Compare length and carries on recursively if any changes
        lengths = [
            len(inout_reaction_graph_out[tr]) == len(inout_reaction_graph[tr])
            for tr in inout_reaction_graph_out
        ]
        if all(lengths):
            return inout_reaction_graph_out
        else:
            return propagate_chemistry(inout_reaction_graph_out)

    return propagate_chemistry(inout_reaction_graph)