Source code for message_ix_models.model.structure

import logging
import re
from collections import ChainMap
from collections.abc import Mapping, MutableMapping
from copy import copy
from functools import cache
from itertools import product

import click
import pandas as pd
import pycountry
import xarray as xr
from iam_units import registry
from sdmx.model.common import Code, Codelist
from sdmx.model.v21 import Annotation

from message_ix_models.util import load_package_data, package_data_path
from message_ix_models.util.sdmx import as_codes

log = logging.getLogger(__name__)



[docs]
@cache
def codelists(kind: str) -> list[str]:
    """Return the valid IDs of code lists of `kind`.

    The IDs are the stems (file names without suffix) of the files matched by
    ``package_data_path(kind).glob("*.yaml")``, in sorted order. The result is cached
    per `kind`.

    Parameters
    ----------
    kind : str
        "node" or "year".
    """
    stems = [yaml_file.stem for yaml_file in package_data_path(kind).glob("*.yaml")]
    stems.sort()
    return stems




[docs]
@cache
def get_codes(name: str) -> list[Code]:
    """Return codes for the dimension/set `name` in MESSAGE-GLOBIOM scenarios.

    The information is read from :file:`data/{name}.yaml`, e.g.
    :file:`data/technology.yaml`. The result is cached per `name`.

    When `name` includes "node", entries that are not given in the file are looked up
    in the ISO 3166 database via :mod:`pycountry`, so that child codes for countries
    are populated automatically. For instance:

    .. code-block:: yaml

       myregion:
         name: Custom region
         child: [AUT, SCG]

    …results in a region with child codes for Austria (a current country) and the
    formerly-existing country Serbia and Montenegro.

    Codes for "commodity" and "technology" are additionally passed through
    :func:`process_commodity_codes` and :func:`process_technology_codes`, respectively.

    Parameters
    ----------
    name : :class:`str`
        Any :file:`.yaml` file in the folder :file:`message_ix_models/data/`.

    Returns
    -------
    list of :class:`~sdmx.model.Code`
        Every Code has :attr:`id`, :attr:`name`, :attr:`description`, and
        :attr:`annotations` attributes. Calling :func:`str` on a code returns its
        :attr:`id`.
    """
    # Raw contents of the config file
    raw = load_package_data(name)

    if "node" in name:

        def _iso_entries(database) -> dict:
            """Map alpha-3 codes to minimal code info for a pycountry database."""
            return {c.alpha_3: dict(id=c.alpha_3, name=c.name) for c in database}

        # Layered lookup: entries from the file take precedence; then current
        # countries; then historic countries
        raw = ChainMap(
            raw,
            _iso_entries(pycountry.countries),
            _iso_entries(pycountry.historic_countries),
        )

    # Convert to codes
    result = as_codes(raw)

    # Fill in additional data, defaults, etc. for the sets that need it
    post_process = {
        "commodity": process_commodity_codes,
        "technology": process_technology_codes,
    }.get(name)
    if post_process is not None:
        post_process(result)

    return result




[docs]
@cache
def get_codelist(name: str) -> Codelist:
    """Return a :class:`.Codelist` for `name` in MESSAGEix-GLOBIOM scenarios.

    The code list ID is `name` in upper case with "/" replaced by "_"; its contents
    are the codes returned by :func:`get_codes` for the same `name`.
    """
    codelist_id = name.replace("/", "_").upper()
    result: Codelist = Codelist(id=codelist_id)
    result.extend(get_codes(name))
    return result




[docs]
@cache
def get_region_codes(codelist: str) -> list[Code]:
    """Return the codes that are children of "World" in the specified `codelist`.

    The codes are read with :func:`get_codes` from ``node/{codelist}``; the entry
    equal to ``Code(id="World")`` is located and its :attr:`child` list is returned.
    """
    all_nodes = get_codes(f"node/{codelist}")
    world_position = all_nodes.index(Code(id="World"))
    world = all_nodes[world_position]
    return world.child




[docs]
def generate_product(
    data: Mapping, name: str, template: Code
) -> tuple[list[Code], dict[str, xr.DataArray]]:
    """Generate codes from `template` by Cartesian product along ≥1 dimensions.

    The dimensions are given by the "_generate" annotation on `template`, which is
    evaluated to a mapping from dimension IDs to a "match" value. For each dimension,
    the candidate codes are taken from ``data[dim]["add"]`` and narrowed as follows:

    - if `match` is found in the candidates, the children of that code are used;
    - otherwise, if `match` is a :class:`str`, it is used as a regular expression and
      only candidates whose ID matches are kept;
    - otherwise, all candidates are used.

    The "_generate" annotation is removed from `template` as a side effect.

    Parameters
    ----------
    data
        Mapping from dimension IDs to mappings with an "add" entry: a list of codes.
    name : str
        Name of the set; used as the dimension name of the returned indexers.
    template : Code
        Must have Python format strings for its :attr:`id` and :attr:`name`
        attributes; these are formatted with one code per dimension.

    Returns
    -------
    list of Code
        One copy of `template` per combination, with formatted ID and name.
    dict of str -> xarray.DataArray
        One 1-dimensional array per dimension with the string form of the code used
        for each generated code, plus one under `name` with the generated code IDs.
    """
    # eval() and remove the original annotation
    dims = template.eval_annotation(id="_generate")
    template.pop_annotation(id="_generate")

    def _select(dim, match):
        """Return the codes along dimension `dim`, narrowed according to `match`."""
        candidates = data[dim]["add"]

        try:
            position = candidates.index(match)
        except ValueError:
            # `match` is not itself one of the candidates
            if not isinstance(match, str):
                return candidates
            # Treat `match` as a regular expression for code IDs
            pattern = re.compile(match)
            return [c for c in candidates if pattern.match(c.id)]

        # `match` is one of the candidates: use its children
        return candidates[position].child

    per_dim = [_select(dim, match) for dim, match in dims.items()]

    generated = []  # Accumulate codes and the labels used to format each one
    labels = []

    # Iterate over the product of the selected codes for each dimension
    for combo in product(*per_dim):
        fmt = dict(zip(dims.keys(), combo))

        # Duplicate the template, then format its ID and name
        code = copy(template)
        code.id = code.id.format(**fmt)
        code.name = str(code.name).format(**fmt)  # type: ignore [assignment]

        generated.append(code)
        labels.append(tuple(str(part) for part in combo))

    # - Transpose the length-N sequence of D-tuples to D iterables each of length N.
    # - Convert each to a 1-dimensional xr.DataArray of length N along `name`.
    indexers = {
        dim: xr.DataArray(list(column), dims=name)
        for dim, column in zip(dims.keys(), zip(*labels))
    }
    # Corresponding indexer with the full code IDs
    indexers[name] = xr.DataArray([code.id for code in generated], dims=name)

    return generated, indexers




[docs]
def generate_set_elements(data: MutableMapping, name) -> None:
    """Generate elements for set `name`.

    The entries in ``data[name]["add"]`` are converted to codes and stored back in the
    same place. For "commodity" and "technology", :func:`process_units_anno` is
    applied to each code with ``quiet=True``. For "technology", the children of each
    code are also added, if not already present. Codes with a "_generate" annotation
    are expanded last using :func:`generate_product`; the resulting indexers are
    stored as ``data[name]["indexers"]``.

    Parameters
    ----------
    data
        Mapping from dimension IDs to lists of codes. Modified in place.
    name : str
        Name of the set for which to generate elements e.g. "commodity" or "technology".
    """
    has_units = name in {"commodity", "technology"}
    hierarchical = name in {"technology"}

    plain = []  # Codes that can be stored directly
    templates = []  # Codes that require generate_product(); handled last

    for code in as_codes(data[name].get("add", [])):
        if has_units:
            process_units_anno(name, code, quiet=True)

        if code.eval_annotation(id="_generate"):
            templates.append(code)
            continue

        plain.append(code)

        if not hierarchical:
            continue

        # Also store the children of `code`, skipping any already stored. The check
        # is made one child at a time so that repeated children are added only once.
        for child in code.child:
            if child not in plain:
                plain.append(child)

    # Store codes processed so far, in case used recursively by generate_product()
    data[name]["add"] = plain

    # Use generate_product() to generate codes and indexers based on other sets
    for template in templates:
        generated, indexers = generate_product(data, name, template)

        data[name]["add"].extend(generated)

        # NB if there are >=2 generated groups, only indexers for the last are kept
        data[name]["indexers"] = indexers




[docs]
def process_units_anno(set_name: str, code: Code, quiet: bool = False) -> None:
    """Process an annotation on `code` with id="units".

    The annotation text is wrapped as ``'registry.Unit("{text}")'``, such that it can
    be retrieved with :func:`.eval_anno` or :meth:`.ScenarioInfo.units_for`. If `code`
    has direct children, the annotation is also copied to those codes.

    If `code` has no "units" annotation, a message is logged and nothing else is done.
    If the annotation text cannot be converted to a unit, a message is logged and the
    text is left unchanged; the annotation is still copied to any children.

    Parameters
    ----------
    set_name : str
        Used in logged messages.
    code : Code
        Code to process. Its "units" annotation and the annotations of its direct
        children are modified in place.
    quiet : bool, optional
        If :data:`False` (the default), log on level :ref:`WARNING <python:levels>` if:

        - the annotation is missing, or
        - its text is not parseable with the :mod:`pint` application registry, i.e.
          :data:`iam_units.registry`.

        Otherwise, log on :ref:`DEBUG <python:levels>`.

    """
    # NB do not use logging.NOTSET for the quiet case: Logger.log() never emits a
    #    record with level 0, so the messages would be dropped entirely instead of
    #    being available on DEBUG as documented.
    level = logging.DEBUG if quiet else logging.WARNING

    # Convert a "units" annotation to a code snippet that will return a pint.Unit
    # via eval_anno()
    try:
        units_anno = code.get_annotation(id="units")
    except KeyError:
        log.log(level, f"{set_name.title()} {code} lacks defined units")
        return

    # First try the expression as-is, in case already processed
    expr = None
    for candidate in (str(units_anno.text), f'registry.Unit("{units_anno.text}")'):
        # Check that the unit can be parsed by the pint.UnitRegistry
        # NOTE(review): eval() executes the annotation text as Python code. This is
        #   acceptable only while the text comes from trusted (package) data; do not
        #   call this function on codes built from untrusted input.
        try:
            result = eval(candidate)
        except Exception:
            # Any failure simply means this candidate is not a usable expression
            continue
        else:
            if isinstance(result, registry.Unit):
                expr = candidate
                break

    if not expr:  # pragma: no cover
        # No coverage: code that triggers this exception should never be committed
        log.log(
            level,
            f"Unit '{units_anno.text}' for {set_name} {code} not pint compatible",
        )
    else:
        # Modify the annotation so eval_anno() can be used
        units_anno.text = expr

    # Also annotate child codes
    for c in code.child:
        c.annotations.append(copy(units_anno))




[docs]
def process_commodity_codes(codes):
    """Process a list of codes for ``commodity``.

    :func:`process_units_anno` is applied to every code, so that "units" annotations
    are converted to :mod:`pint`-compatible expressions. Because ``quiet=True`` is
    used, commodities with missing or non-:mod:`pint`-compatible units are not
    reported on level WARNING.
    """
    for commodity in codes:
        # FIXME remove quiet=True; instead improve commodity.yaml with units
        process_units_anno(set_name="commodity", code=commodity, quiet=True)




[docs]
def process_technology_codes(codes):
    """Process a list of codes for ``technology``.

    For every code:

    - :func:`process_units_anno` is applied with ``quiet=True``.
    - An annotation with id "vintaged" is ensured. An existing annotation is removed
      and appended again; if there is none, one is added with the default text
      ``repr(False)``.
    """
    for technology in codes:
        # FIXME remove quiet=True; instead improve technology.yaml with units
        process_units_anno(set_name="technology", code=technology, quiet=True)

        try:
            vintaged = technology.pop_annotation(id="vintaged")
        except KeyError:
            # No such annotation: use the default value for 'vintaged'
            vintaged = Annotation(id="vintaged", text=repr(False))

        technology.annotations.append(vintaged)



@click.command(name="techs")
@click.pass_obj
def cli(ctx):
    """Export metadata to technology.csv.

    This command transforms the technology metadata from the YAML file to CSV format.
    """
    # NB the docstring above is displayed by click as the help text of the command

    # IDs of annotations that are exported as columns, when present on a code
    anno_ids = ("type", "vintaged", "sector", "input", "output")

    def _to_series(code) -> pd.Series:
        """Convert one technology code to a pd.Series."""
        # Base attributes
        record = dict(
            id=code.id, name=str(code.name), description=str(code.description)
        )

        # Annotations; those missing from `code` are simply omitted
        for anno_id in anno_ids:
            try:
                record[anno_id] = str(code.get_annotation(id=anno_id).text)
            except KeyError:
                continue

        return pd.Series(record)

    # Combine one series per code to a data frame
    techs = pd.DataFrame([_to_series(code) for code in get_codes("technology")])

    # Write to file in the local data path provided by the context object
    dest = ctx.get_local_path("technology.csv")
    print(f"Write to {dest}")

    techs.to_csv(dest, index=None, header=True)

    # Print the first few items of the data frame
    print(techs.head())