Source code for message_ix_models.model.structure

import logging
from collections import ChainMap
from functools import lru_cache
from typing import List

import click
import pandas as pd
import pycountry
from iam_units import registry
from sdmx.model import Annotation, Code

from message_ix_models.util import as_codes, load_package_data, package_data_path

log = logging.getLogger(__name__)


def codelists(kind: str) -> List[str]:
    """Return the valid IDs of code lists of `kind`.

    The IDs are the stems (file names without suffix) of the :file:`*.yaml` files
    found directly in the directory given by ``package_data_path(kind)``.

    Parameters
    ----------
    kind : str
        "node" or "year".

    Returns
    -------
    list of str
        Code list IDs, sorted.
    """
    yaml_files = package_data_path(kind).glob("*.yaml")
    ids = [yaml_file.stem for yaml_file in yaml_files]
    ids.sort()
    return ids


@lru_cache()
def get_codes(name: str) -> List[Code]:
    """Return codes for the dimension/set `name` in MESSAGE-GLOBIOM scenarios.

    The information is read from :file:`data/{name}.yaml`, e.g.
    :file:`data/technology.yaml`.

    When `name` includes "node", then child codes are automatically populated from the
    ISO 3166 database via :mod:`pycountry`. For instance:

    .. code-block:: yaml

       myregion:
         name: Custom region
         child: [AUT, SCG]

    …results in a region with child codes for Austria (a current country) and the
    formerly-existing country Serbia and Montenegro.

    Results are memoized with :func:`functools.lru_cache`, so repeated calls with the
    same `name` return the same list object.

    Parameters
    ----------
    name : :class:`str`
        Any :file:`.yaml` file in the folder :file:`message_ix_models/data/`.

    Returns
    -------
    list of :class:`~sdmx.model.Code`
        Every Code has :attr:`id`, :attr:`name`, :attr:`description`, and
        :attr:`annotations` attributes. Calling :func:`str` on a code returns its
        :attr:`id`.
    """

    def _country_entries(database):
        # One minimal code description per country, keyed by ISO 3166 alpha-3 code
        return {c.alpha_3: {"id": c.alpha_3, "name": c.name} for c in database}

    # Raw contents of the config file
    raw = load_package_data(name)

    if "node" in name:
        # Automatically add information for countries within regions in the node
        # codelists. A ChainMap looks up entries first in the contents loaded from
        # file, then falls back to current countries and finally to historic
        # countries from the pycountry databases.
        raw = ChainMap(
            raw,
            _country_entries(pycountry.countries),
            _country_entries(pycountry.historic_countries),
        )

    # Convert to codes
    codes = as_codes(raw)

    # Fill in additional data, defaults, etc. for the sets that need it
    post_process = {
        "commodity": process_commodity_codes,
        "technology": process_technology_codes,
    }.get(name)
    if post_process is not None:
        post_process(codes)

    return codes


def process_units_anno(set_name: str, code: Code, quiet: bool = False) -> None:
    """Process an annotation on `code` with id="units".

    The annotation text is wrapped as ``'registry.Unit("{text}")'``, such that it can
    be retrieved with :func:`.eval_anno` or :meth:`.ScenarioInfo.units_for`. Text that
    already evaluates to a unit is left unchanged. If `code` has direct children, the
    annotation is also copied to those codes; this happens whether or not the units
    could be parsed.

    If `code` has no "units" annotation, a message is logged and nothing else is done.

    Parameters
    ----------
    set_name : str
        Used in logged messages.
    code : sdmx.model.Code
        Code whose "units" annotation is processed in place.
    quiet : bool, optional
        If :data:`False` (the default), log on level :ref:`WARNING <python:levels>` if:

        - the annotation is missing, or
        - its text is not parseable with the :mod:`pint` application registry, i.e.
          :data:`iam_units.registry`.

        Otherwise, log on :ref:`DEBUG <python:levels>`.

    """
    level = logging.WARNING if not quiet else logging.DEBUG

    try:
        units_anno = code.get_annotation(id="units")
    except KeyError:
        log.log(level, f"{set_name.title()} {code} lacks defined units")
        return

    # Expressions to try, in order: the text as-is (in case it was already
    # processed), then the text wrapped so that it evaluates to a pint.Unit
    candidates = (str(units_anno.text), f'registry.Unit("{units_anno.text}")')

    # NOTE(review): the annotation text is passed to eval(). Presumably it only ever
    # comes from trusted package data files; never call this on untrusted input.
    accepted = None
    for snippet in candidates:
        try:
            value = eval(snippet)
        except Exception:
            # Not a valid expression, or not parseable by the unit registry
            continue
        if isinstance(value, registry.Unit):
            accepted = snippet
            break

    if accepted:
        # Modify the annotation so eval_anno() can be used
        units_anno.text = accepted
    else:  # pragma: no cover
        # No coverage: code that triggers this exception should never be committed
        log.log(
            level,
            f"Unit '{units_anno.text}' for {set_name} {code} not pint compatible",
        )

    # Also annotate child codes, each with its own copy of the annotation
    for child in code.child:
        child.annotations.append(units_anno.copy())


def process_commodity_codes(codes):
    """Process a list of codes for ``commodity``.

    Every code is passed to :func:`process_units_anno`, which reports commodities
    missing units or with non-:mod:`pint`-compatible units. Because ``quiet=True`` is
    used, those reports are currently logged on DEBUG rather than WARNING level.

    Parameters
    ----------
    codes : iterable of sdmx.model.Code
        Codes to process; modified in place.
    """
    for commodity in codes:
        # FIXME remove quiet=True; instead improve commodity.yaml with units
        process_units_anno(set_name="commodity", code=commodity, quiet=True)


def process_technology_codes(codes):
    """Process a list of codes for ``technology``.

    For every code, this function:

    - processes the "units" annotation via :func:`process_units_anno`, and
    - ensures there is an annotation with id "vintaged", default :obj:`False`. An
      existing annotation is removed and re-appended, so it ends up last in the
      code's annotations.

    Parameters
    ----------
    codes : iterable of sdmx.model.Code
        Codes to process; modified in place.
    """
    for tech in codes:
        # FIXME remove quiet=True; instead improve technology.yaml with units
        process_units_anno(set_name="technology", code=tech, quiet=True)

        try:
            vintaged = tech.pop_annotation(id="vintaged")
        except KeyError:
            # Not given in the file: use the default value for 'vintaged'
            vintaged = Annotation(id="vintaged", text=repr(False))

        tech.annotations.append(vintaged)


@click.command(name="techs")
@click.pass_obj
def cli(ctx):
    """Export metadata to technology.csv.

    This command transforms the technology metadata from the YAML file to CSV format.
    """
    # NB the docstring above is shown by click as the command's --help text.

    # IDs of the annotations exported as columns; a code lacking one of these simply
    # has no value in that column
    anno_ids = ("type", "vintaged", "sector", "input", "output")

    # Convert each code to a pd.Series
    rows = []
    for code in get_codes("technology"):
        # Base attributes
        row = dict(id=code.id, name=str(code.name), description=str(code.description))

        # Annotations
        for anno_id in anno_ids:
            try:
                row[anno_id] = str(code.get_annotation(id=anno_id).text)
            except KeyError:
                # Annotation not present on this code; leave the column empty
                pass

        rows.append(pd.Series(row))

    # Combine series to a data frame
    techs = pd.DataFrame(rows)

    # Write to file
    dest = ctx.get_local_path("technology.csv")
    print(f"Write to {dest}")

    # index=False: do not write the (meaningless) integer row index. The `index`
    # parameter is documented as a bool, so pass False explicitly instead of relying
    # on None being falsy.
    techs.to_csv(dest, index=False, header=True)

    # Print the first few items of the data frame
    print(techs.head())