Source code for message_ix_models.tools.advance

"""Handle data from the ADVANCE project."""

import logging
from pathlib import Path
from typing import Optional
from zipfile import ZipFile

import pandas as pd
import pint
from genno import Quantity

from message_ix_models.project.advance.data import LOCATION, NAME
from message_ix_models.util import (
    cached,
    local_data_path,
    maybe_query,
    private_data_path,
)

log = logging.getLogger(__name__)

#: Standard dimensions for data produced as snapshots from the IIASA ENE Program
#: “WorkDB”.
DIMS = ["model", "scenario", "region", "variable", "unit", "year"]



[docs]
@cached
def get_advance_data(query: Optional[str] = None) -> pd.Series:
    """Return data from the ADVANCE Work Package 2 data snapshot at :data:`LOCATION`.

    .. deprecated:: 2023.11
       Use :class:`.ADVANCE` through :func:`.exo_data.prepare_computer` instead.

    Parameters
    ----------
    query : str, optional
        Passed to :meth:`pandas.DataFrame.query` to limit the returned values.

    Returns
    -------
    pandas.Series
        with a :class:`pandas.MultiIndex` having the levels :data:`.DIMS`.
    """
    try:
        path = private_data_path(*LOCATION)
    except TypeError:
        path = local_data_path(*LOCATION)

    return _read_workdb_snapshot(path, NAME).pipe(maybe_query, query)




[docs]
def advance_data(variable: str, query: Optional[str] = None) -> Quantity:
    """Return a single ADVANCE data `variable` as a :class:`genno.Quantity`.

    .. deprecated:: 2023.11
       Use :class:`.ADVANCE` through :func:`.exo_data.prepare_computer` instead.

    Parameters
    ----------
    query : str, optional
        Passed to :func:`get_advance_data`.

    Returns
    -------
    genno.Quantity
        with the dimensions :data:`.DIMS` and name `variable`. If the units of the data
        for `variable` are consistent and parseable by :mod:`pint`, the returned
        Quantity has these units; otherwise units are discarded and the returned
        Quantity is dimensionless.
    """
    data = (
        get_advance_data(query)
        .rename("value")
        .xs(variable, level="variable")
        .reset_index("unit")
    )
    if len(data.unit.unique()) > 1:  # pragma: no cover
        log.info(f"Non-unique units for {variable!r}; discarded")
        units = ""
    else:
        units = data.unit.iloc[0]

    result = Quantity(data["value"], name=variable)

    try:
        result.units = units
    except pint.errors.PintError as e:  # pragma: no cover
        log.info(f'"{e}" when parsing {units!r}; discarded')

    return result




[docs]
@cached
def _read_workdb_snapshot(path: Path, name: str) -> pd.Series:
    """Read the data file.

    The expected format is a ZIP archive at `path` containing a member at `name` in CSV
    format, with columns corresponding to :data:`DIMS`, except for “year”, which is
    stored as column headers (‘wide’ format). (This corresponds to an older version of
    the “IAMC format,” without more recent additions intended to represent sub-annual
    time resolution using a separate column.)

    .. deprecated:: 2023.11
       Use :func:`.iamc_like_data_for_query` instead.
    """
    with ZipFile(path) as zf:  # Open the ZIP archive
        with zf.open(name) as f:  # Open a particular member
            # - Read data using upper case column names, then convert to lower-case.
            # - Drop null rows.
            # - Stack the “year” dimension (‘long’ format), creating a pd.Series.
            # - Apply the index names.
            return (
                pd.read_csv(f, index_col=list(map(str.upper, DIMS[:-1])))
                .rename(columns=lambda c: int(c))
                .dropna(how="all")
                .stack()
                .rename_axis(DIMS)
            )