Source code for message_ix_models.tools.advance

"""Handle data from the ADVANCE project."""

import logging
from pathlib import Path
from typing import Optional
from zipfile import ZipFile

import pandas as pd
import pint
from genno import Quantity

from message_ix_models.project.advance.data import LOCATION, NAME
from message_ix_models.util import (
    cached,
    local_data_path,
    maybe_query,
    private_data_path,
)

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

#: Standard dimensions for data produced as snapshots from the IIASA ENE Program
#: “WorkDB”. Also used, in this order, as the index level names of the data read
#: from a snapshot.
DIMS = [
    "model",
    "scenario",
    "region",
    "variable",
    "unit",
    "year",
]


@cached
def get_advance_data(query: Optional[str] = None) -> pd.Series:
    """Load the ADVANCE Work Package 2 data snapshot found at :data:`LOCATION`.

    .. deprecated:: 2023.11
       Use :class:`.ADVANCE` through :func:`.exo_data.prepare_computer` instead.

    Parameters
    ----------
    query : str, optional
        Passed to :meth:`pandas.DataFrame.query` to limit the returned values.

    Returns
    -------
    pandas.Series
        with a :class:`pandas.MultiIndex` having the levels :data:`.DIMS`.
    """
    # Prefer the private data location; if resolving it raises TypeError, fall back
    # to the local data location.
    try:
        snapshot_path = private_data_path(*LOCATION)
    except TypeError:
        snapshot_path = local_data_path(*LOCATION)

    # Read the archive member NAME, then optionally filter using `query`
    snapshot = _read_workdb_snapshot(snapshot_path, NAME)
    return snapshot.pipe(maybe_query, query)


def advance_data(variable: str, query: Optional[str] = None) -> Quantity:
    """Retrieve one ADVANCE data `variable` as a :class:`genno.Quantity`.

    .. deprecated:: 2023.11
       Use :class:`.ADVANCE` through :func:`.exo_data.prepare_computer` instead.

    Parameters
    ----------
    variable : str
        Label on the "variable" index level of the data to select.
    query : str, optional
        Passed to :func:`get_advance_data`.

    Returns
    -------
    genno.Quantity
        with the dimensions :data:`.DIMS` and name `variable`. If the units of the
        data for `variable` are consistent and parseable by :mod:`pint`, the
        returned Quantity has these units; otherwise units are discarded and the
        returned Quantity is dimensionless.
    """
    # Select the rows for `variable` and move "unit" from the index to a column
    values = get_advance_data(query).rename("value")
    data = values.xs(variable, level="variable").reset_index("unit")

    unit_column = data["unit"]
    if len(unit_column.unique()) > 1:  # pragma: no cover
        # More than one distinct unit: no single unit can be attached
        log.info(f"Non-unique units for {variable!r}; discarded")
        units = ""
    else:
        units = unit_column.iloc[0]

    result = Quantity(data["value"], name=variable)

    # Attach the units; a failure to parse them is logged, not raised
    try:
        result.units = units
    except pint.errors.PintError as e:  # pragma: no cover
        log.info(f'"{e}" when parsing {units!r}; discarded')

    return result


@cached
def _read_workdb_snapshot(path: Path, name: str) -> pd.Series:
    """Read a “WorkDB” snapshot data file.

    The expected format is a ZIP archive at `path` containing a member at `name` in
    CSV format, with columns corresponding to :data:`DIMS`, except for “year”, which
    is stored as column headers (‘wide’ format). (This corresponds to an older
    version of the “IAMC format,” without more recent additions intended to
    represent sub-annual time resolution using a separate column.)

    .. deprecated:: 2023.11
       Use :func:`.iamc_like_data_for_query` instead.

    Parameters
    ----------
    path : pathlib.Path
        Location of the ZIP archive.
    name : str
        Name of the CSV member within the archive.

    Returns
    -------
    pandas.Series
        in ‘long’ format, with index levels named per :data:`DIMS`.
    """
    # All dimensions except "year" appear as upper-case column names in the file
    id_columns = [dim.upper() for dim in DIMS[:-1]]

    # Open the ZIP archive and the particular member within it
    with ZipFile(path) as archive, archive.open(name) as member:
        wide = pd.read_csv(member, index_col=id_columns)

    # - Convert the remaining column labels (years) to integers.
    # - Drop rows in which every value is null.
    wide = wide.rename(columns=int).dropna(how="all")

    # - Stack the “year” dimension (‘long’ format), creating a pd.Series.
    # - Apply the (lower-case) index names.
    return wide.stack().rename_axis(DIMS)