Source code for message_ix_models.model.snapshot

"""Prepare base models from snapshot data."""

import logging
from pathlib import Path

import pandas as pd
from message_ix import Scenario
from tqdm import tqdm

from message_ix_models import Spec
from message_ix_models.util._message_ix import MACRO
from message_ix_models.util.pooch import SOURCE, fetch

from .build import apply_spec
from .structure import get_codes

log = logging.getLogger(__name__)



[docs]
def unpack(path: Path) -> Path:
    """Unpack :ref:`ixmp-format Excel file <ixmp:excel-data-format>` at `path`.

    The file is unpacked into a directory with the same name stem as the file (that is,
    without the :file:`.xlsx` suffix). In this directory are created:

    - One :file:`.csv.gz` file for each MESSAGE and/or MACRO parameter.
    - One file :file:`sets.xlsx` with only the :mod:`ixmp` sets, and no parameter data.

    If the files exist, they are not updated.
    To force re-unpacking, delete the files.

    Returns
    -------
    Path
        Path to the directory containing the unpacked files.
    """
    assert path.suffix == ".xlsx"
    base = path.with_suffix("")
    base.mkdir(exist_ok=True)

    # Get item name -> ixmp type mapping as a pd.Series
    xf = pd.ExcelFile(path, engine="openpyxl")
    name_type = xf.parse("ix_type_mapping")

    # Copied exactly from ixmp.backend.io
    def parse_item_sheets(name):
        """Read data for item *name*, possibly across multiple sheets."""
        dfs = [xf.parse(name)]

        # Collect data from repeated sheets due to max_row limit
        for x in filter(lambda n: n.startswith(name + "("), xf.sheet_names):
            dfs.append(xf.parse(x))  # pragma: no cover

        # Concatenate once and return
        return pd.concat(dfs, axis=0, ignore_index=True)

    sets_path = base.joinpath("sets.xlsx")
    sets_path.unlink(missing_ok=True)

    with pd.ExcelWriter(sets_path, engine="openpyxl") as ew:
        for _, (name, ix_type) in tqdm(name_type.iterrows()):
            item_path = base.joinpath(f"{name}.csv.gz")
            if item_path.exists():
                continue

            df = parse_item_sheets(name)

            if ix_type == "set":
                df.to_excel(ew, sheet_name=name, index=False)
            else:  # pragma: no cover
                df.to_csv(item_path, index=False)

        name_type.query("ix_type == 'set'").to_excel(ew, sheet_name="ix_type_mapping")

    return base




[docs]
def read_excel(scenario: Scenario, path: Path) -> None:
    """Similar to :meth:`.Scenario.read_excel`, but using :func:`unpack`."""
    base = unpack(path)

    scenario.read_excel(path=base.joinpath("sets.xlsx"))

    parameters = set(scenario.par_list())

    with scenario.transact(f"Read snapshot data from {path}"):
        for p in base.glob("*.csv.gz"):
            name = p.name.split(".")[0]

            if name not in parameters:
                continue  # Variable or equation data: don't read

            data = pd.read_csv(p)

            # Correct units
            if name == "inv_cost":
                data.replace({"unit": {"USD_2005/t ": "USD_2005/t"}}, inplace=True)

            scenario.add_par(name, data)




[docs]
def load(
    scenario: Scenario, snapshot_id: int, extra_cache_path: str | None = None
) -> None:
    """Fetch and load snapshot with ID `snapshot_id` into `scenario`.

    See also
    --------
    SNAPSHOTS
    """
    snapshot_name = f"snapshot-{snapshot_id}"
    path, *_ = fetch(**SOURCE[snapshot_name], extra_cache_path=extra_cache_path)

    # Add units
    spec = Spec()
    spec.add.set["unit"] = get_codes(f"unit/snapshot-{snapshot_id}")
    apply_spec(scenario, spec)

    # Initialize MACRO items
    with scenario.transact("Prepare scenario for snapshot data"):
        MACRO.initialize(scenario)

    read_excel(scenario=scenario, path=path)

    # Transfer 'node' from `scenario` to `platform` "regions"
    spec.add.set["node"] = scenario.set("node")
    apply_spec(scenario, spec)