Source code for message_ix_models.model.snapshot

"""Prepare base models from snapshot data."""

import logging
from pathlib import Path
from typing import Optional

import pandas as pd
from message_ix import Scenario
from message_ix.models import MACRO
from tqdm import tqdm

from message_ix_models import Spec
from message_ix_models.util import minimum_version
from message_ix_models.util.pooch import SOURCE, fetch

from .build import apply_spec
from .structure import get_codes

log = logging.getLogger(__name__)


[docs] def unpack(path: Path) -> Path: """Unpack :ref:`ixmp-format Excel file <ixmp:excel-data-format>` at `path`. The file is unpacked into a directory with the same name stem as the file (that is, without the :file:`.xlsx` suffix). In this directory are created: - One :file:`.csv.gz` file for each MESSAGE and/or MACRO parameter. - One file :file:`sets.xlsx` with only the :mod:`ixmp` sets, and no parameter data. If the files exist, they are not updated. To force re-unpacking, delete the files. Returns ------- Path Path to the directory containing the unpacked files. """ assert path.suffix == ".xlsx" base = path.with_suffix("") base.mkdir(exist_ok=True) # Get item name -> ixmp type mapping as a pd.Series xf = pd.ExcelFile(path, engine="openpyxl") name_type = xf.parse("ix_type_mapping") # Copied exactly from ixmp.backend.io def parse_item_sheets(name): """Read data for item *name*, possibly across multiple sheets.""" dfs = [xf.parse(name)] # Collect data from repeated sheets due to max_row limit for x in filter(lambda n: n.startswith(name + "("), xf.sheet_names): dfs.append(xf.parse(x)) # pragma: no cover # Concatenate once and return return pd.concat(dfs, axis=0, ignore_index=True) sets_path = base.joinpath("sets.xlsx") sets_path.unlink(missing_ok=True) with pd.ExcelWriter(sets_path, engine="openpyxl") as ew: for _, (name, ix_type) in tqdm(name_type.iterrows()): item_path = base.joinpath(f"{name}.csv.gz") if item_path.exists(): continue df = parse_item_sheets(name) if ix_type == "set": df.to_excel(ew, sheet_name=name, index=False) else: # pragma: no cover df.to_csv(item_path, index=False) name_type.query("ix_type == 'set'").to_excel(ew, sheet_name="ix_type_mapping") return base
[docs] def read_excel(scenario: Scenario, path: Path) -> None: """Similar to :meth:`.Scenario.read_excel`, but using :func:`unpack`.""" base = unpack(path) scenario.read_excel(path=base.joinpath("sets.xlsx")) parameters = set(scenario.par_list()) with scenario.transact(f"Read snapshot data from {path}"): for p in base.glob("*.csv.gz"): name = p.name.split(".")[0] if name not in parameters: continue # Variable or equation data: don't read data = pd.read_csv(p) # Correct units if name == "inv_cost": data.replace({"unit": {"USD_2005/t ": "USD_2005/t"}}, inplace=True) scenario.add_par(name, data)
[docs] @minimum_version("message_ix 3.5") def load( scenario: Scenario, snapshot_id: int, extra_cache_path: Optional[str] = None ) -> None: """Fetch and load snapshot with ID `snapshot_id` into `scenario`. See also -------- SNAPSHOTS """ snapshot_name = f"snapshot-{snapshot_id}" path, *_ = fetch(**SOURCE[snapshot_name], extra_cache_path=extra_cache_path) # Add units spec = Spec() spec.add.set["unit"] = get_codes(f"unit/snapshot-{snapshot_id}") apply_spec(scenario, spec) # Initialize MACRO items with scenario.transact("Prepare scenario for snapshot data"): MACRO.initialize(scenario) read_excel(scenario=scenario, path=path) # Transfer 'node' from `scenario` to `platform` "regions" spec.add.set["node"] = scenario.set("node") apply_spec(scenario, spec)