Source code for message_ix_models.model.transport.migrate

"""Importing data from MESSAGE (V)-Transport data files.

This code is not currently in use.

See also :mod:`message_data.tools.messagev`.
"""

import logging
from collections import defaultdict
from functools import lru_cache
from itertools import product

import pandas as pd
from tqdm import tqdm

from message_ix_models.tools.messagev import CHNFile, DICFile, INPFile

log = logging.getLogger(__name__)


def import_all(
    context, path, nodes=[], version="geam_ADV3TRAr2_BaseX2_0", verbose=False
):
    """Import data from MESSAGE V files.

    .chn, .dic, and .inp files are read from *path* for the given *nodes*.
    """
    data_path = context.get_path("transport", "migrate")
    data_path.mkdir(exist_ok=True)

    if not (len(nodes) and nodes != [""]):
        nodes = config["MESSAGE V"]["set"]["node"]  # noqa: F821

    # Import .dic and .chn files
    node_path = {n: path / f"{n}_geam.dic" for n in nodes}
    dic = import_dic(node_path)
    del dic  # Unused

    node_path = {n: path / f"{n}_geam.chn" for n in nodes}
    chn = import_chn(node_path)
    del chn  # Unused

    # Import .inp files
    node_path = {n: path / f"{n}_{version}.inp" for n in nodes}
    data = import_inp(node_path, version, verbose)

    log.info("Imported\n" + str(data.head()))

    out_path = data_path / f"{version}.csv.gz"
    log.info(f"Write to {out_path}")
    data.to_csv(out_path)

    # Also write in wide format
    wide = data.reset_index("year").pivot(columns="year")
    wide.to_csv(data_path / f"{version}-wide.csv.gz")

    return data


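# Example usage (a sketch only, since this module is not currently in use).
# The directory and node codes below are hypothetical, and Context.get_instance()
# is assumed to return a suitably configured message_ix_models Context:
#
#     from pathlib import Path
#
#     from message_ix_models import Context
#
#     ctx = Context.get_instance(-1)
#     data = import_all(ctx, Path("ref", "messagev"), nodes=["afr", "cpa"])

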
def import_chn(node_path):
    """Import data from .chn files.

    *node_path* is a mapping from node names to paths of files. The
    tools.messagev.CHNFile class is used to parse the contents.
    """
    return {n: CHNFile(path) for n, path in node_path.items()}


def import_dic(node_path):
    """Import data from .dic files.

    *node_path* is a mapping from node names to paths of files. The
    tools.messagev.DICFile class is used to parse the contents, after which
    the values are merged to a single dictionary.
    """
    # Parse the files
    dics = {}
    for node, path in node_path.items():
        dics[node] = DICFile(path)

    # Merge to a single dictionary
    merged = DICFile()
    for node, dic in dics.items():
        for code, tec in dic.code_tec.items():
            # Assert that the value for this node is the same as others
            assert merged.code_tec.setdefault(code, tec) == tec
            merged.tec_code[tec] = code

    return merged


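# Sketch of the merge semantics, with invented codes and technology names: if
# the .dic files for nodes "nam" and "weu" both map code "1011" to "elec_trp",
# the merged DICFile holds a single such entry; if either file mapped "1011"
# to a *different* technology, the assertion above would fail:
#
#     nam, weu = DICFile(), DICFile()
#     nam.code_tec["1011"] = "elec_trp"
#     weu.code_tec["1011"] = "elec_trp"
#     # Merging files parsed like `nam` and `weu` yields one entry:
#     # code_tec == {"1011": "elec_trp"}, tec_code == {"elec_trp": "1011"}

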
def import_inp(node_path, version, verbose):
    """Import data from MESSAGE V-Transport .inp files.

    The tools.messagev.INPFile class is used to parse the contents.

    For 11 regions: 4015 region × tec combinations; 357,964 data points.
    """
    # List of all technologies to import
    # TODO re-add old names like Load_factor_truck
    tecs = list(transport_technologies(by_cg=True))  # noqa: F821

    log.info(
        f"Importing from {len(node_path)} nodes × {len(tecs)} technology/"
        "consumer groups"
    )

    @lru_cache()
    def region_file(path):
        """Return the parsed .inp file for a region.

        Results are cached, so each file is parsed at most once.
        """
        return INPFile(path)

    # commented: incomplete
    # # Iterate over demand sections
    # for (node, path) in node_path.items():
    #     f = region_file(path)
    #     log.info(f.get_section('demand:'))

    # Prepare an iterator over (node, technology)
    iterator = product(node_path.items(), tecs)

    if not verbose:
        # Show a progress bar
        iterator = tqdm(list(iterator))

    # Accumulate log messages. Emitting these directly while tqdm() is active
    # spoils the progress bar.
    _log_messages = []

    def _log(message):
        if not verbose:
            _log_messages.append(message)
        else:
            log.info(message)

    N_dp = 0  # Total number of data points
    data = []  # Loaded data
    no_section = defaultdict(set)  # Technologies with no section, per node

    # Iterate over regions and technologies
    for (node, path), tec in iterator:
        f = region_file(path)

        try:
            # Get information for this technology in this node
            info = f.parse_section(tec)
        except KeyError:
            no_section[node].add(tec)
            continue
        except Exception as e:
            # Some kind of error while parsing
            _log(f"{e!r} in {node}, {tec}:\n{f.get_section(tec)}")
            # Skip to the next item
            continue

        if "params" not in info:
            _log(f"No data points for {tec} in {node}.\n{f.get_section(tec)}")
            continue

        # Add node, tec columns
        info["params"]["node"] = node
        info["params"]["technology"] = tec

        # Count of data points
        N_dp += len(info["params"])

        data.append(info["params"])

    for node, missing in no_section.items():
        _log(f"{node}: no section(s) for technologies:\n{missing!r}")

    # Display accumulated log messages
    log.info("\n---\n".join(_log_messages + [f"{N_dp} data points."]))

    # Concatenate to a single dataframe
    return (
        pd.concat(data)
        .fillna({"source": ""})
        .set_index(["node", "technology", "param", "source", "year"])
    )


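# The return value is a single-column DataFrame: one 'value' column under a
# 5-level MultiIndex (node, technology, param, source, year). For instance,
# with invented entries, ("nam", "elec_trp", "inv", "", 2010) → 1234.5.

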
def load_all(version):
    """Load cached data from MESSAGE(V) *version*."""
    return load_inp(version)


def load_inp(version):
    """Load cached .inp data for *version*."""
    return pd.read_csv(
        data_path / f"{version}.csv.gz",  # noqa: F821
        index_col="node technology param source year".split(),
    )


# Mapping from .inp lines to MESSAGEix parameters
INP_PARS = {
    # TODO add
    # - 'bda lo' = bound_activity_lo
    "bda": {
        "name": "bound_activity_lo",
    },
    # - 'bda up' = bound_activity_up
    # - 'bdc lo' = bound_new_capacity_lo
    # - 'bdc up' = bound_new_capacity_up
    # - 'mpa lo' = initial_activity_lo, growth_activity_lo
    # - 'mpa up' = initial_activity_up, growth_activity_up
    # - 'mpc lo' = initial_new_capacity_lo, growth_new_capacity_lo
    # - 'mpc up' = initial_new_capacity_up, growth_new_capacity_up
    "plf": {
        "name": "capacity_factor",
        "drop": ["source"],
        "rename": {"node": "node_loc", "year": "year_vtg"},
    },
    "pll": {
        "name": "technical_lifetime",
        "drop": ["source"],
        "rename": {"node": "node_loc", "year": "year_vtg"},
    },
    "minp": {
        "name": "input",
    },
    "moutp": {"name": "output"},
    "fom": {
        "name": "fix_cost",
        "drop": ["source"],
        "rename": {"node": "node_loc", "year": "year_vtg"},
    },
    "inv": {
        "name": "inv_cost",
        "drop": ["source"],
        "rename": {"node": "node_loc", "year": "year_vtg"},
    },
    "vom": {
        "name": "var_cost",
        "drop": ["source"],
        "rename": {"node": "node_loc", "year": "year_vtg"},
    },
}


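# For example, the "plf" entry above directs transform() to select rows with
# param == "plf", drop the 'source' index level, rename 'node' to 'node_loc'
# and 'year' to 'year_vtg', and write the result to capacity_factor.csv.
# Schematically:
#
#     par_info = INP_PARS["plf"]
#     par = (
#         data.query("param == 'plf'")
#         .droplevel(["param"] + par_info["drop"])
#         .rename_axis(index=par_info["rename"])
#     )

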
def fill_year_act(data, info):
    """transform() helper: fill in year_act for *data*."""
    try:
        i = data.index.names.index("year_vtg")
        sort_levels = data.index.names[:i]
    except ValueError:
        return data

    data["year_act"] = data.index.to_frame()["year_vtg"]
    years = list(data["year_act"].unique())
    data = data.set_index("year_act", append=True).unstack("year_act")

    # Additional years to fill that do not appear in year_vtg
    Y = info.Y
    for year in filter(lambda y: y > min(years), sorted(set(Y) - set(years))):
        data[("value", year)] = None

    data = (
        data.fillna(method="ffill", axis=1)
        .stack("year_act")
        .dropna()
        .sort_index(level=sort_levels)
    )

    return data


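# Worked example with invented values, assuming info.Y == [2010, 2020, 2030]:
# input rows indexed by year_vtg 2010 (value 0.9) and 2020 (value 0.8) become
#
#     year_vtg  year_act  value
#     2010      2010      0.9
#     2010      2020      0.9
#     2010      2030      0.9
#     2020      2020      0.8
#     2020      2030      0.8
#
# i.e. each vintage's value is carried forward to every activity year at or
# after that vintage.

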
def truncate(data, info):
    """transform() helper: limit 'year_vtg', 'year' to y0 or later."""
    # Year column to truncate
    col = next(filter(lambda c: c in data.index.names, ["year_vtg", "year"]))
    log.info("Years " + str(data.index.to_frame()[col].unique()) + f" < {info.y0}")
    return data.query(f"{col} >= {info.y0}")


def transform(data, version, info):
    """Transform *data* from MESSAGE V schema to MESSAGEix.

    Data is written to data_path / version.
    """
    # Create output path
    out_path = data_path / version  # noqa: F821
    out_path.mkdir(exist_ok=True)

    # Rename indices
    data.rename(index=lambda n: f"R11_{n}".upper(), level="node", inplace=True)

    log.info(f"year_vtg >= {info.y0}")

    # Process parameters
    for name, par_info in INP_PARS.items():
        # - Select data for this parameter.
        # - Drop the column 'param' and any others.
        # - Rename MultiIndex levels.
        # - Fill in 'year_act' from 'year_vtg'.
        # - Truncate pre-model years.
        par = (
            data.query(f"param == '{name}'")
            .droplevel(["param"] + par_info.get("drop", []))
            .rename_axis(index=par_info.get("rename", {}))
            .pipe(fill_year_act, info)
            .pipe(truncate, info)
        )

        log.info(f"{len(par)} rows in {name}/{par_info['name']}")

        # Write to file
        par.to_csv(out_path / f"{par_info['name']}.csv")


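# Example usage (a sketch; `info` is assumed to be a message_ix_models
# ScenarioInfo, since the code above uses only its .Y and .y0 attributes, and
# the version string must match a file previously written by import_all()):
#
#     version = "geam_ADV3TRAr2_BaseX2_0"
#     data = load_inp(version)
#     transform(data, version, info)

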
def plot_inp_data(data, target_path):
    """Quick diagnostic plots of .inp file data."""
    import plotnine as p9

    plot = (
        p9.ggplot(data.reset_index(), p9.aes(x="year", y="value", color="node"))
        + p9.geom_point()
    )
    plot.save(target_path / "demo.pdf")