Source code for message_ix_models.tools.costs.projections

import logging
from itertools import product
from typing import Mapping, Tuple

import numpy as np
import pandas as pd
from numpy.polynomial import Polynomial

from .config import Config
from .decay import project_ref_region_inv_costs_using_reduction_rates
from .gdp import adjust_cost_ratios_with_gdp
from .regional_differentiation import apply_regional_differentiation

log = logging.getLogger(__name__)


def smaller_than(sequence, value):
    """Return a list of the items in `sequence` that are strictly below `value`.

    Items keep the order in which they appear in `sequence`.
    """
    return list(filter(lambda candidate: candidate < value, sequence))


def larger_than(sequence, value):
    """Return a list of the items in `sequence` that are strictly above `value`.

    Items keep the order in which they appear in `sequence`.
    """
    return list(filter(lambda candidate: candidate > value, sequence))


def _maybe_query_scenario(df: pd.DataFrame, config: "Config") -> pd.DataFrame:
    """Filter `df` for :attr:`.Config.scenario`, if any is specified.

    - If the setting is :obj:`None`, `df` itself is returned without filtering.
    - If the setting is "all", rows whose "scenario" is one of SSP1–SSP5 or LED are
      kept.
    - Otherwise, rows whose "scenario" equals the upper-cased setting are kept.
    """
    selection = config.scenario

    # Nothing configured: hand back the input object itself
    if selection is None:
        return df

    if selection == "all":
        # Referenced from the query expression below via "@scen"
        scen = ["SSP1", "SSP2", "SSP3", "SSP4", "SSP5", "LED"]  # noqa: F841
        return df.query("scenario in @scen")

    # A single scenario; labels in the data are upper case
    return df.query(f"scenario == {selection.upper()!r}")


def _maybe_query_scenario_version(df: pd.DataFrame, config: "Config") -> pd.DataFrame:
    """Filter `df` for :attr:`.Config.scenario_version`, if any is specified.

    If the setting is :obj:`None`, `df` itself is returned without filtering.
    Otherwise the setting must be one of "all", "updated", or "original"; any other
    value raises :class:`KeyError`.
    """
    requested = config.scenario_version

    if requested is None:
        return df

    review, previous = "Review (2023)", "Previous (2013)"

    # Labels in the "scenario_version" column matching each setting.
    # NB "all" does not appear in Config
    labels_for = {
        "all": [review, previous],
        "updated": [review],
        "original": [previous],
    }

    # Referenced from the query expression below via "@scen_vers"
    scen_vers = labels_for[requested]  # noqa: F841

    return df.query("scenario_version in @scen_vers")


def create_projections_constant(config: "Config") -> pd.DataFrame:
    """Create cost projections assuming constant regional cost ratios.

    Parameters
    ----------
    config : .Config
        The function responds to, or passes on to other functions, the fields:
        :attr:`~.Config.base_year`,
        :attr:`~.Config.module`,
        :attr:`~.Config.node`,
        :attr:`~.Config.ref_region`, and
        :attr:`~.Config.scenario`.

    Returns
    -------
    df_costs : pd.DataFrame
        Dataframe containing the cost projections with the columns:

        - scenario_version: scenario version (for constant method, only
          "Not applicable")
        - scenario: scenario name (SSP1, SSP2, SSP3, SSP4, SSP5, or LED)
        - message_technology: technology name
        - first_technology_year: first technology year; used by
          :func:`create_message_outputs` to discard earlier vintage years
        - region: region name
        - year: year
        - inv_cost: investment cost
        - fix_cost: fixed operating and maintenance cost
    """
    log.info(f"Selected scenario: {config.scenario}")
    log.info(
        "For the constant method, only the SSP scenario(s) itself needs to be "
        "specified. No scenario version (previous vs. updated) is needed."
    )

    log.info("Calculate regional differentiation in base year+region")
    df_region_diff = apply_regional_differentiation(config)

    # Same wording as the corresponding message in the GDP and convergence methods
    log.info("Apply cost reduction rates to reference region")
    df_ref_reg_decay = project_ref_region_inv_costs_using_reduction_rates(
        df_region_diff, config
    ).pipe(_maybe_query_scenario, config)

    df_costs = (
        df_region_diff.merge(df_ref_reg_decay, on="message_technology")
        .assign(
            # - Up to and including the base year: the regional base-year cost.
            # - Afterwards: the reference-region cost scaled by the (constant)
            #   regional cost ratio.
            inv_cost=lambda x: np.where(
                x.year <= config.base_year,
                x.reg_cost_base_year,
                x.inv_cost_ref_region_decay * x.reg_cost_ratio,
            ),
            # Evaluated after `inv_cost`, so this uses the values assigned above
            fix_cost=lambda x: x.inv_cost * x.fix_ratio,
            scenario_version="Not applicable",
        )
        .reindex(
            [
                "scenario_version",
                "scenario",
                "message_technology",
                "first_technology_year",
                "region",
                "year",
                "inv_cost",
                "fix_cost",
            ],
            axis=1,
        )
        .drop_duplicates()
    )

    return df_costs
def create_projections_gdp(config: "Config"):
    """Create cost projections using the GDP method.

    Parameters
    ----------
    config : .Config
        The function responds to, or passes on to other functions, the fields:
        :attr:`~.Config.base_year`,
        :attr:`~.Config.module`,
        :attr:`~.Config.node`,
        :attr:`~.Config.ref_region`,
        :attr:`~.Config.scenario`, and
        :attr:`~.Config.scenario_version`.

    Returns
    -------
    df_costs : pd.DataFrame
        Dataframe containing the cost projections with the columns:

        - scenario_version: scenario version (for gdp method, either
          "Review (2023)" or "Previous (2013)")
        - scenario: scenario name (SSP1, SSP2, SSP3, SSP4, SSP5, or LED)
        - message_technology: technology name
        - first_technology_year: first technology year
        - region: region name
        - year: year
        - inv_cost: investment cost
        - fix_cost: fixed operating and maintenance cost
    """
    # Report the selected scenario and scenario version
    log.info(f"Selected scenario: {config.scenario}")
    log.info(f"Selected scenario version: {config.scenario_version}")

    log.info("Calculate regional differentiation in base year+region")
    regional = apply_regional_differentiation(config)

    log.info("Apply cost reduction rates to reference region")
    ref_reduction = _maybe_query_scenario(
        project_ref_region_inv_costs_using_reduction_rates(regional, config),
        config,
    )

    log.info("Adjust ratios using GDP data")
    # - Compute adjustment
    # - Filter by Config.scenario, if given.
    # - Filter by Config.scenario_version, if given.
    adj_ratios = adjust_cost_ratios_with_gdp(regional, config)
    adj_ratios = _maybe_query_scenario(adj_ratios, config)
    adj_ratios = _maybe_query_scenario_version(adj_ratios, config)

    # Combine the three inputs into one frame
    combined = regional.merge(ref_reduction, on="message_technology").merge(
        adj_ratios, on=["scenario", "message_technology", "region", "year"]
    )

    # Investment cost:
    # - Up to and including the base year: the regional base-year cost.
    # - Afterwards: the reference-region cost scaled by the GDP-adjusted ratio.
    combined = combined.assign(
        inv_cost=np.where(
            combined["year"] <= config.base_year,
            combined["reg_cost_base_year"],
            combined["inv_cost_ref_region_decay"] * combined["reg_cost_ratio_adj"],
        )
    )

    # Fixed cost as a ratio of the investment cost computed above
    combined = combined.assign(fix_cost=combined["inv_cost"] * combined["fix_ratio"])

    output_columns = [
        "scenario_version",
        "scenario",
        "message_technology",
        "first_technology_year",
        "region",
        "year",
        "inv_cost",
        "fix_cost",
    ]

    return combined.reindex(columns=output_columns).drop_duplicates()
def create_projections_converge(config: "Config") -> pd.DataFrame:
    """Create cost projections using the convergence method.

    Parameters
    ----------
    config : .Config
        The function responds to, or passes on to other functions, the fields:
        :attr:`~.Config.base_year`,
        :attr:`~.Config.convergence_year`,
        :attr:`~.Config.module`,
        :attr:`~.Config.node`,
        :attr:`~.Config.ref_region`, and
        :attr:`~.Config.scenario`. The years in :attr:`.seq_years` are the ones for
        which pre-convergence costs are predicted.

    Returns
    -------
    df_costs : pd.DataFrame
        Dataframe containing the cost projections with the columns:

        - scenario_version: scenario version (for convergence method, only
          "Not applicable")
        - scenario: scenario name (SSP1, SSP2, SSP3, SSP4, SSP5, or LED)
        - message_technology: technology name
        - first_technology_year: first technology year
        - region: region name
        - year: year
        - inv_cost: investment cost
        - fix_cost: fixed operating and maintenance cost
    """
    log.info(f"Selected scenario: {config.scenario}")
    log.info(f"Selected convergence year: {config.convergence_year}")
    log.info(
        "For the convergence method, only the SSP scenario(s) itself needs to be "
        "specified. No scenario version (previous vs. updated) is needed."
    )

    log.info("Calculate regional differentiation in base year+region")
    df_region_diff = apply_regional_differentiation(config)

    log.info("Apply cost reduction rates to reference region")
    df_ref_reg_cost_reduction = project_ref_region_inv_costs_using_reduction_rates(
        df_region_diff, config
    ).pipe(_maybe_query_scenario, config)

    # Temporary costs:
    # - Up to and including the base year: the regional base-year cost.
    # - Before the convergence year: reference-region cost × regional cost ratio.
    # - From the convergence year onwards: the reference-region cost itself.
    df_tmp_costs = (
        df_region_diff.merge(df_ref_reg_cost_reduction, on="message_technology")
        .assign(
            inv_cost_tmp=lambda x: np.where(
                x.year <= config.base_year,
                x.reg_cost_base_year,
                np.where(
                    x.year < config.convergence_year,
                    x.inv_cost_ref_region_decay * x.reg_cost_ratio,
                    x.inv_cost_ref_region_decay,
                ),
            ),
        )
        .drop_duplicates()
    )

    y_predict = np.array(config.seq_years)
    y_index = pd.Index(config.seq_years, name="year")

    def _predict(df: pd.DataFrame) -> pd.DataFrame:
        """Fit a degree-1 polynomial to `df` and predict for :attr:`.seq_years`.

        Returns a data frame with the single column "inv_pre_converge_decay",
        indexed by "year".
        """
        # Fit
        p = Polynomial.fit(df.year, df.inv_cost_tmp, deg=1)

        # - Predict using config.seq_years.
        # - Assemble a single-column data frame with "year" as the index name.
        return pd.DataFrame({"inv_pre_converge_decay": p(y_predict)}, index=y_index)

    # Columns for grouping and merging
    group_cols = ["scenario", "message_technology", "region"]
    merge_cols = group_cols + ["year"]

    # Apply polynomial regression to costs at base year and convergence year
    # (interpolating).
    #
    # Only the two columns read by _predict() are selected, so the grouping columns
    # are not handed to the applied function; pandas 2.2 deprecated that behaviour.
    # With group_keys=True the group labels still appear in the result index.
    df_pre_converge_costs = (
        df_tmp_costs.query(
            "year == @config.base_year or year == @config.convergence_year"
        )
        .groupby(group_cols, group_keys=True)[["year", "inv_cost_tmp"]]
        .apply(_predict)
        .reset_index()
    )

    # Get final investment costs
    df_inv_costs_final = (
        df_tmp_costs.merge(df_pre_converge_costs, on=merge_cols)
        .assign(
            # - Up to and including the base year: the regional base-year cost.
            # - Reference region: its own projected cost.
            # - Other regions: the fitted value before the convergence year, and the
            #   reference-region cost from the convergence year onwards.
            inv_cost_converge=lambda x: np.where(
                x.year <= config.base_year,
                x.reg_cost_base_year,
                np.where(
                    x.region == config.ref_region,
                    x.inv_cost_ref_region_decay,
                    np.where(
                        x.year < config.convergence_year,
                        x.inv_pre_converge_decay,
                        x.inv_cost_ref_region_decay,
                    ),
                ),
            ),
        )
        .drop_duplicates()
    )

    # Get fixed O&M costs
    df_costs = (
        df_inv_costs_final.rename(columns={"inv_cost_converge": "inv_cost"})
        .assign(
            fix_cost=lambda x: x.inv_cost * x.fix_ratio,
            scenario_version="Not applicable",
        )
        .reindex(
            [
                "scenario_version",
                "scenario",
                "message_technology",
                "first_technology_year",
                "region",
                "year",
                "inv_cost",
                "fix_cost",
            ],
            axis=1,
        )
        .drop_duplicates()
    )

    return df_costs
def create_message_outputs(
    df_projections: pd.DataFrame, config: "Config"
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Create MESSAGEix outputs for investment and fixed costs.

    The returned data have the model periods given by :attr:`.Config.Y`.

    Parameters
    ----------
    df_projections : pd.DataFrame
        Dataframe containing the cost projections for each technology. Output of
        :func:`create_cost_projections`.
    config : .Config
        The function responds to the fields
        :attr:`~.Config.base_year`,
        :attr:`~.Config.fom_rate`,
        :attr:`~.Config.seq_years`,
        :attr:`~.Config.use_vintages`, and
        :attr:`~.Config.Y`.

    Returns
    -------
    inv : pd.DataFrame
        Dataframe containing investment costs.
    fom : pd.DataFrame
        Dataframe containing fixed operating and maintenance costs.
    """
    log.info("Convert {fix,inv}_cost data to MESSAGE structure")

    base_year = config.base_year

    # Identifier columns; together with "year" they form the full merge key
    id_cols = [
        "scenario_version",
        "scenario",
        "message_technology",
        "first_technology_year",
        "region",
    ]
    key_cols = id_cols + ["year"]

    # Every combination of the identifier values found in `df_projections`, repeated
    # for each year in config.seq_years
    grid = pd.DataFrame(
        product(
            *(df_projections[name].unique() for name in id_cols),
            config.seq_years,
        ),
        columns=key_cols,
    )

    def _values_in(year: int) -> pd.DataFrame:
        """Rows of `df_projections` for `year`; cost columns suffixed by `year`."""
        return (
            df_projections.query(f"year == {year}")
            .drop(columns=["year"])
            .rename(
                columns={
                    "inv_cost": f"inv_cost_{year}",
                    "fix_cost": f"fix_cost_{year}",
                }
            )
        )

    # Attach the 2020 and 2100 values to every row, then the per-year values (where
    # available)
    merged = (
        grid.merge(_values_in(2020), on=id_cols)
        .merge(_values_in(2100), on=id_cols)
        .merge(df_projections, on=key_cols, how="left")
    )

    # Years up to and including the base year take the 2020 values
    up_to_base = merged["year"] <= base_year
    merged = merged.assign(
        inv_cost=np.where(up_to_base, merged["inv_cost_2020"], merged["inv_cost"]),
        fix_cost=np.where(up_to_base, merged["fix_cost_2020"], merged["fix_cost"]),
    )

    # NOTE: This portion carries over the 2100 values to years beyond 2100.
    # This is applicable in the case where Config.final_year > 2100.
    from_2100 = merged["year"] >= 2100
    merged = merged.assign(
        inv_cost=np.where(from_2100, merged["inv_cost_2100"], merged["inv_cost"]),
        fix_cost=np.where(from_2100, merged["fix_cost_2100"], merged["fix_cost"]),
    )

    merged = (
        merged.drop(
            columns=["inv_cost_2020", "fix_cost_2020", "inv_cost_2100", "fix_cost_2100"]
        )
        .rename(columns={"year": "year_vtg"})
        .drop_duplicates()
    )

    def _drop_before_first_year(df: pd.DataFrame) -> pd.DataFrame:
        """Discard rows with year_vtg before first_technology_year; drop the latter."""
        return (
            df.astype({"first_technology_year": float})  # has to be float; int fails
            .query("year_vtg >= first_technology_year")
            .reset_index(drop=True)
            .drop_duplicates()
            .drop("first_technology_year", axis=1)
        )

    # Column data types for the investment cost output
    inv_dtypes = {
        "scenario_version": str,
        "scenario": str,
        "node_loc": str,
        "technology": str,
        "first_technology_year": str,
        "unit": str,
        "year_vtg": int,
        "value": float,
    }
    # The fixed cost output additionally has an activity year
    fom_dtypes = {**inv_dtypes, "year_act": int}

    # Investment costs
    inv_columns = [
        "scenario_version",
        "scenario",
        "node_loc",
        "technology",
        "first_technology_year",
        "year_vtg",
        "value",
        "unit",
    ]
    inv = merged.assign(unit="USD/kWa").rename(
        columns={
            "inv_cost": "value",
            "message_technology": "technology",
            "region": "node_loc",
        }
    )
    inv = inv.reindex(columns=inv_columns).astype(inv_dtypes)
    inv = inv.query("year_vtg in @config.Y")
    inv = _drop_before_first_year(inv)

    # Fixed costs.
    #
    # The year column of `merged` is crossed with config.seq_years to give
    # (year_vtg, year_act) pairs. With vintages, the existing column stays "year_vtg"
    # and the crossed-in column is "year_act"; without, the roles are swapped.
    if config.use_vintages:
        own_year, crossed_year = "year_vtg", "year_act"
    else:
        own_year, crossed_year = "year_act", "year_vtg"

    to_merge = pd.DataFrame({crossed_year: config.seq_years}).assign(key=1)

    def _compute_value(df: pd.DataFrame):
        """Return the fixed cost values for the rows of `df`."""
        if not config.use_vintages:
            return df.fix_cost

        growth = 1.0 + config.fom_rate

        # Vintages up to the base year: constant up to the base year, then growing
        # with the activity year.
        # NB if fom_rate was 0, the latter terms collapse to 1.0 ** (…) = 1.0
        old_vintage_value = np.where(
            df.year_act <= base_year,
            df.fix_cost,
            df.fix_cost * growth ** (df.year_act - base_year),
        )
        # Later vintages: growing with the age of the vintage
        new_vintage_value = df.fix_cost * growth ** (df.year_act - df.year_vtg)

        return np.where(df.year_vtg <= base_year, old_vintage_value, new_vintage_value)

    fom_columns = [
        "scenario_version",
        "scenario",
        "node_loc",
        "technology",
        "first_technology_year",
        "year_vtg",
        "year_act",
        "value",
        "unit",
    ]
    fom = (
        merged.drop(columns=["inv_cost"])
        .rename(columns={"year_vtg": own_year})
        .assign(key=1)
        .merge(to_merge, on="key")
        .drop(columns=["key"])
        .query("year_act >= year_vtg")
    )
    fom = fom.assign(value=_compute_value, unit="USD/kWa").rename(
        columns={"message_technology": "technology", "region": "node_loc"}
    )
    fom = fom.reindex(columns=fom_columns).astype(fom_dtypes)
    fom = fom.query("year_act in @config.Y and year_vtg in @config.Y")
    fom = _drop_before_first_year(fom)

    return inv, fom
def create_iamc_outputs(
    msg_inv: pd.DataFrame, msg_fix: pd.DataFrame
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Create IAMC outputs for investment and fixed costs.

    Parameters
    ----------
    msg_inv : pd.DataFrame
        Dataframe containing investment costs in MESSAGEix format. Output of
        :func:`create_message_outputs`.
    msg_fix : pd.DataFrame
        Dataframe containing fixed operating and maintenance costs in MESSAGEix
        format. Output of :func:`create_message_outputs`.

    Returns
    -------
    iamc_inv : pd.DataFrame
        Dataframe containing investment costs in IAMC format.
    iamc_fix : pd.DataFrame
        Dataframe containing fixed operating and maintenance costs in IAMC format.
    """
    log.info("Convert {fix,inv}_cost data to IAMC structure")

    # Identifier columns of the wide (IAMC) layout
    id_columns = [
        "SSP_Scenario_Version",
        "SSP_Scenario",
        "Region",
        "Variable",
        "Unit",
    ]
    # MESSAGEix → IAMC column names common to both outputs
    common_names = {
        "scenario_version": "SSP_Scenario_Version",
        "scenario": "SSP_Scenario",
        "node_loc": "Region",
        "unit": "Unit",
    }

    def _to_wide(long: pd.DataFrame, year_column: str, discard: list) -> pd.DataFrame:
        """Pivot `long` so that each value of `year_column` becomes a column."""
        renamed = long.rename(columns={**common_names, year_column: "Year"})
        wide = renamed.drop(columns=discard).pivot_table(
            index=id_columns, columns="Year", values="value"
        )
        return wide.reset_index().rename_axis(None, axis=1).drop_duplicates()

    # Investment costs: one variable per technology; years are vintage years
    inv_long = msg_inv.assign(
        Variable="Capital Cost|Electricity|" + msg_inv.technology
    )
    iamc_inv = _to_wide(inv_long, "year_vtg", ["technology"])

    # Fixed costs: one variable per technology and vintage; years are activity years
    fix_long = msg_fix.assign(
        Variable="OM Cost|Electricity|"
        + msg_fix.technology
        + "|Vintage="
        + msg_fix.year_vtg.astype(str)
    )
    iamc_fix = _to_wide(fix_long, "year_act", ["technology", "year_vtg"])

    return iamc_inv, iamc_fix
def create_cost_projections(config: "Config") -> Mapping[str, pd.DataFrame]:
    """Get investment and fixed cost projections.

    This is the main function to get investment and fixed cost projections. It calls
    the other functions in this module, and returns the projections in the specified
    format.

    Parameters
    ----------
    config : .Config
        The function responds to, or passes on to other functions, the fields:
        :attr:`~.Config.base_year`,
        :attr:`~.Config.convergence_year`,
        :attr:`~.Config.fom_rate`,
        :attr:`~.Config.format`,
        :attr:`~.Config.method`,
        :attr:`~.Config.module`,
        :attr:`~.Config.node`,
        :attr:`~.Config.ref_region`,
        :attr:`~.Config.scenario`, and
        :attr:`~.Config.scenario_version`.

    Returns
    -------
    dict
        Keys are "fix_cost" and "inv_cost", each mapped to a
        :class:`~.pandas.DataFrame`.

        If :attr:`.Config.format` is "message", the data frames have the same columns
        as required by :mod:`message_ix` for the respective parameter—for instance,
        the columns given by :py:`make_df("fix_cost", ...)`—*plus* columns named
        "scenario" and "scenario_version".
    """
    # Validate configuration
    config.check()

    # Display configuration using the default __repr__ provided by @dataclass
    log.info(f"Configuration: {config!r}")

    # Projection function for each value of `config.method`
    methods = {
        "constant": create_projections_constant,
        "convergence": create_projections_converge,
        "gdp": create_projections_gdp,
    }

    # Create projections
    projections = methods[config.method](config)

    # Convert to MESSAGEix format; then, only if requested, to IAMC format
    outputs = create_message_outputs(projections, config)
    if config.format == "iamc":
        outputs = create_iamc_outputs(*outputs)

    inv, fom = outputs
    return dict(inv_cost=inv, fix_cost=fom)