Source code for message_ix_models.tools.costs.regional_differentiation

import logging
from collections.abc import Mapping
from functools import lru_cache
from itertools import product
from typing import Literal

import numpy as np
import pandas as pd
from iam_units import registry

from message_ix_models.util import package_data_path
from message_ix_models.util.node import adapt_R11_R12

from .config import Config

log = logging.getLogger(__name__)


@lru_cache
def get_weo_region_map(regions: str) -> Mapping[str, str]:
    """Return a mapping from MESSAGE node IDs to WEO region names.

    The mapping is built from the ``iea-weo-region`` annotation carried by each
    child of the "World" code in the :doc:`/pkg-data/node` codelist.

    Parameters
    ----------
    regions : str
        ID of the node codelist; it is looked up as ``node/{regions}``.

    Returns
    -------
    Mapping of str to str
        Keys are node IDs; values are the text of the node's ``iea-weo-region``
        annotation. Results are memoized per `regions` value by
        :func:`functools.lru_cache`.
    """
    from message_ix_models.model.structure import get_codelist

    # The "World" code of the requested codelist; its children are the model nodes
    world = get_codelist(f"node/{regions}")["World"]

    weo_region_by_node = {}
    for node in world.child:
        annotation = node.get_annotation(id="iea-weo-region")
        weo_region_by_node[node.id] = str(annotation.text)

    return weo_region_by_node
def get_weo_data() -> pd.DataFrame:
    """Read in raw WEO investment/capital costs and fixed O&M costs data.

    Both cost types are read for every technology listed in the function. The
    workbook is opened once and all tables are parsed from that single handle.
    Values given as "n.a." are treated as missing and filled with the median of the
    available values for the same technology and cost type.

    Returns
    -------
    pandas.DataFrame
        DataFrame with columns:

        - cost_type: "inv_cost" (investment) or "fix_cost" (fixed O&M)
        - weo_technology: WEO technology name
        - weo_region: WEO region
        - year: year, as one of the strings "2022", "2030", "2050"
        - units: always "usd_per_kw"
        - value: cost value, converted from 2022 USD to 2005 USD
    """
    # For each technology: the sheet of the Excel file that holds its table, and the
    # number of rows to skip before the table starts
    DICT_TECH_ROWS = {
        "bioenergy_ccus": ["Renewables", 99],
        "bioenergy_cofiring": ["Renewables", 79],
        "bioenergy_large": ["Renewables", 69],
        "bioenergy_medium_chp": ["Renewables", 89],
        "ccgt": ["Gas", 9],
        "ccgt_ccs": ["Fossil fuels equipped with CCUS", 29],
        "ccgt_chp": ["Gas", 29],
        "csp": ["Renewables", 109],
        "fuel_cell": ["Gas", 39],
        "gas_turbine": ["Gas", 19],
        "geothermal": ["Renewables", 119],
        "hydropower_large": ["Renewables", 49],
        "hydropower_small": ["Renewables", 59],
        "igcc": ["Coal", 39],
        "igcc_ccs": ["Fossil fuels equipped with CCUS", 19],
        "marine": ["Renewables", 129],
        "nuclear": ["Nuclear", 9],
        "pulverized_coal_ccs": ["Fossil fuels equipped with CCUS", 9],
        "solarpv_buildings": ["Renewables", 19],
        "solarpv_large": ["Renewables", 9],
        "steam_coal_subcritical": ["Coal", 9],
        "steam_coal_supercritical": ["Coal", 19],
        "steam_coal_ultrasupercritical": ["Coal", 29],
        "wind_offshore": ["Renewables", 39],
        "wind_onshore": ["Renewables", 29],
    }

    # Cost types to read in and the spreadsheet columns that hold them; column A is
    # the region label in both cases
    DICT_COST_COLS = {"inv_cost": "A,B:D", "fix_cost": "A,F:H"}

    # Set file path for raw IEA WEO cost data
    file_path = package_data_path(
        "iea", "WEO_2023_PG_Assumptions_STEPSandNZE_Scenario.xlsx"
    )

    # Retrieve conversion factor from 2022 USD to 2005 USD
    conversion_factor = registry("1.0 USD_2022").to("USD_2005").magnitude

    # Loop through technologies and cost types to read in data and process:
    # - Convert to long format
    # - Label each table with its technology, cost type, and units
    # - Replace "n.a." with NaN
    # - Convert units from 2022 USD to 2005 USD
    dfs_cost = []
    # Open the workbook once; passing the path to read_excel() on every iteration
    # would re-open and re-parse the whole file for each of the 50 tables
    with pd.ExcelFile(file_path) as workbook:
        for tech_key, cost_key in product(DICT_TECH_ROWS, DICT_COST_COLS):
            sheet_name, start_row = DICT_TECH_ROWS[tech_key]
            df = (
                pd.read_excel(
                    workbook,
                    sheet_name=sheet_name,
                    header=None,
                    skiprows=start_row,
                    nrows=9,
                    usecols=DICT_COST_COLS[cost_key],
                )
                .set_axis(["weo_region", "2022", "2030", "2050"], axis=1)
                .melt(id_vars=["weo_region"], var_name="year", value_name="value")
                .assign(
                    weo_technology=tech_key,
                    cost_type=cost_key,
                    units="usd_per_kw",
                )
                .reindex(
                    [
                        "cost_type",
                        "weo_technology",
                        "weo_region",
                        "year",
                        "units",
                        "value",
                    ],
                    axis=1,
                )
                .replace({"value": "n.a."}, np.nan)
                .assign(value=lambda x: x.value * conversion_factor)
            )

            dfs_cost.append(df)

    all_cost_df = pd.concat(dfs_cost)

    # Substitute NaN values
    # The median is taken over every available value (all regions and years) of the
    # same technology and cost type
    df_median = (
        all_cost_df.groupby(["weo_technology", "cost_type"])
        .agg(median_value=("value", "median"))
        .reset_index()
    )

    # Merge full dataframe with median dataframe
    # Replace null values with median values
    df_merged = (
        all_cost_df.merge(df_median, on=["weo_technology", "cost_type"], how="left")
        .assign(adj_value=lambda x: np.where(x.value.isnull(), x.median_value, x.value))
        .drop(columns=["value", "median_value"])
        .rename(columns={"adj_value": "value"})
    )

    return df_merged
def get_intratec_data() -> pd.DataFrame:
    """Read in raw Intratec data.

    The data are read from the package data file
    :file:`intratec/R11/indices.csv`; lines starting with ``#`` are treated as
    comments and whitespace following a delimiter is skipped.

    Returns
    -------
    pandas.DataFrame
        DataFrame with columns:

        - node: Intratec region
        - value: Intratec index value
    """
    indices_csv = package_data_path("intratec", "R11", "indices.csv")
    intratec_indices = pd.read_csv(indices_csv, skipinitialspace=True, comment="#")
    return intratec_indices
def get_raw_technology_mapping(
    module: Literal["energy", "materials", "cooling"],
) -> pd.DataFrame:
    """Retrieve a technology mapping for `module`.

    The data are read from the package data file
    :file:`costs/{module}/tech_map.csv`; lines starting with ``#`` are treated as
    comments. The file must have the following columns:

    - ``message_technology``: MESSAGEix-GLOBIOM technology code
    - ``reg_diff_source``: data source to map MESSAGEix technology to. A string like
      "weo", "energy", or possibly others.
    - ``reg_diff_technology``: technology code in the source data.
    - ``base_year_reference_region_cost``: manually specified base year cost
      of the technology in the reference region (in 2005 USD).
    - ``fix_ratio``: manually specified ratio of fixed O&M costs to investment costs.

    Parameters
    ----------
    module : str
        See :attr:`.Config.module`.

    Returns
    -------
    pandas.DataFrame
        The contents of the file, without further processing.
    """
    tech_map_csv = package_data_path("costs", module, "tech_map.csv")
    raw_mapping = pd.read_csv(tech_map_csv, comment="#")
    return raw_mapping
def subset_module_map(raw_map):
    """Subset a non-energy module mapping to technologies with sufficient data.

    A technology is kept when it has a ``reg_diff_source``, a
    ``base_year_reference_region_cost``, or both. The row index of `raw_map` is
    preserved for the rows that remain.

    Parameters
    ----------
    raw_map : pandas.DataFrame
        Output of :func:`get_raw_technology_mapping`

    Returns
    -------
    pandas.DataFrame
        DataFrame with the columns of `raw_map`, with
        ``base_year_reference_region_cost`` moved to the last position:

        - message_technology: MESSAGEix technology name
        - reg_diff_source: data source to map MESSAGEix technology to (e.g., WEO)
        - reg_diff_technology: technology name in the data source
        - base_year_reference_region_cost: manually specified base year cost
          of the technology in the reference region (in 2005 USD)
    """
    cost_column = "base_year_reference_region_cost"

    # Drop technologies that are missing both a reg_diff_source and a base year cost
    usable = raw_map.query(
        "reg_diff_source.notnull() or base_year_reference_region_cost.notnull()"
    )

    # Keep the remaining columns in their existing order and put the cost column last
    column_order = [name for name in usable.columns if name != cost_column]
    column_order.append(cost_column)

    return usable[column_order]
def adjust_technology_mapping(
    module: Literal["energy", "materials", "cooling"],
) -> pd.DataFrame:
    """Adjust technology mapping based on sources and assumptions.

    For the "energy" module the raw energy mapping is returned unchanged. For any
    other module the result combines, with duplicate rows dropped:

    1. Every energy technology, with its base year cost overridden wherever the
       module mapping gives a cost for the same ``message_technology``.
    2. Module technologies whose ``reg_diff_source`` is "energy". These take the
       ``reg_diff_source`` and ``reg_diff_technology`` of the energy technology
       they point to, and fall back to that technology's base year cost when they
       have none of their own.
    3. Module technologies with no ``reg_diff_source`` but with a base year cost.
    4. For "materials" only: technologies mapped to "intratec" that have a base
       year cost, with ``reg_diff_technology`` set to "all".

    Technologies of the raw module mapping that do not appear in the result are
    reported in a log message at INFO level.

    Parameters
    ----------
    module : str
        See :attr:`.Config.module`.

    Returns
    -------
    pandas.DataFrame
        DataFrame with columns:

        - message_technology: MESSAGEix technology name.
        - reg_diff_source: data source to map MESSAGEix technology to (e.g., WEO,
          Intratec).
        - reg_diff_technology: technology name in the data source.
        - base_year_reference_region_cost: manually specified base year cost
          of the technology in the reference region (in 2005 USD).
        - fix_ratio: manually specified ratio of fixed O&M costs to investment
          costs; null where neither mapping specifies one.
    """
    raw_map_energy = get_raw_technology_mapping("energy")

    if module == "energy":
        return raw_map_energy

    else:
        raw_map_module = get_raw_technology_mapping(module)
        sub_map_module = subset_module_map(raw_map_module)

        # Module technologies that also exist in the energy mapping: where the module
        # gives a non-null base_year_reference_region_cost, it replaces the energy
        # value. The merge is how="right", so every energy technology is kept, also
        # those the module mapping does not mention.
        # NOTE: the query string resolves @raw_map_energy from this function's local
        # variables; the name must not change.
        module_replace = (
            sub_map_module.query(
                "message_technology in @raw_map_energy.message_technology"
            )
            .rename(
                columns={
                    "message_technology": "material_message_technology",
                    "base_year_reference_region_cost": "module_base_cost",
                }
            )
            .drop(columns=["reg_diff_source", "reg_diff_technology"])
            .merge(
                raw_map_energy,
                how="right",
                left_on="material_message_technology",
                right_on="message_technology",
            )
            .assign(
                base_year_reference_region_cost=lambda x: np.where(
                    x.module_base_cost.notnull(),
                    x.module_base_cost,
                    x.base_year_reference_region_cost,
                )
            )
            .reindex(
                [
                    "message_technology",
                    "reg_diff_source",
                    "reg_diff_technology",
                    "base_year_reference_region_cost",
                ],
                axis=1,
            )
        )

        # Module technologies mapped to an energy technology
        # (reg_diff_source == "energy"): look up the energy technology named in
        # reg_diff_technology.
        # - The module's own reg_diff_source/reg_diff_technology are dropped/renamed
        #   before the merge, so the columns of the same name selected by reindex()
        #   are those of the mapped energy technology.
        # - Use the module's base year cost if it is not null; otherwise use the
        #   base year cost of the mapped energy technology.
        module_map_energy = (
            sub_map_module.query("reg_diff_source == 'energy'")
            .drop(columns=["reg_diff_source"])
            .rename(
                columns={
                    "reg_diff_technology": "reg_diff_technology_energy",
                    "base_year_reference_region_cost": "material_base_cost",
                }
            )
            .merge(
                raw_map_energy.rename(
                    columns={
                        "message_technology": "message_technology_base",
                    }
                ),
                left_on="reg_diff_technology_energy",
                right_on="message_technology_base",
                how="left",
            )
            .assign(
                base_year_reference_region_cost=lambda x: np.where(
                    x.material_base_cost.isnull(),
                    x.base_year_reference_region_cost,
                    x.material_base_cost,
                )
            )
            .reindex(
                [
                    "message_technology",
                    "reg_diff_source",
                    "reg_diff_technology",
                    "base_year_reference_region_cost",
                ],
                axis=1,
            )
        )

        # Get technologies that don't have a map source but do have a base year cost
        # For these technologies, assume no regional differentiation
        # So use the reference region base year cost as the base year cost
        # across all regions
        # This frame keeps all columns of sub_map_module, including fix_ratio
        module_map_noregdiff = sub_map_module.query(
            "reg_diff_source.isnull() and base_year_reference_region_cost.notnull()"
        )

        # Concatenate module_replace, module_map_energy, and module_map_noregdiff
        # Drop duplicates
        module_all = (
            pd.concat(
                [
                    module_replace,
                    module_map_energy,
                    module_map_noregdiff,
                ]
            )
            .drop_duplicates()
            .reset_index(drop=True)
        )

        # If module == "materials", then get materials_map_intratec
        # and concatenate with module_all
        if module == "materials":
            # Get technologies that are mapped to Intratec AND have a base year cost
            # Assign reg_diff_technology as "all"
            materials_map_intratec = sub_map_module.query(
                "reg_diff_source == 'intratec' and "
                "base_year_reference_region_cost.notnull()"
            ).assign(reg_diff_technology="all")

            # Concatenate materials_map_intratec and module_all
            # Drop duplicates
            module_all = (
                pd.concat(
                    [
                        module_all,
                        materials_map_intratec,
                    ]
                )
                .drop_duplicates()
                .reset_index(drop=True)
            )

        # Resolve fix_ratio for every technology in module_all, in increasing order
        # of precedence:
        # 1. the value already in module_all (may be null)
        # 2. a custom fix_ratio from raw_map_energy
        # 3. a custom fix_ratio from sub_map_module (replaces one from raw_map_energy)
        # Otherwise, fix_ratio stays null
        module_all = (
            module_all.merge(
                raw_map_energy.query("fix_ratio.notnull()")[
                    ["message_technology", "fix_ratio"]
                ].rename(columns={"fix_ratio": "fix_ratio_energy"}),
                how="left",
                on="message_technology",
            )
            .merge(
                sub_map_module.query("fix_ratio.notnull()")[
                    ["message_technology", "fix_ratio"]
                ].rename(columns={"fix_ratio": "fix_ratio_module"}),
                how="left",
                on="message_technology",
            )
            .assign(
                fix_ratio=lambda x: np.where(
                    x.fix_ratio_energy.notnull(),
                    x.fix_ratio_energy,
                    x.fix_ratio,
                )
            )
            .assign(
                fix_ratio=lambda x: np.where(
                    x.fix_ratio_module.notnull(),
                    x.fix_ratio_module,
                    x.fix_ratio,
                )
            )
            .drop(columns=["fix_ratio_energy", "fix_ratio_module"])
        )

        # Get list of technologies in raw_map_module that are not in module_all
        # NOTE: the query string resolves @module_all from this function's local
        # variables; the name must not change.
        missing_tech = raw_map_module.query(
            "message_technology not in @module_all.message_technology"
        ).message_technology.unique()

        # The message is emitted even when missing_tech is empty
        log.info(
            "The following technologies are not projected due to insufficient data:"
            + "\n"
            + "\n".join(missing_tech)
        )

        return module_all
def get_weo_regional_differentiation(config: "Config") -> pd.DataFrame:
    """Apply WEO regional differentiation.

    1. Retrieve WEO data using :func:`.get_weo_data`.
    2. Map data to MESSAGEix-GLOBIOM regions according to the :attr:`.Config.node`.
    3. Calculate investment cost ratios for each region relative to the
       :attr:`~.Config.ref_region`.
    4. Calculate the ratio of fixed O&M cost to investment cost for each region.

    Only the 2022 WEO values are used, regardless of the configured base year.

    Parameters
    ----------
    config : .Config
        The function responds to the fields:
        :attr:`~.Config.node` and
        :attr:`~.Config.ref_region`.

    Returns
    -------
    pandas.DataFrame
        DataFrame with columns:

        - weo_technology: WEO technology name
        - region: MESSAGEix region
        - weo_ref_region_cost: WEO investment cost in reference region
        - reg_cost_ratio: regional investment cost ratio relative to reference region
        - weo_fix_ratio: ratio of WEO fixed O&M cost to WEO investment cost in the
          region

    Raises
    ------
    ValueError
        If the (upper-cased) :attr:`~.Config.ref_region` is not among the regions
        mapped to WEO data.
    """

    # Grab WEO data: both investment and fixed O&M costs
    df_weo = get_weo_data()

    # Even if config.base_year is greater than 2022, use 2022 WEO values
    sel_year = str(2022)
    log.info("…using year " + str(sel_year) + " data from WEO")

    # - Retrieve a map from MESSAGEix node IDs to WEO region names.
    # - Map WEO data to MESSAGEix regions.
    # - Keep only base year data.
    # NOTE: the query strings in this function resolve @sel_year, @weo_region and
    # @ref_region from this function's local variables; these names must not change,
    # and the queries must stay in this function's own scope.
    l_sel_weo = []
    for message_node, weo_region in get_weo_region_map(config.node).items():
        df_sel = (
            df_weo.query("year == @sel_year & weo_region == @weo_region")
            .assign(region=message_node)
            .rename(columns={"value": "weo_cost"})
            .reindex(
                [
                    "cost_type",
                    "weo_technology",
                    "weo_region",
                    "region",
                    "year",
                    "weo_cost",
                ],
                axis=1,
            )
        )

        l_sel_weo.append(df_sel)
    df_sel_weo = pd.concat(l_sel_weo)

    # If specified reference region is not in WEO data, then give error
    assert config.ref_region is not None
    ref_region = config.ref_region.upper()
    if ref_region not in df_sel_weo.region.unique():
        raise ValueError(
            f"Reference region {ref_region} not found in WEO data. "
            "Please specify a different reference region. "
            f"Available regions are: {df_sel_weo.region.unique()}"
        )

    # Calculate regional investment cost ratio relative to reference region:
    # pair the reference region's investment cost for each technology with the
    # investment cost of every region, then divide
    df_reg_ratios = (
        df_sel_weo.query("region == @ref_region and cost_type == 'inv_cost'")
        .rename(columns={"weo_cost": "weo_ref_region_cost"})
        .drop(columns={"weo_region", "region"})
        .merge(
            df_sel_weo.query("cost_type == 'inv_cost'"), on=["weo_technology", "year"]
        )
        .assign(reg_cost_ratio=lambda x: x.weo_cost / x.weo_ref_region_cost)
        .reindex(
            [
                "weo_technology",
                "region",
                "weo_ref_region_cost",
                "reg_cost_ratio",
            ],
            axis=1,
        )
    )

    # Calculate fixed O&M cost ratio relative to investment cost
    # Get investment costs
    df_inv = (
        df_sel_weo.query("cost_type == 'inv_cost' and year == @sel_year")
        .rename(columns={"weo_cost": "inv_cost"})
        .drop(columns=["year", "cost_type"])
    )

    # Get fixed O&M costs
    df_fix = (
        df_sel_weo.query("cost_type == 'fix_cost' and year == @sel_year")
        .rename(columns={"weo_cost": "fix_cost"})
        .drop(columns=["year", "cost_type"])
    )

    # Merge investment and fixed O&M costs
    # Calculate ratio of fixed O&M costs to investment costs
    df_fom_inv = (
        df_inv.merge(df_fix, on=["weo_technology", "weo_region", "region"])
        .assign(weo_fix_ratio=lambda x: x.fix_cost / x.inv_cost)
        .drop(columns=["inv_cost", "fix_cost", "weo_region"])
    )

    # Combine cost ratios (regional and fix-to-investment) together
    df_cost_ratios = df_reg_ratios.merge(df_fom_inv, on=["weo_technology", "region"])

    return df_cost_ratios
def get_intratec_regional_differentiation(node: str, ref_region: str) -> pd.DataFrame:
    """Apply Intratec regional differentiation.

    1. Retrieve Intratec data using :func:`.get_intratec_data`.
    2. Map data to MESSAGEix-GLOBIOM regions according to the :attr:`.Config.node`.
    3. Calculate cost ratios for each region relative to the
       :attr:`~.Config.ref_region`.

    Parameters
    ----------
    node : str
        See :attr:`.Config.node`. Compared case-insensitively; only "R11" and "R12"
        are supported.
    ref_region : str
        See :attr:`.Config.ref_region`. Compared after conversion to upper case.

    Returns
    -------
    pandas.DataFrame
        DataFrame with columns:

        - intratec_tech: always "all"; the Intratec index is not
          technology-specific
        - region: MESSAGEix region
        - intratec_ref_region_cost: Intratec index value in reference region
        - reg_cost_ratio: regional cost ratio relative to reference region

    Raises
    ------
    NotImplementedError
        If `node` is anything other than "R11" or "R12".
    ValueError
        If `ref_region` is not among the regions of the Intratec data.
    """

    df_intratec = get_intratec_data()

    # Map Intratec regions to MESSAGEix regions
    # If node is R11, then map directly
    # If node is R12, then adapt R11 regions to R12 regions
    node_id = node.upper()
    if node_id == "R11":
        df_intratec_map = df_intratec.rename(
            columns={"node": "region", "value": "intratec_index"}
        ).assign(intratec_tech="all")
    elif node_id == "R12":
        df_intratec_map = (
            adapt_R11_R12(df_intratec)
            .rename(columns={"node": "region", "value": "intratec_index"})
            .assign(intratec_tech="all")
            .drop(columns=["unit"])
        )
    else:
        # Covers "R20" and every other node list. Without this branch, an
        # unrecognized value would leave df_intratec_map unassigned and fail below
        # with an UnboundLocalError.
        raise NotImplementedError(
            f"Intratec regional differentiation is not implemented for node={node!r}"
        )

    # If specified reference region is not in data, then give error
    ref_region = ref_region.upper()
    if ref_region not in df_intratec_map.region.unique():
        raise ValueError(
            f"Reference region {ref_region} not found in Intratec data. "
            "Please specify a different reference region. "
            f"Available regions are: {df_intratec_map.region.unique()}"
        )

    # Calculate regional investment cost ratio relative to reference region
    # NOTE: the query string resolves @ref_region from this function's local
    # variables; the name must not change.
    df_reg_ratios = (
        df_intratec_map.query("region == @ref_region")
        .rename(columns={"intratec_index": "intratec_ref_region_cost"})
        .drop(columns=["region"])
        .merge(df_intratec_map, on=["intratec_tech"])
        .assign(reg_cost_ratio=lambda x: x.intratec_index / x.intratec_ref_region_cost)
        .reindex(
            [
                "intratec_tech",
                "region",
                "intratec_ref_region_cost",
                "reg_cost_ratio",
            ],
            axis=1,
        )
    )

    return df_reg_ratios
def apply_regional_differentiation(config: "Config") -> pd.DataFrame:
    """Apply regional differentiation depending on mapping source.

    1. Retrieve an adjusted technology mapping from
       :func:`.adjust_technology_mapping`.
    2. Based on the value in the ``reg_diff_source`` column:

       - technologies whose ``reg_diff_technology`` matches a WEO technology: use
         WEO data via :func:`.get_weo_regional_differentiation`.
       - "intratec": use Intratec data via
         :func:`.get_intratec_regional_differentiation`.
       - missing (null): assume no regional differentiation; use the
         :attr:`~.Config.ref_region` cost as the cost for all regions.

    3. Drop rows that did not receive a region in any of the above.

    Parameters
    ----------
    config : .Config
        The function responds to, or passes on to other functions, the fields:
        :attr:`~.Config.module`,
        :attr:`~.Config.node`, and
        :attr:`~.Config.ref_region`.

    Returns
    -------
    pandas.DataFrame
        DataFrame with columns:

        - message_technology: MESSAGEix technology name
        - reg_diff_source: data source to map MESSAGEix technology to
          (e.g., WEO, Intratec)
        - reg_diff_technology: technology name in the data source
        - region: MESSAGEix region
        - base_year_reference_region_cost: manually specified base year cost
          of the technology in the reference region (in 2005 USD)
        - reg_cost_ratio: regional cost ratio relative to reference region
        - fix_ratio: ratio of fixed O&M costs to investment costs
        - reg_cost_base_year: ``base_year_reference_region_cost`` multiplied by
          ``reg_cost_ratio``
    """
    df_map = adjust_technology_mapping(config.module)
    assert config.ref_region is not None
    df_weo = get_weo_regional_differentiation(config)
    df_intratec = get_intratec_regional_differentiation(config.node, config.ref_region)

    # Merge the whole technology mapping (not only WEO-sourced rows) with the output
    # of get_weo_regional_differentiation. This is a left merge: rows whose
    # reg_diff_technology matches no WEO technology get a null region here and are
    # removed by the dropna() at the end of this function.
    # If the base_year_reference_region_cost is empty, then use the weo_ref_region_cost
    # If the fix_ratio is empty, then use weo_fix_ratio
    filt_weo = (
        df_map.merge(
            df_weo, left_on="reg_diff_technology", right_on="weo_technology", how="left"
        )
        .assign(
            base_year_reference_region_cost=lambda x: np.where(
                x.base_year_reference_region_cost.isnull(),
                x.weo_ref_region_cost,
                x.base_year_reference_region_cost,
            ),
            fix_ratio=lambda x: np.where(
                x.fix_ratio.isnull(), x.weo_fix_ratio, x.fix_ratio
            ),
        )
        .reindex(
            [
                "message_technology",
                "reg_diff_source",
                "reg_diff_technology",
                "region",
                "base_year_reference_region_cost",
                "reg_cost_ratio",
                "fix_ratio",
            ],
            axis=1,
        )
    )

    # Filter for reg_diff_source == "intratec"
    # Then merge with output of get_intratec_regional_differentiation
    # If the base_year_reference_region_cost is empty,
    # then use the intratec_ref_region_cost
    # If the fix_ratio is empty, then use 0
    filt_intratec = (
        df_map.query("reg_diff_source == 'intratec'")
        .merge(
            df_intratec,
            left_on="reg_diff_technology",
            right_on="intratec_tech",
            how="left",
        )
        .assign(
            base_year_reference_region_cost=lambda x: np.where(
                x.base_year_reference_region_cost.isnull(),
                x.intratec_ref_region_cost,
                x.base_year_reference_region_cost,
            ),
            fix_ratio=lambda x: np.where(x.fix_ratio.isnull(), 0, x.fix_ratio),
        )
        .reindex(
            [
                "message_technology",
                "reg_diff_source",
                "reg_diff_technology",
                "region",
                "base_year_reference_region_cost",
                "reg_cost_ratio",
                "fix_ratio",
            ],
            axis=1,
        )
    )

    # One row per region with reg_cost_ratio = 1; "key" is a constant column used
    # below to cross-join these regions with the undifferentiated technologies.
    # TODO: Change from using intratec source as list of regions
    # NOTE(review): the regions come from filt_intratec, so when the mapping has no
    # "intratec" rows this frame is empty and filt_none ends up with null regions,
    # which the final dropna() removes — confirm this is intended.
    un_reg = pd.DataFrame(
        {"region": filt_intratec.region.unique(), "reg_cost_ratio": 1, "key": "z"}
    )

    # Filter for reg_diff_source == NaN
    # Create dataframe of all regions and merge with map data
    # Assume reg_cost_ratio = 1 for all regions
    # If the fix_ratio is empty, then use 0
    filt_none = (
        df_map.query("reg_diff_source.isnull()")
        .assign(key="z")
        .merge(un_reg, on="key", how="left")
        .assign(fix_ratio=lambda x: np.where(x.fix_ratio.isnull(), 0, x.fix_ratio))
        .reindex(
            [
                "message_technology",
                "reg_diff_source",
                "reg_diff_technology",
                "region",
                "base_year_reference_region_cost",
                "reg_cost_ratio",
                "fix_ratio",
            ],
            axis=1,
        )
    )

    # Stack the three sets of rows, then:
    # - force reg_cost_ratio = 1 where both reg_diff_source and reg_diff_technology
    #   are null (no regional differentiation)
    # - compute the base year cost in each region
    # - drop rows that have no region
    all_tech = (
        pd.concat([filt_weo, filt_intratec, filt_none])
        .reset_index(drop=True)
        .assign(
            reg_cost_ratio=lambda x: np.where(
                x.reg_diff_source.isna() & x.reg_diff_technology.isna(),
                1,
                x.reg_cost_ratio,
            )
        )
        .assign(
            reg_cost_base_year=lambda x: x.base_year_reference_region_cost
            * x.reg_cost_ratio
        )
        .dropna(subset=["region"])
        .reset_index(drop=True)
    )

    return all_tech