Source code for message_ix_models.tools.costs.gdp

import logging

import numpy as np
import pandas as pd
from genno import KeySeq

from message_ix_models import Context

from .config import Config

log = logging.getLogger(__name__)


def process_raw_ssp_data(context: Context, config: Config) -> pd.DataFrame:
    """Retrieve SSP data as required for :mod:`.tools.costs`.

    This method uses :class:`.SSPOriginal` and :class:`.SSPUpdate` via
    :func:`.exo_data.prepare_computer`.

    Returns
    -------
    pandas.DataFrame
        with the columns:

        - scenario_version: version of SSP scenario data
        - scenario: scenario (SSP1-5, LED)
        - region: region name
        - year: year of data
        - total_population: total population aggregated to the regional level
        - total_gdp: total GDP aggregated to the regional level
        - gdp_ppp_per_capita: regional GDP per capita in PPP
        - gdp_ratio_reg_to_reference: ratio of regional GDP per capita to the
          reference region's GDP per capita
    """
    from collections import defaultdict

    import xarray as xr
    from genno import Computer, Key, Quantity, quote

    from message_ix_models.project.ssp.data import SSPUpdate  # noqa: F401
    from message_ix_models.tools.exo_data import prepare_computer

    # Computer to hold computations
    c = Computer()

    # Common dimensions
    dims = ("n", "y", "scenario")

    def broadcast_qty(s) -> Quantity:
        """Return a quantity with a "scenario" dimension and the single label `s`.

        Multiplying this by any other quantity adds the "scenario" dimension.
        """
        return Quantity(xr.DataArray([1.0], coords={"scenario": [s]}))

    c.add("LED:scenario", broadcast_qty("LED"))

    # Keys prepared in the loop
    keys = defaultdict(list)

    for n in "12345":
        # Source/scenario identifier
        ssp = f"ICONICS:SSP(2024).{n}"

        # Add a quantity for broadcasting
        c.add(f"SSP{n}:scenario", broadcast_qty(f"SSP{n}"))

        # Both population and GDP data
        for source_kw in (
            dict(measure="POP", model="IIASA-WiC POP 2023", name=f"_pop {n}"),
            dict(measure="GDP", model="OECD ENV-Growth 2023", name=f"_gdp {n}"),
        ):
            m = source_kw["measure"].lower()

            # Add tasks to `c` that retrieve and (partly) process data from the
            # database
            key, *_ = prepare_computer(context, c, ssp, source_kw, strict=False)

            # Add a "scenario" dimension
            for label in [f"SSP{n}"] + (["LED"] if n == "2" else []):
                keys[m].append(
                    c.add(f"{m} {label}", "mul", key, f"{label}:scenario")
                )

    # Concatenate single-scenario data
    k_pop = Key("pop", dims)
    c.add(k_pop, "concat", *keys["pop"])

    k_gdp = KeySeq("gdp", dims)
    c.add(k_gdp.base, "concat", *keys["gdp"])

    # Further calculations

    # GDP per capita
    c.add(k_gdp["cap"], "div", k_gdp.base, k_pop)

    # Ratio to reference region value
    c.add(
        k_gdp["indexed"],
        "index_to",
        k_gdp["cap"],
        quote("n"),
        quote(config.ref_region),
    )

    def merge(*dfs: pd.DataFrame) -> pd.DataFrame:
        """Merge data to a single data frame with the expected format."""
        return (
            pd.concat(
                [
                    dfs[0].to_series().rename("total_population"),
                    dfs[1].to_series().rename("total_gdp"),
                    dfs[2].to_series().rename("gdp_ppp_per_capita"),
                    dfs[3].to_series().rename("gdp_ratio_reg_to_reference"),
                ],
                axis=1,
            )
            .reset_index()
            .rename(columns={"n": "region", "y": "year"})
            .sort_values(by=["scenario", "region", "year"])
            .assign(scenario_version="2023")
        )

    k_result = "data::pandas"
    c.add(k_result, merge, k_pop, k_gdp.base, k_gdp["cap"], k_gdp["indexed"])

    # log.debug(c.describe(k_result))  # DEBUG Show what would be done

    result = c.get(k_result)

    # Ensure no NaN values in the ratio column
    assert not result.gdp_ratio_reg_to_reference.isna().any()

    return result
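

# ---------------------------------------------------------------------------
# Illustrative sketch (an assumption added to this listing, not part of the
# upstream module): broadcast_qty() above relies on ordinary xarray/genno
# broadcasting. Multiplying data dimensioned (n, y) by a length-1 array
# dimensioned "scenario" adds that dimension with the given label. The region
# and year labels below are hypothetical.
def _illustrate_scenario_broadcast() -> None:
    import xarray as xr

    data = xr.DataArray([[1.0, 2.0]], coords={"n": ["R12_NAM"], "y": [2020, 2025]})
    label = xr.DataArray([1.0], coords={"scenario": ["SSP2"]})

    # The product gains the "scenario" dimension: ('n', 'y', 'scenario')
    print((data * label).dims)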


def adjust_cost_ratios_with_gdp(region_diff_df, config: Config):
    """Calculate adjusted region-differentiated cost ratios.

    This function takes in a data frame with region-differentiated cost ratios and
    calculates adjusted region-differentiated cost ratios using GDP per capita data.

    Parameters
    ----------
    region_diff_df : pandas.DataFrame
        Output of :func:`apply_regional_differentiation`.
    config : .Config
        The function responds to, or passes on to other functions, the fields:
        :attr:`~.Config.base_year`,
        :attr:`~.Config.node`,
        :attr:`~.Config.ref_region`,
        :attr:`~.Config.scenario`, and
        :attr:`~.Config.scenario_version`.

    Returns
    -------
    pandas.DataFrame
        DataFrame with columns:

        - scenario_version: scenario version
        - scenario: SSP scenario
        - message_technology: message technology
        - region: R11, R12, or R20 region
        - year
        - gdp_ratio_reg_to_reference: ratio of GDP per capita in the respective
          region to GDP per capita in the reference region
        - reg_cost_ratio_adj: adjusted region-differentiated cost ratio
    """
    from .projections import _maybe_query_scenario, _maybe_query_scenario_version

    context = Context.get_instance(-1)
    context.model.regions = config.node

    # - Retrieve GDP data from the SSP databases and compute ratios (per capita;
    #   versus ref_region).
    # - Keep only data from y₀ onwards.
    # - Map "scenario_version" strings to the desired output.
    # - Set the dtype of the "year" column.
    # - Filter on config.scenario and config.scenario_version, if configured.
    df_gdp = (
        process_raw_ssp_data(context, config)
        .query("year >= @config.y0")
        .drop(columns=["total_gdp", "total_population"])
        .assign(
            scenario_version=lambda x: np.where(
                x.scenario_version.str.contains("2013"),
                "Previous (2013)",
                "Review (2023)",
            )
        )
        .astype({"year": int})
        .pipe(_maybe_query_scenario, config)
        .pipe(_maybe_query_scenario_version, config)
    )

    # If the base year does not exist in the GDP data, use the earliest year in the
    # GDP data and give a warning
    base_year = config.base_year
    if base_year not in df_gdp.year.unique():
        new_base_year = min(df_gdp.year.unique())
        log.warning(f"Use year={new_base_year} GDP data as proxy for {base_year}")
        base_year = new_base_year

    def _constrain_cost_ratio(df: pd.DataFrame, base_year):
        """Constrain "reg_cost_ratio_adj".

        In cases where gdp_ratio_reg_to_reference is < 1 and reg_cost_ratio_adj > 1
        in the base period, ensure reg_cost_ratio_adj(y) <=
        reg_cost_ratio_adj(base_year) for all future periods y.
        """
        ref = df.query("year == @base_year").iloc[0]

        if ref.gdp_ratio_reg_to_reference < 1 and ref.reg_cost_ratio_adj > 1:
            return df.assign(
                reg_cost_ratio_adj=df.reg_cost_ratio_adj.clip(
                    upper=ref.reg_cost_ratio_adj
                )
            )
        else:
            return df

    # 1. Select base-year GDP data for "gdp_ratio_reg_to_reference".
    # 2. Drop "year".
    # 3. Merge `region_diff_df` for "reg_cost_ratio".
    # 4. Compute slope.
    # 5. Compute intercept.
    # 6. Drop "gdp_ratio_reg_to_reference"; because of (1) and (2), this holds the
    #    base-period value only.
    # 7. Merge `df_gdp` again to re-add "year" and "gdp_ratio_reg_to_reference" with
    #    distinct values for each period.
    # 8. Compute reg_cost_ratio_adj.
    # 9. Fill 1.0 where NaNs occur in (8), i.e. for the reference region.
    # 10. Group by (scenario_version, scenario, region, message_technology) and apply
    #     _constrain_cost_ratio(), above, to each group.
    # 11. Select the desired columns.
    return (
        df_gdp.query("year == @base_year")
        .drop("year", axis=1)
        .merge(region_diff_df, on=["region"])
        .eval("slope = (reg_cost_ratio - 1) / (gdp_ratio_reg_to_reference - 1)")
        .eval("intercept = 1 - slope")
        .drop("gdp_ratio_reg_to_reference", axis=1)
        .merge(df_gdp, on=["scenario_version", "scenario", "region"], how="right")
        .eval("reg_cost_ratio_adj = slope * gdp_ratio_reg_to_reference + intercept")
        .fillna({"reg_cost_ratio_adj": 1.0})
        .groupby(
            ["scenario_version", "scenario", "region", "message_technology"],
            group_keys=False,
        )
        .apply(_constrain_cost_ratio, base_year)[
            [
                "scenario_version",
                "scenario",
                "message_technology",
                "region",
                "year",
                "gdp_ratio_reg_to_reference",
                "reg_cost_ratio_adj",
            ]
        ]
    )
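

# ---------------------------------------------------------------------------
# Illustrative sketch (an assumption added to this listing, not part of the
# upstream module): adjust_cost_ratios_with_gdp() above fits, per technology
# and region, a straight line through (1, 1) and the base-year point
# (gdp_ratio_reg_to_reference, reg_cost_ratio), then evaluates that line at
# each period's GDP ratio. The toy numbers below are hypothetical.
def _illustrate_gdp_adjustment() -> None:
    # Base-year values for one hypothetical region/technology
    gdp_ratio_base, reg_cost_ratio_base = 0.4, 0.8

    # Same formulas as the .eval() expressions above
    slope = (reg_cost_ratio_base - 1) / (gdp_ratio_base - 1)  # = 1/3
    intercept = 1 - slope  # = 2/3

    for gdp_ratio in (0.4, 0.7, 1.0):
        # 0.4 -> 0.8 (reproduces the base year); 0.7 -> 0.9; 1.0 -> 1.0 (the
        # reference region keeps a ratio of exactly 1)
        print(gdp_ratio, round(slope * gdp_ratio + intercept, 3))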