Source code for message_ix_models.model.material.data_petro

from collections import defaultdict

import message_ix
import pandas as pd
from message_ix import make_df

from message_ix_models import ScenarioInfo
from message_ix_models.model.material.data_util import read_timeseries
from message_ix_models.model.material.material_demand import material_demand_calc
from message_ix_models.model.material.util import get_ssp_from_context, read_config
from message_ix_models.util import (
    broadcast,
    nodes_ex_world,
    package_data_path,
    same_node,
)

ssp_mode_map = {
    "SSP1": "CTS core",
    "SSP2": "RTS core",
    "SSP3": "RTS high",
    "SSP4": "CTS high",
    "SSP5": "RTS high",
    "LED": "CTS core",  # TODO: maybe move to OECD projection instead
}

iea_elasticity_map = {
    "CTS core": (0.75, 0.15),
    "CTS high": (0.9, 0.45),
    "RTS core": (0.75, 0.4),
    "RTS high": (0.95, 0.7),
}


[docs] def read_data_petrochemicals(scenario: message_ix.Scenario) -> pd.DataFrame: """Read and clean data from :file:`petrochemicals_techno_economic.xlsx`.""" # Ensure config is loaded, get the context s_info = ScenarioInfo(scenario) fname = "petrochemicals_techno_economic.xlsx" if "R12_CHN" in s_info.N: sheet_n = "data_R12" else: sheet_n = "data_R11" # Read the file data_petro = pd.read_excel( package_data_path("material", "petrochemicals", fname), sheet_name=sheet_n ) # Clean the data data_petro = data_petro.drop(["Source", "Description"], axis=1) return data_petro
def gen_mock_demand_petro( scenario: message_ix.Scenario, gdp_elasticity_2020: float, gdp_elasticity_2030: float, ) -> pd.DataFrame: s_info = ScenarioInfo(scenario) modelyears = s_info.Y fy = scenario.firstmodelyear def get_demand_t1_with_income_elasticity( demand_t0, income_t0, income_t1, elasticity ): return ( elasticity * demand_t0.mul(((income_t1 - income_t0) / income_t0), axis=0) ) + demand_t0 gdp_mer = scenario.par("bound_activity_up", {"technology": "GDP"}) mer_to_ppp = pd.read_csv( package_data_path("material", "other", "mer_to_ppp_default.csv") ).set_index(["node", "year"]) # mer_to_ppp = scenario.par("MERtoPPP").set_index("node", "year") # TODO: might need to be re-activated for different SSPs gdp_mer = gdp_mer.merge( mer_to_ppp.reset_index()[["node", "year", "value"]], left_on=["node_loc", "year_act"], right_on=["node", "year"], ) gdp_mer["gdp_ppp"] = gdp_mer["value_y"] * gdp_mer["value_x"] gdp_mer = gdp_mer[["year", "node_loc", "gdp_ppp"]].reset_index() gdp_mer["Region"] = gdp_mer["node_loc"] # .str.replace("R12_", "") df_gdp_ts = gdp_mer.pivot( index="Region", columns="year", values="gdp_ppp" ).reset_index() num_cols = [i for i in df_gdp_ts.columns if isinstance(i, int)] hist_yrs = [i for i in num_cols if i < fy] df_gdp_ts = ( df_gdp_ts.drop([i for i in hist_yrs if i in df_gdp_ts.columns], axis=1) .set_index("Region") .sort_index() ) # 2018 production # Use as 2020 # The Future of Petrochemicals Methodological Annex # Projections here do not show too much growth until 2050 for some regions. # For division of some regions assumptions made: # PAO, PAS, SAS, EEU,WEU # For R12: China and CPA demand divided by 0.1 and 0.9. # SSP2 R11 baseline GDP projection # The orders of the regions # r = ['R12_AFR', 'R12_RCPA', 'R12_EEU', 'R12_FSU', 'R12_LAM', 'R12_MEA',\ # 'R12_NAM', 'R12_PAO', 'R12_PAS', 'R12_SAS', 'R12_WEU',"R12_CHN"] # if "R12_CHN" in nodes: # nodes.remove("R12_GLB") # dem_2020 = np.array([2.4, 0.44, 3, 5, 11, 40.3, 49.8, 11, # 37.5, 10.7, 29.2, 50.5]) # dem_2020 = pd.Series(dem_2020) # # else: # nodes.remove("R11_GLB") # dem_2020 = np.array([2, 75, 30, 4, 11, 42, 60, 32, 30, 29, 35]) # dem_2020 = pd.Series(dem_2020) from message_ix_models.model.material.material_demand.material_demand_calc import ( read_base_demand, ) df_demand_2020 = read_base_demand( package_data_path() / "material" / "petrochemicals/demand_petro.yaml" ) df_demand_2020 = df_demand_2020.rename({"region": "Region"}, axis=1) df_demand = df_demand_2020.pivot(index="Region", columns="year", values="value") dem_next_yr = df_demand for i in range(len(modelyears) - 1): income_year1 = modelyears[i] income_year2 = modelyears[i + 1] if income_year2 >= 2030: dem_next_yr = get_demand_t1_with_income_elasticity( dem_next_yr, df_gdp_ts[income_year1], df_gdp_ts[income_year2], gdp_elasticity_2030, ) else: dem_next_yr = get_demand_t1_with_income_elasticity( dem_next_yr, df_gdp_ts[income_year1], df_gdp_ts[income_year2], gdp_elasticity_2020, ) df_demand[income_year2] = dem_next_yr df_melt = df_demand.melt(ignore_index=False).reset_index() return make_df( "demand", unit="t", level="demand", value=df_melt.value, time="year", commodity="HVC", year=df_melt.year, node=df_melt["Region"], ) def gen_data_petro_ts( data_petro_ts: pd.DataFrame, results: dict[list], tec_ts: set[str], nodes: list[str] ) -> None: for t in tec_ts: common = dict( time="year", time_origin="year", time_dest="year", ) param_name = data_petro_ts.loc[ (data_petro_ts["technology"] == t), "parameter" ].unique() for p in set(param_name): val = data_petro_ts.loc[ (data_petro_ts["technology"] == t) & (data_petro_ts["parameter"] == p), "value", ] # units = data_petro_ts.loc[ # (data_petro_ts["technology"] == t) & # (data_petro_ts["parameter"] == p), # "units", # ].values[0] mod = data_petro_ts.loc[ (data_petro_ts["technology"] == t) & (data_petro_ts["parameter"] == p), "mode", ] yr = data_petro_ts.loc[ (data_petro_ts["technology"] == t) & (data_petro_ts["parameter"] == p), "year", ] if p == "var_cost": df = make_df( p, technology=t, value=val, unit="t", year_vtg=yr, year_act=yr, mode=mod, **common, ).pipe(broadcast, node_loc=nodes) else: rg = data_petro_ts.loc[ (data_petro_ts["technology"] == t) & (data_petro_ts["parameter"] == p), "region", ] df = make_df( p, technology=t, value=val, unit="t", year_vtg=yr, year_act=yr, mode=mod, node_loc=rg, **common, ) results[p].append(df) def assign_input_outpt( split: str, param_name: str, regions: pd.DataFrame, val: float | int, t: str, rg: str, global_region: str, common: dict, nodes: list[str], ) -> pd.DataFrame: com = split[1] lev = split[2] mod = split[3] if (param_name == "input") and (lev == "import"): df = make_df( param_name, technology=t, commodity=com, level=lev, mode=mod, value=val[regions[regions == rg].index[0]], unit="t", node_loc=rg, node_origin=global_region, **common, ) elif (param_name == "output") and (lev == "export"): df = make_df( param_name, technology=t, commodity=com, level=lev, mode=mod, value=val[regions[regions == rg].index[0]], unit="t", node_loc=rg, node_dest=global_region, **common, ) else: df = make_df( param_name, technology=t, commodity=com, level=lev, mode=mod, value=val[regions[regions == rg].index[0]], unit="t", node_loc=rg, **common, ).pipe(same_node) # Copy parameters to all regions, when node_loc is not GLB if (len(regions) == 1) and (rg != global_region): # print("copying to all R11", rg, lev) df["node_loc"] = None df = df.pipe(broadcast, node_loc=nodes) # .pipe(same_node) # Use same_node only for non-trade technologies if (lev != "import") and (lev != "export"): df = df.pipe(same_node) return df def broadcast_to_regions(df: pd.DataFrame, global_region: str, nodes: list[str]): if "node_loc" in df.columns: if ( len(set(df["node_loc"])) == 1 and list(set(df["node_loc"]))[0] != global_region ): # print("Copying to all R11") df["node_loc"] = None df = df.pipe(broadcast, node_loc=nodes) return df def gen_data_petro_chemicals( scenario: message_ix.Scenario, dry_run: bool = False ) -> dict[str, pd.DataFrame]: # Load configuration context = read_config() config = context["material"]["petro_chemicals"] ssp = get_ssp_from_context(context) # Information about scenario, e.g. node, year s_info = ScenarioInfo(scenario) # Techno-economic assumptions data_petro = read_data_petrochemicals(scenario) data_petro_ts = read_timeseries( scenario, "petrochemicals", "petrochemicals_techno_economic.xlsx" ) # List of data frames, to be concatenated together at end results = defaultdict(list) # For each technology there are differnet input and output combinations # Iterate over technologies modelyears = s_info.Y # s_info.Y is only for modeling years nodes = nodes_ex_world(s_info.N) global_region = [i for i in s_info.N if i.endswith("_GLB")][0] yv_ya = s_info.yv_ya for t in config["technology"]["add"]: t = t.id # years = s_info.Y params = data_petro.loc[(data_petro["technology"] == t), "parameter"].unique() # Availability year of the technology av = data_petro.loc[(data_petro["technology"] == t), "availability"].values[0] modelyears = [year for year in modelyears if year >= av] yva = yv_ya.loc[yv_ya.year_vtg >= av,] # Iterate over parameters for par in params: split = par.split("|") param_name = par.split("|")[0] val = data_petro.loc[ ((data_petro["technology"] == t) & (data_petro["parameter"] == par)), "value", ] regions = data_petro.loc[ ((data_petro["technology"] == t) & (data_petro["parameter"] == par)), "Region", ] # Common parameters for all input and output tables # node_dest and node_origin are the same as node_loc common = dict( year_vtg=yva.year_vtg, year_act=yva.year_act, time="year", time_origin="year", time_dest="year", ) for rg in regions: if len(split) > 1: if (param_name == "input") | (param_name == "output"): df = assign_input_outpt( split, param_name, regions, val, t, rg, global_region, common, nodes, ) elif param_name == "emission_factor": emi = split[1] mod = split[2] df = make_df( param_name, technology=t, value=val[regions[regions == rg].index[0]], emission=emi, mode=mod, unit="t", node_loc=rg, **common, ) elif param_name == "var_cost": mod = split[1] if rg != global_region: df = ( make_df( param_name, technology=t, mode=mod, value=val[regions[regions == rg].index[0]], unit="t", **common, ) .pipe(broadcast, node_loc=nodes) .pipe(same_node) ) else: df = make_df( param_name, technology=t, mode=mod, value=val[regions[regions == rg].index[0]], node_loc=rg, unit="t", **common, ).pipe(same_node) elif param_name == "share_mode_up": mod = split[1] df = ( make_df( param_name, technology=t, mode=mod, shares="steam_cracker", value=val[regions[regions == rg].index[0]], unit="-", **common, ) .pipe(broadcast, node_share=nodes) .pipe(same_node) ) # Rest of the parameters apart from input, output and emission_factor else: df = make_df( param_name, technology=t, value=val[regions[regions == rg].index[0]], unit="t", node_loc=rg, **common, ) df = df.drop_duplicates() # Copy parameters to all regions if (len(regions) == 1) and (rg != global_region): df = broadcast_to_regions(df, global_region, nodes) results[param_name].append(df) share_dict = { "shares": "steam_cracker", "node_share": ["R12_MEA", "R12_NAM"], "technology": "steam_cracker_petro", "mode": "ethane", "year_act": "2020", "time": "year", "value": [0.4, 0.4], "unit": "-", } results["share_mode_lo"].append(make_df("share_mode_lo", **share_dict)) default_gdp_elasticity_2020, default_gdp_elasticity_2030 = iea_elasticity_map[ ssp_mode_map[ssp] ] demand_hvc = material_demand_calc.gen_demand_petro( scenario, "HVC", default_gdp_elasticity_2020, default_gdp_elasticity_2030 ) results["demand"].append(demand_hvc) # Special treatment for time-varying params tec_ts = set(data_petro_ts.technology) # set of tecs in timeseries sheet gen_data_petro_ts(data_petro_ts, results, tec_ts, nodes) results = {par_name: pd.concat(dfs) for par_name, dfs in results.items()} # modify steam cracker hist data (naphtha -> gasoil) to make model feasible df_cap = pd.read_csv( package_data_path( "material", "petrochemicals", "steam_cracking_hist_new_cap.csv" ) ) df_act = pd.read_csv( package_data_path("material", "petrochemicals", "steam_cracking_hist_act.csv") ) df_act.loc[df_act["mode"] == "naphtha", "mode"] = "vacuum_gasoil" df = results["historical_activity"] results["historical_activity"] = pd.concat( [df.loc[df["technology"] != "steam_cracker_petro"], df_act] ) df = results["historical_new_capacity"] results["historical_new_capacity"] = pd.concat( [df.loc[df["technology"] != "steam_cracker_petro"], df_cap] ) # remove growth constraint for R12_AFR to make trade constraints feasible df = results["growth_activity_up"] results["growth_activity_up"] = df[ ~( (df["technology"] == "steam_cracker_petro") & (df["node_loc"] == "R12_AFR") & (df["year_act"] == 2020) ) ] # add 25% total trade bound df_dem = results["demand"] df_dem = df_dem.groupby("year").sum(numeric_only=True) * 0.25 df_dem = df_dem.reset_index() df_dem = df_dem.rename({"year": "year_act"}, axis=1) par_dict = { "node_loc": "R12_GLB", "technology": "trade_petro", "mode": "M1", "time": "year", "unit": "-", } results["bound_activity_up"] = pd.concat( [ results["bound_activity_up"], make_df("bound_activity_up", **df_dem, **par_dict), ] ) # TODO: move this to input xlsx file df = scenario.par( "relation_activity", filters={"relation": "h2_scrub_limit", "technology": "gas_bio"}, ) df["value"] = -(1.33181 * 0.482) # gas input * emission factor of gas df["technology"] = "gas_processing_petro" results["relation_activity"] = df # TODO: move this to input xlsx file df_gro = results["growth_activity_up"] drop_idx = df_gro[ (df_gro["technology"] == "steam_cracker_petro") & (df_gro["node_loc"] == "R12_RCPA") & (df_gro["year_act"] == 2020) ].index results["growth_activity_up"] = results["growth_activity_up"].drop(drop_idx) return results