Source code for message_ix_models.model.material.data_aluminum

from collections import defaultdict
from collections.abc import Iterable

import message_ix
import pandas as pd
from message_ix import make_df

from message_ix_models import ScenarioInfo
from message_ix_models.util import (
    broadcast,
    nodes_ex_world,
    package_data_path,
    same_node,
)

from .data_util import read_rel, read_timeseries
from .material_demand import material_demand_calc
from .util import combine_df_dictionaries, get_ssp_from_context, read_config


def read_data_aluminum(
    scenario: message_ix.Scenario,
) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """Read and clean data from :file:`aluminum_techno_economic.xlsx`.

    Parameters
    ----------
    scenario: message_ix.Scenario
        Scenario instance to build aluminum on

    Returns
    -------
    tuple of three pd.DataFrames
        aluminum data in three separate groups: time-independent parameters,
        relation parameters and time-dependent parameters
    """
    # Information about the scenario, e.g. which node list is used
    s_info = ScenarioInfo(scenario)

    # Shorter access to sets configuration
    # sets = context["material"]["generic"]

    fname = "aluminum_techno_economic.xlsx"

    if "R12_CHN" in s_info.N:
        sheet_n = "data_R12"
    else:
        sheet_n = "data_R11"

    # Read the file
    data_alu = pd.read_excel(
        package_data_path("material", "aluminum", fname), sheet_name=sheet_n
    )

    # Drop columns that don't contain useful information
    data_alu = data_alu.drop(["Source", "Description"], axis=1)

    data_alu_rel = read_rel(scenario, "aluminum", "aluminum_techno_economic.xlsx")
    data_aluminum_ts = read_timeseries(
        scenario, "aluminum", "aluminum_techno_economic.xlsx"
    )

    # Unit conversion is currently done in the Excel file; it could also be
    # done here, to make sure the same units are used throughout.
    return data_alu, data_alu_rel, data_aluminum_ts
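

# NOTE: a minimal usage sketch for the function above, not executed on import.
# It assumes a connected ixmp.Platform and an existing scenario; the model and
# scenario names below are hypothetical placeholders.
def _example_read_data_aluminum():
    import ixmp

    mp = ixmp.Platform()
    scen = message_ix.Scenario(mp, model="MESSAGEix-Materials", scenario="baseline")

    data_alu, data_alu_rel, data_alu_ts = read_data_aluminum(scen)
    # Three flat tables: constant parameters, relations, and timeseries data
    print(data_alu.columns.tolist())
    print(data_alu_rel["relation"].unique())
    print(data_alu_ts["parameter"].unique())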


def gen_data_alu_ts(data: pd.DataFrame, nodes: list) -> dict[str, pd.DataFrame]:
    """Generate time-variable parameter data for the aluminum sector.

    Parameters
    ----------
    data: pd.DataFrame
        time-variable data from the input file
    nodes: list
        regions of the model

    Returns
    -------
    dict[str, pd.DataFrame]
        key-value pairs of parameter names and parameter data
    """
    tec_ts = set(data.technology)  # set of technologies in the timeseries sheet

    common = dict(
        time="year",
        time_origin="year",
        time_dest="year",
    )
    par_dict = defaultdict(list)
    for t in tec_ts:
        param_name = data.loc[(data["technology"] == t), "parameter"].unique()

        for p in set(param_name):
            val = data.loc[
                (data["technology"] == t) & (data["parameter"] == p), "value"
            ]
            # units = data.loc[
            #     (data["technology"] == t) & (data["parameter"] == p), "units"
            # ].values[0]
            mod = data.loc[
                (data["technology"] == t) & (data["parameter"] == p), "mode"
            ]
            yr = data.loc[
                (data["technology"] == t) & (data["parameter"] == p), "year"
            ]

            if p == "var_cost":
                df = make_df(
                    p,
                    technology=t,
                    value=val,
                    unit="t",
                    year_vtg=yr,
                    year_act=yr,
                    mode=mod,
                    **common,
                ).pipe(broadcast, node_loc=nodes)
            else:
                rg = data.loc[
                    (data["technology"] == t) & (data["parameter"] == p), "region"
                ]
                df = make_df(
                    p,
                    technology=t,
                    value=val,
                    unit="t",
                    year_vtg=yr,
                    year_act=yr,
                    mode=mod,
                    node_loc=rg,
                    **common,
                )
            par_dict[p].append(df)

    return {par_name: pd.concat(dfs) for par_name, dfs in par_dict.items()}
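

# NOTE: an illustrative sketch of the behavior above: "var_cost" rows are
# broadcast over all model nodes, while other parameters keep the region given
# in the input sheet. The toy input frame is invented purely for illustration.
def _example_gen_data_alu_ts():
    toy = pd.DataFrame(
        {
            "technology": ["prebake_aluminum"] * 2,
            "parameter": ["var_cost"] * 2,
            "value": [1.0, 1.1],
            "mode": ["M1", "M1"],
            "year": [2020, 2025],
            "region": [None, None],
        }
    )
    out = gen_data_alu_ts(toy, nodes=["R12_NAM", "R12_WEU"])
    # 2 years x 2 nodes -> 4 rows in the "var_cost" frame
    print(out["var_cost"])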


def gen_data_alu_rel(data: pd.DataFrame, years: list) -> dict[str, pd.DataFrame]:
    par_dict = defaultdict(list)
    regions = set(data["Region"].values)
    for reg in regions:
        for r in data["relation"].unique():
            if r is None:
                break

            params = set(data.loc[(data["relation"] == r), "parameter"].values)

            # This relation should start from 2020
            if r == "minimum_recycling_aluminum":
                modelyears_copy = years[:]
                if 2020 in modelyears_copy:
                    modelyears_copy.remove(2020)

                common_rel = dict(
                    year_rel=modelyears_copy,
                    year_act=modelyears_copy,
                    mode="M1",
                    relation=r,
                )
            else:
                # Use all the model years for the other relations
                common_rel = dict(
                    year_rel=years,
                    year_act=years,
                    mode="M1",
                    relation=r,
                )

            for par_name in params:
                if par_name == "relation_activity":
                    tec_list = data.loc[
                        ((data["relation"] == r) & (data["parameter"] == par_name)),
                        "technology",
                    ]

                    for tec in tec_list.unique():
                        val = data.loc[
                            (
                                (data["relation"] == r)
                                & (data["parameter"] == par_name)
                                & (data["technology"] == tec)
                                & (data["Region"] == reg)
                            ),
                            "value",
                        ].values[0]

                        df = make_df(
                            par_name,
                            technology=tec,
                            value=val,
                            unit="-",
                            node_loc=reg,
                            node_rel=reg,
                            **common_rel,
                        ).pipe(same_node)

                        par_dict[par_name].append(df)

                elif (par_name == "relation_upper") | (par_name == "relation_lower"):
                    val = data.loc[
                        (
                            (data["relation"] == r)
                            & (data["parameter"] == par_name)
                            & (data["Region"] == reg)
                        ),
                        "value",
                    ].values[0]

                    df = make_df(
                        par_name, value=val, unit="-", node_rel=reg, **common_rel
                    )

                    par_dict[par_name].append(df)

    return {par_name: pd.concat(dfs) for par_name, dfs in par_dict.items()}
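

# NOTE: a toy sketch of the year handling above: for
# "minimum_recycling_aluminum" the year 2020 is removed, so the relation only
# binds from 2025 onward. The input frame is invented purely for illustration.
def _example_gen_data_alu_rel():
    toy = pd.DataFrame(
        {
            "relation": ["minimum_recycling_aluminum"] * 2,
            "parameter": ["relation_activity", "relation_lower"],
            "technology": ["secondary_aluminum", None],
            "Region": ["R12_NAM", "R12_NAM"],
            "value": [1.0, 0.0],
        }
    )
    out = gen_data_alu_rel(toy, years=[2020, 2025, 2030])
    # year_rel/year_act start at 2025, because 2020 was removed
    print(out["relation_activity"]["year_rel"].unique())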


def assign_input_outpt(
    split,
    param_name: str,
    regions: pd.DataFrame,
    val,
    t: str,
    rg: str,
    glb_reg: str,
    common: dict,
    yv_ya: pd.DataFrame,
    nodes,
):
    # Assign commodity and level names; a mode could be added later
    com = split[1]
    lev = split[2]

    if (param_name == "input") and (lev == "import"):
        df = make_df(
            param_name,
            technology=t,
            commodity=com,
            level=lev,
            value=val[regions[regions == rg].index[0]],
            unit="t",
            node_loc=rg,
            node_origin=glb_reg,
            **common,
        )
    elif (param_name == "output") and (lev == "export"):
        df = make_df(
            param_name,
            technology=t,
            commodity=com,
            level=lev,
            value=val[regions[regions == rg].index[0]],
            unit="t",
            node_loc=rg,
            node_dest=glb_reg,
            **common,
        )
    # Assign higher efficiency to younger plants
    elif (
        ((t == "soderberg_aluminum") or (t == "prebake_aluminum"))
        & (com == "electr")
        & (param_name == "input")
    ):
        # All the vintage years
        year_vtg = sorted(set(yv_ya.year_vtg.values))
        # Collect the values for each combination of vintage and active years
        input_values_all = []
        for yr_v in year_vtg:
            # The initial-year efficiency value
            input_values_temp = [val[regions[regions == rg].index[0]]]
            # Reduction after the vintage year
            year_vtg_filtered = list(filter(lambda op: op >= yr_v, year_vtg))
            # Filter the active model years
            year_act = yv_ya.loc[yv_ya["year_vtg"] == yr_v, "year_act"].values
            for i in range(len(year_vtg_filtered) - 1):
                input_values_temp.append(input_values_temp[i] * 1.1)
            act_year_no = len(year_act)
            input_values_temp = input_values_temp[-act_year_no:]
            input_values_all = input_values_all + input_values_temp

        df = make_df(
            param_name,
            technology=t,
            commodity=com,
            level=lev,
            value=input_values_all,
            unit="t",
            node_loc=rg,
            **common,
        ).pipe(same_node)
    else:
        df = make_df(
            param_name,
            technology=t,
            commodity=com,
            level=lev,
            value=val[regions[regions == rg].index[0]],
            unit="t",
            node_loc=rg,
            **common,
        ).pipe(same_node)

    # Copy parameters to all regions when node_loc is not the global region
    if (len(regions) == 1) and (rg != glb_reg):
        df["node_loc"] = None
        df = df.pipe(broadcast, node_loc=nodes)  # .pipe(same_node)
        # Use same_node only for non-trade technologies
        if (lev != "import") and (lev != "export"):
            df = df.pipe(same_node)

    return df


def gen_data_alu_const(
    data: pd.DataFrame,
    config: dict,
    glb_reg: str,
    years: Iterable,
    yv_ya: pd.DataFrame,
    nodes: list[str],
):
    results = defaultdict(list)
    for t in config["technology"]["add"]:
        t = t.id
        params = data.loc[(data["technology"] == t), "parameter"].unique()

        # Obtain the active and vintage years; filter into per-technology
        # variables so the selection for one technology does not carry over
        # to the next iteration
        av = data.loc[(data["technology"] == t), "availability"].values[0]
        years_t = [year for year in years if year >= av]
        yv_ya_t = yv_ya.loc[yv_ya.year_vtg >= av]

        common = dict(
            year_vtg=yv_ya_t.year_vtg,
            year_act=yv_ya_t.year_act,
            mode="M1",
            time="year",
            time_origin="year",
            time_dest="year",
        )

        # Iterate over parameters
        for par in params:
            # Obtain the parameter name and the commodity, level or emission
            split = par.split("|")
            param_name = split[0]

            # Obtain the scalar value for the parameter
            val = data.loc[
                ((data["technology"] == t) & (data["parameter"] == par)),
                "value",
            ]
            regions = data.loc[
                ((data["technology"] == t) & (data["parameter"] == par)),
                "region",
            ]

            for rg in regions:
                # For the parameters which include index names
                if len(split) > 1:
                    if (param_name == "input") | (param_name == "output"):
                        df = assign_input_outpt(
                            split,
                            param_name,
                            regions,
                            val,
                            t,
                            rg,
                            glb_reg,
                            common,
                            yv_ya_t,
                            nodes,
                        )
                    elif param_name == "emission_factor":
                        # Assign the emission type
                        emi = split[1]
                        df = make_df(
                            param_name,
                            technology=t,
                            value=val[regions[regions == rg].index[0]],
                            emission=emi,
                            unit="t",
                            node_loc=rg,
                            **common,
                        )
                # Parameters with only a parameter name
                else:
                    df = make_df(
                        param_name,
                        technology=t,
                        value=val[regions[regions == rg].index[0]],
                        unit="t",
                        node_loc=rg,
                        **common,
                    )

                # Copy parameters to all regions
                if (
                    (len(regions) == 1)
                    and len(set(df["node_loc"])) == 1
                    and list(set(df["node_loc"]))[0] != glb_reg
                ):
                    df["node_loc"] = None
                    df = df.pipe(broadcast, node_loc=nodes)

                results[param_name].append(df)

    return {par_name: pd.concat(dfs) for par_name, dfs in results.items()}
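

# NOTE: a standalone sketch of the vintage-efficiency arithmetic in
# assign_input_outpt above: the electricity input of a plant grows by 10% for
# each period it ages beyond its vintage year (younger plants are more
# efficient). This simplification assumes every later period is an active
# year of each vintage; the numbers are invented.
def _example_vintage_efficiency(base=1.0, year_vtg=(2020, 2025, 2030)):
    year_vtg = sorted(year_vtg)
    values = []
    for yr_v in year_vtg:
        temp = [base]
        later_years = [y for y in year_vtg if y >= yr_v]
        for j in range(len(later_years) - 1):
            temp.append(temp[j] * 1.1)
        values.extend(temp)  # one value per (vintage, active-year) pair
    return values  # [1.0, 1.1, 1.21, 1.0, 1.1, 1.0]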


def gen_data_aluminum(
    scenario: message_ix.Scenario, dry_run: bool = False
) -> dict[str, pd.DataFrame]:
    """Generate the parametrization of the aluminum sector.

    Parameters
    ----------
    scenario: message_ix.Scenario
        Scenario instance to build the aluminum model on
    dry_run: bool
        *not implemented*

    Returns
    -------
    dict[str, pd.DataFrame]
        dict with MESSAGEix parameter names as keys and their parametrization
        as values, stored in pd.DataFrame
    """
    context = read_config()
    config = context["material"]["aluminum"]

    # Information about the scenario, e.g. nodes and years
    s_info = ScenarioInfo(scenario)
    ssp = get_ssp_from_context(context)

    # Techno-economic assumptions
    data_aluminum, data_aluminum_rel, data_aluminum_ts = read_data_aluminum(scenario)

    modelyears = s_info.Y
    yv_ya = s_info.yv_ya
    nodes = nodes_ex_world(s_info.N)
    global_region = [i for i in s_info.N if i.endswith("_GLB")][0]

    const_dict = gen_data_alu_const(
        data_aluminum, config, global_region, modelyears, yv_ya, nodes
    )

    demand_dict = {}
    demand_dict["demand"] = material_demand_calc.derive_demand(
        "aluminum", scenario, ssp=ssp
    )

    ts_dict = gen_data_alu_ts(data_aluminum_ts, nodes)
    rel_dict = gen_data_alu_rel(data_aluminum_rel, modelyears)
    trade_dict = gen_data_alu_trade(scenario)

    # Combine the parameter data from all sources into one dict
    results_aluminum = combine_df_dictionaries(
        const_dict, ts_dict, rel_dict, demand_dict, trade_dict
    )
    return results_aluminum
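

# NOTE: a minimal end-to-end sketch for the entry point above, assuming a
# connected ixmp.Platform and an ixmp version providing Scenario.transact();
# the model/scenario names are hypothetical placeholders.
def _example_gen_data_aluminum():
    import ixmp

    mp = ixmp.Platform()
    scen = message_ix.Scenario(mp, model="MESSAGEix-Materials", scenario="baseline")

    results = gen_data_aluminum(scen)
    # Each key is a MESSAGEix parameter name ready for Scenario.add_par()
    with scen.transact("Add aluminum sector parametrization"):
        for par_name, df in results.items():
            scen.add_par(par_name, df)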


def gen_mock_demand_aluminum(scenario: message_ix.Scenario) -> pd.DataFrame:
    s_info = ScenarioInfo(scenario)
    nodes = s_info.N
    nodes.remove("World")

    # Demand at product level (IAI Global Aluminum Cycle 2018)
    # Globally: 82.4 Mt (domestic production + imports)
    # AFR: no data
    # CPA - China: 28.2 Mt
    # EEU / 2 + WEU / 2 = Europe: 12.5 Mt
    # FSU: no data
    # LAM - South America: 2.5 Mt
    # MEA - Middle East: 2 Mt
    # NAM - North America: 14.1 Mt
    # PAO - Japan: 3 Mt
    # PAS / 2 + SAS / 2 - Other Asia: 11.5 Mt
    # The remaining 8.612 Mt is shared between AFR and FSU.
    # These values are used as 2020 data.
    # For R12, the CPA demand is split between R12_RCPA and R12_CHN
    # as 0.1 and 0.9.

    # The order:
    # r = ['R12_AFR', 'R12_RCPA', 'R12_EEU', 'R12_FSU', 'R12_LAM', 'R12_MEA',
    #      'R12_NAM', 'R12_PAO', 'R12_PAS', 'R12_SAS', 'R12_WEU', 'R12_CHN']

    if "R12_CHN" in nodes:
        nodes.remove("R12_GLB")
        sheet_n = "data_R12"
        region_set = "R12_"
        d = [3, 2, 6, 5, 2.5, 2, 13.6, 3, 4.8, 4.8, 6, 26]
    else:
        nodes.remove("R11_GLB")
        sheet_n = "data_R11"
        region_set = "R11_"
        d = [3, 28, 6, 5, 2.5, 2, 13.6, 3, 4.8, 4.8, 6]

    # SSP2 R11 baseline GDP projection
    gdp_growth = pd.read_excel(
        package_data_path("material", "other", "iamc_db ENGAGE baseline GDP PPP.xlsx"),
        sheet_name=sheet_n,
    )
    gdp_growth = gdp_growth.loc[
        (gdp_growth["Scenario"] == "baseline") & (gdp_growth["Region"] != "World")
    ].drop(["Model", "Variable", "Unit", "Notes", 2000, 2005], axis=1)
    gdp_growth["Region"] = region_set + gdp_growth["Region"]

    # Scale the 2020 demand with the regional GDP trajectory
    demand2020_al = (
        pd.DataFrame({"Region": nodes, "Val": d})
        .join(gdp_growth.set_index("Region"), on="Region")
        .rename(columns={"Region": "node"})
    )

    demand2020_al.iloc[:, 3:] = (
        demand2020_al.iloc[:, 3:]
        .div(demand2020_al[2020], axis=0)
        .multiply(demand2020_al["Val"], axis=0)
    )

    demand2020_al = pd.melt(
        demand2020_al.drop(["Val", "Scenario"], axis=1),
        id_vars=["node"],
        var_name="year",
        value_name="value",
    )

    return demand2020_al
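

# NOTE: the scaling above follows
#     demand_r(y) = demand_r(2020) * GDP_r(y) / GDP_r(2020)
# A standalone sketch of that arithmetic with invented numbers:
def _example_gdp_scaling():
    base_demand = 14.1  # Mt in 2020, e.g. North America
    gdp = {2020: 20.0, 2025: 22.0, 2030: 24.0}  # invented GDP trajectory
    return {year: base_demand * value / gdp[2020] for year, value in gdp.items()}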


def gen_data_alu_trade(scenario: message_ix.Scenario) -> dict[str, pd.DataFrame]:
    results = defaultdict(list)

    data_trade = pd.read_csv(
        package_data_path("material", "aluminum", "aluminum_trade.csv")
    )
    data_trade = data_trade.drop_duplicates()

    s_info = ScenarioInfo(scenario)
    yv_ya = s_info.yv_ya
    year_all = yv_ya["year_vtg"].unique()
    data_trade = data_trade[data_trade["Year"].isin(year_all)]

    # Split Europe as 0.7 West Europe (WEU) and 0.3 East Europe (EEU)
    data_trade.loc[(data_trade["Region"] == "Europe"), "Value"] *= 0.7
    data_trade.loc[(data_trade["Region"] == "Europe"), "Region"] = "West Europe"

    data_trade_eeu = data_trade[data_trade["Region"] == "West Europe"].copy()
    data_trade_eeu["Value"] *= 0.3 / 0.7
    data_trade_eeu["Region"] = "East Europe"

    data_trade = pd.concat([data_trade, data_trade_eeu])

    # Sum Japan and Oceania as PAO
    condition = (data_trade["Region"] == "Japan") | (data_trade["Region"] == "Oceania")
    data_trade_pao = data_trade.loc[condition]
    data_trade_pao = (
        data_trade_pao.groupby(["Variable", "Year"])["Value"].sum().reset_index()
    )
    data_trade_pao["Region"] = "Pacific OECD"

    data_trade = pd.concat([data_trade, data_trade_pao])
    condition_updated = (data_trade["Region"] == "Japan") | (
        data_trade["Region"] == "Oceania"
    )
    data_trade = data_trade.drop(data_trade[condition_updated].index)
    data_trade.reset_index(drop=True, inplace=True)

    # Divide Other Asia 50-50 into SAS and PAS
    data_trade.loc[(data_trade["Region"] == "Other Asia"), "Value"] *= 0.5
    data_trade.loc[(data_trade["Region"] == "Other Asia"), "Region"] = "South Asia"

    data_trade_pas = data_trade[data_trade["Region"] == "South Asia"].copy()
    data_trade_pas["Region"] = "Other Pacific Asia"

    data_trade = pd.concat([data_trade, data_trade_pas])

    # Divide Other Producers 50-50 into Africa and FSU
    data_trade.loc[(data_trade["Region"] == "Other Producers"), "Value"] *= 0.5
    data_trade.loc[(data_trade["Region"] == "Other Producers"), "Region"] = "Africa"

    data_trade_fsu = data_trade[data_trade["Region"] == "Africa"].copy()
    data_trade_fsu["Region"] = "Former Soviet Union"

    data_trade = pd.concat([data_trade, data_trade_fsu])

    # Drop non-producers
    condition = data_trade["Region"] == "Non Producers"
    data_trade = data_trade.drop(data_trade[condition].index)

    if "R12_CHN" in s_info.N:
        region_tag = "R12_"
        china_mapping = "R12_CHN"
    else:
        region_tag = "R11_"
        china_mapping = "R11_CPA"

    region_mapping = {
        "China": china_mapping,
        "West Europe": region_tag + "WEU",
        "East Europe": region_tag + "EEU",
        "Pacific OECD": region_tag + "PAO",
        "South Asia": region_tag + "SAS",
        "Other Pacific Asia": region_tag + "PAS",
        "Africa": region_tag + "AFR",
        "Former Soviet Union": region_tag + "FSU",
        "Middle East": region_tag + "MEA",
        "North America": region_tag + "NAM",
        "South America": region_tag + "LAM",
    }

    # Add the data as historical_activity
    data_trade = data_trade.replace(region_mapping)
    data_trade.rename(
        columns={"Region": "node_loc", "Year": "year_act", "Value": "value"},
        inplace=True,
    )

    # Trade is at the product level.
    # For imports this corresponds to: USE|Inputs|Imports
    data_import = data_trade[data_trade["Variable"] == "USE|Inputs|Imports"]
    data_import_hist = data_import[data_import["year_act"] <= 2015].copy()
    data_import_hist["technology"] = "import_aluminum"
    data_import_hist["mode"] = "M1"
    data_import_hist["time"] = "year"
    data_import_hist["unit"] = "-"
    data_import_hist.drop(["Variable"], axis=1, inplace=True)
    data_import_hist = data_import_hist.reset_index(drop=True)

    # For exports this corresponds to: MANUFACTURING|Outputs|Exports
    data_export = data_trade[data_trade["Variable"] == "MANUFACTURING|Outputs|Exports"]
    data_export_hist = data_export[data_export["year_act"] <= 2015].copy()
    data_export_hist["technology"] = "export_aluminum"
    data_export_hist["mode"] = "M1"
    data_export_hist["time"] = "year"
    data_export_hist["unit"] = "-"
    data_export_hist.drop(["Variable"], axis=1, inplace=True)
    data_export_hist = data_export_hist.reset_index(drop=True)

    results["historical_activity"].append(data_export_hist)
    results["historical_activity"].append(data_import_hist)

    # Add data as historical_new_capacity for export
    for r in data_export_hist["node_loc"].unique():
        df_hist_cap = data_export_hist[data_export_hist["node_loc"] == r].copy()
        df_hist_cap = df_hist_cap.sort_values(by="year_act")
        # Capacity additions per period: the increase in activity between
        # consecutive periods, divided by the 5-year period length
        df_hist_cap["value_difference"] = df_hist_cap["value"].diff()
        df_hist_cap["value_difference"] = df_hist_cap["value_difference"].fillna(
            df_hist_cap["value"]
        )
        df_hist_cap["historical_new_capacity"] = df_hist_cap["value_difference"] / 5
        df_hist_cap = df_hist_cap.drop(
            columns=["mode", "time", "value", "value_difference"], axis=1
        )
        df_hist_cap.rename(
            columns={"historical_new_capacity": "value", "year_act": "year_vtg"},
            inplace=True,
        )
        # Declining activity produces no new capacity
        df_hist_cap["value"] = df_hist_cap["value"].apply(lambda x: 0 if x < 0 else x)
        df_hist_cap["unit"] = "-"

        results["historical_new_capacity"].append(df_hist_cap)

    # Fix the 2020 and 2025 values for China
    import_chn = data_import[
        (data_import["year_act"] == 2020) & (data_import["node_loc"] == "R12_CHN")
    ]
    export_chn = data_export[
        (data_export["year_act"] == 2020) & (data_export["node_loc"] == "R12_CHN")
    ]

    # Merge the DataFrames on 'node_loc' and 'year_act'
    merged_df = pd.merge(
        import_chn,
        export_chn,
        on=["node_loc", "year_act"],
        suffixes=("_import", "_export"),
    )

    # Subtract 'value_import' from 'value_export' to get the net export value
    merged_df["value"] = merged_df["value_export"] - merged_df["value_import"]

    # Select the relevant columns for the final DataFrame
    bound_act_net_export_chn = merged_df[["node_loc", "year_act", "value"]].copy()
    bound_act_net_export_chn["technology"] = "export_aluminum"
    bound_act_net_export_chn["mode"] = "M1"
    bound_act_net_export_chn["time"] = "year"
    bound_act_net_export_chn["unit"] = "-"

    bound_act_net_export_chn_2025 = bound_act_net_export_chn.replace({2020: 2025})

    results["bound_activity_up"].append(bound_act_net_export_chn)
    results["bound_activity_lo"].append(bound_act_net_export_chn)
    results["bound_activity_up"].append(bound_act_net_export_chn_2025)
    results["bound_activity_lo"].append(bound_act_net_export_chn_2025)

    return {par_name: pd.concat(dfs) for par_name, dfs in results.items()}
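

# NOTE: a standalone sketch of the historical_new_capacity derivation above:
# the period-to-period increase in export activity divided by the 5-year
# period length, with declines clipped to zero. The activity numbers are
# invented.
def _example_hist_new_capacity():
    activity = pd.Series([2.0, 3.5, 3.0], index=[2005, 2010, 2015])
    diff = activity.diff().fillna(activity)  # first period uses the level itself
    new_capacity = (diff / 5).clip(lower=0)  # no negative capacity additions
    return new_capacity  # 2005: 0.4, 2010: 0.3, 2015: 0.0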