Source code for message_ix_models.model.material.data_aluminum

from collections import defaultdict

import message_ix
import pandas as pd
from message_ix import make_df

from message_ix_models import ScenarioInfo
from message_ix_models.util import (
    broadcast,
    nodes_ex_world,
    package_data_path,
    same_node,
)

from .data_util import read_rel, read_timeseries
from .material_demand import material_demand_calc
from .util import combine_df_dictionaries, get_ssp_from_context, read_config


def read_data_aluminum(
    scenario: message_ix.Scenario,
) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """Read and clean data from :file:`aluminum_techno_economic.xlsx`.

    Parameters
    ----------
    scenario: message_ix.Scenario
        Scenario instance to build aluminum on

    Returns
    -------
    tuple of three pd.DataFrames
        returns aluminum data in three separate groups:
        time-independent parameters, relation parameters and
        time-dependent parameters
    """
    # Ensure config is loaded, get the context
    s_info = ScenarioInfo(scenario)

    fname = "aluminum_techno_economic.xlsx"

    if "R12_CHN" in s_info.N:
        sheet_n = "data_R12"
    else:
        sheet_n = "data_R11"

    # Read the file
    data_alu = pd.read_excel(
        package_data_path("material", "aluminum", fname), sheet_name=sheet_n
    )

    # Drop columns that don't contain useful information
    data_alu = data_alu.drop(["Source", "Description"], axis=1)

    data_alu_rel = read_rel(scenario, "aluminum", fname)

    data_aluminum_ts = read_timeseries(scenario, "aluminum", fname)

    # Unit conversion is currently done in the Excel file; it could also be
    # done here to make sure the same units are used.
    return data_alu, data_alu_rel, data_aluminum_ts
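
# A minimal usage sketch, assuming an existing Scenario ``scen`` built on R12
# nodes; ``_example_read_data_aluminum`` is a hypothetical helper added for
# illustration, not part of the original module.
def _example_read_data_aluminum(scen: message_ix.Scenario) -> None:
    data_alu, data_alu_rel, data_alu_ts = read_data_aluminum(scen)
    print(data_alu.head())      # time-independent techno-economic parameters
    print(data_alu_rel.head())  # relation parameters
    print(data_alu_ts.head())   # time-dependent parameters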
def gen_data_alu_ts(data: pd.DataFrame, nodes: list) -> dict[str, pd.DataFrame]:
    """Generate time-variable parameter data for the aluminum sector.

    Parameters
    ----------
    data: pd.DataFrame
        time-variable data from the input file
    nodes: list
        regions of the model

    Returns
    -------
    dict[str, pd.DataFrame]
        key-value pairs of parameter names and parameter data
    """
    tec_ts = set(data.technology)  # technologies in the timeseries sheet

    common = dict(
        time="year",
        time_origin="year",
        time_dest="year",
    )

    par_dict = defaultdict(list)
    for t in tec_ts:
        param_name = data.loc[(data["technology"] == t), "parameter"].unique()

        for p in set(param_name):
            val = data.loc[
                (data["technology"] == t) & (data["parameter"] == p),
                "value",
            ]
            mod = data.loc[
                (data["technology"] == t) & (data["parameter"] == p),
                "mode",
            ]
            yr = data.loc[
                (data["technology"] == t) & (data["parameter"] == p),
                "year",
            ]
            if p == "var_cost":
                df = make_df(
                    p,
                    technology=t,
                    value=val,
                    unit="t",
                    year_vtg=yr,
                    year_act=yr,
                    mode=mod,
                    **common,
                ).pipe(broadcast, node_loc=nodes)
            else:
                rg = data.loc[
                    (data["technology"] == t) & (data["parameter"] == p),
                    "region",
                ]
                df = make_df(
                    p,
                    technology=t,
                    value=val,
                    unit="t",
                    year_vtg=yr,
                    year_act=yr,
                    mode=mod,
                    node_loc=rg,
                    **common,
                )
            par_dict[p].append(df)

    return {par_name: pd.concat(dfs) for par_name, dfs in par_dict.items()}
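
# A hedged sketch of how the function above could be driven; the helper name
# is hypothetical and ``scen`` is an assumed existing Scenario.
def _example_gen_data_alu_ts(scen: message_ix.Scenario) -> None:
    s_info = ScenarioInfo(scen)
    _, _, data_aluminum_ts = read_data_aluminum(scen)
    ts_dict = gen_data_alu_ts(data_aluminum_ts, nodes_ex_world(s_info.N))
    print(sorted(ts_dict))  # parameter names, e.g. "var_cost"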
def gen_data_alu_rel(data: pd.DataFrame, years: list) -> dict[str, pd.DataFrame]:
    par_dict = defaultdict(list)
    regions = set(data["Region"].values)
    for reg in regions:
        for r in data["relation"].unique():
            if r is None:
                break

            params = set(data.loc[(data["relation"] == r), "parameter"].values)

            # This relation should start from 2020
            if r == "minimum_recycling_aluminum":
                modelyears_copy = years[:]
                if 2020 in modelyears_copy:
                    modelyears_copy.remove(2020)

                common_rel = dict(
                    year_rel=modelyears_copy,
                    year_act=modelyears_copy,
                    mode="M1",
                    relation=r,
                )
            else:
                # Use all the model years for other relations
                common_rel = dict(
                    year_rel=years,
                    year_act=years,
                    mode="M1",
                    relation=r,
                )

            for par_name in params:
                if par_name == "relation_activity":
                    tec_list = data.loc[
                        ((data["relation"] == r) & (data["parameter"] == par_name)),
                        "technology",
                    ]

                    for tec in tec_list.unique():
                        val = data.loc[
                            (
                                (data["relation"] == r)
                                & (data["parameter"] == par_name)
                                & (data["technology"] == tec)
                                & (data["Region"] == reg)
                            ),
                            "value",
                        ].values[0]

                        df = make_df(
                            par_name,
                            technology=tec,
                            value=val,
                            unit="-",
                            node_loc=reg,
                            node_rel=reg,
                            **common_rel,
                        ).pipe(same_node)

                        par_dict[par_name].append(df)

                elif (par_name == "relation_upper") | (par_name == "relation_lower"):
                    val = data.loc[
                        (
                            (data["relation"] == r)
                            & (data["parameter"] == par_name)
                            & (data["Region"] == reg)
                        ),
                        "value",
                    ].values[0]

                    df = make_df(
                        par_name, value=val, unit="-", node_rel=reg, **common_rel
                    )

                    par_dict[par_name].append(df)

    return {par_name: pd.concat(dfs) for par_name, dfs in par_dict.items()}


def assign_input_outpt(
    split, param_name, regions, val, t, rg, glb_reg, common, yv_ya, nodes
):
    # Assign commodity and level names; a mode could be added later
    com = split[1]
    lev = split[2]

    if (param_name == "input") and (lev == "import"):
        df = make_df(
            param_name,
            technology=t,
            commodity=com,
            level=lev,
            value=val[regions[regions == rg].index[0]],
            unit="t",
            node_loc=rg,
            node_origin=glb_reg,
            **common,
        )
    elif (param_name == "output") and (lev == "export"):
        df = make_df(
            param_name,
            technology=t,
            commodity=com,
            level=lev,
            value=val[regions[regions == rg].index[0]],
            unit="t",
            node_loc=rg,
            node_dest=glb_reg,
            **common,
        )
    # Assign higher efficiency to younger plants
    elif (
        ((t == "soderberg_aluminum") or (t == "prebake_aluminum"))
        & (com == "electr")
        & (param_name == "input")
    ):
        # All the vintage years
        year_vtg = sorted(set(yv_ya.year_vtg.values))
        # Collect the values for each combination of vintage and active years
        input_values_all = []
        for yr_v in year_vtg:
            # The initial-year efficiency value
            input_values_temp = [val[regions[regions == rg].index[0]]]
            # Reduction after the vintage year
            year_vtg_filtered = list(filter(lambda op: op >= yr_v, year_vtg))
            # Filter the active model years
            year_act = yv_ya.loc[yv_ya["year_vtg"] == yr_v, "year_act"].values
            for i in range(len(year_vtg_filtered) - 1):
                input_values_temp.append(input_values_temp[i] * 1.1)

            act_year_no = len(year_act)
            input_values_temp = input_values_temp[-act_year_no:]
            input_values_all = input_values_all + input_values_temp

        df = make_df(
            param_name,
            technology=t,
            commodity=com,
            level=lev,
            value=input_values_all,
            unit="t",
            node_loc=rg,
            **common,
        ).pipe(same_node)
    else:
        df = make_df(
            param_name,
            technology=t,
            commodity=com,
            level=lev,
            value=val[regions[regions == rg].index[0]],
            unit="t",
            node_loc=rg,
            **common,
        ).pipe(same_node)

    # Copy parameters to all regions when node_loc is not the global region
    if (len(regions) == 1) and (rg != glb_reg):
        df["node_loc"] = None
        df = df.pipe(broadcast, node_loc=nodes)
        # Use same_node only for non-trade technologies
        if (lev != "import") and (lev != "export"):
            df = df.pipe(same_node)

    return df


def gen_data_alu_const(data, config, glb_reg, years, yv_ya, nodes):
    results = defaultdict(list)
    for t in config["technology"]["add"]:
        params = data.loc[(data["technology"] == t), "parameter"].unique()

        # Obtain the active and vintage years
        av = data.loc[(data["technology"] == t), "availability"].values[0]
        years = [year for year in years if year >= av]
        yv_ya = yv_ya.loc[yv_ya.year_vtg >= av]

        common = dict(
            year_vtg=yv_ya.year_vtg,
            year_act=yv_ya.year_act,
            mode="M1",
            time="year",
            time_origin="year",
            time_dest="year",
        )

        # Iterate over parameters
        for par in params:
            # Obtain the parameter name, commodity, level and emission
            split = par.split("|")
            param_name = split[0]

            # Obtain the scalar value for the parameter
            val = data.loc[
                ((data["technology"] == t) & (data["parameter"] == par)),
                "value",
            ]

            regions = data.loc[
                ((data["technology"] == t) & (data["parameter"] == par)),
                "region",
            ]

            for rg in regions:
                # Parameters whose names include index names
                if len(split) > 1:
                    if (param_name == "input") | (param_name == "output"):
                        df = assign_input_outpt(
                            split,
                            param_name,
                            regions,
                            val,
                            t,
                            rg,
                            glb_reg,
                            common,
                            yv_ya,
                            nodes,
                        )
                    elif param_name == "emission_factor":
                        # Assign the emission type
                        emi = split[1]

                        df = make_df(
                            param_name,
                            technology=t,
                            value=val[regions[regions == rg].index[0]],
                            emission=emi,
                            unit="t",
                            node_loc=rg,
                            **common,
                        )
                # Parameters with only a parameter name
                else:
                    df = make_df(
                        param_name,
                        technology=t,
                        value=val[regions[regions == rg].index[0]],
                        unit="t",
                        node_loc=rg,
                        **common,
                    )

                # Copy parameters to all regions
                if (
                    (len(regions) == 1)
                    and len(set(df["node_loc"])) == 1
                    and list(set(df["node_loc"]))[0] != glb_reg
                ):
                    df["node_loc"] = None
                    df = df.pipe(broadcast, node_loc=nodes)

                results[param_name].append(df)

    return {par_name: pd.concat(dfs) for par_name, dfs in results.items()}
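
# The constant-parameter sheet encodes extra dimensions in the parameter
# name, separated by "|", which gen_data_alu_const() parses above. A
# hypothetical illustration of the convention; "input|aluminum|import" is a
# made-up label, not necessarily a row of the input file.
def _example_parameter_name_split() -> None:
    split = "input|aluminum|import".split("|")
    param_name, com, lev = split[0], split[1], split[2]
    assert (param_name, com, lev) == ("input", "aluminum", "import")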
def gen_data_aluminum(
    scenario: message_ix.Scenario, dry_run: bool = False
) -> dict[str, pd.DataFrame]:
    """Generate the parametrization of the aluminum sector.

    Parameters
    ----------
    scenario: message_ix.Scenario
        Scenario instance to build aluminum model on
    dry_run: bool
        *not implemented*

    Returns
    -------
    dict[str, pd.DataFrame]
        dict with MESSAGEix parameter names as keys and their
        parametrization as values, stored in pd.DataFrame
    """
    context = read_config()
    config = context["material"]["aluminum"]

    # Information about scenario, e.g. node, year
    s_info = ScenarioInfo(scenario)
    ssp = get_ssp_from_context(context)

    # Techno-economic assumptions
    data_aluminum, data_aluminum_rel, data_aluminum_ts = read_data_aluminum(scenario)

    modelyears = s_info.Y
    yv_ya = s_info.yv_ya
    nodes = nodes_ex_world(s_info.N)
    global_region = [i for i in s_info.N if i.endswith("_GLB")][0]

    const_dict = gen_data_alu_const(
        data_aluminum, config, global_region, modelyears, yv_ya, nodes
    )

    demand_dict = {
        "demand": material_demand_calc.derive_demand(
            "aluminum", scenario, old_gdp=False, ssp=ssp
        )
    }

    ts_dict = gen_data_alu_ts(data_aluminum_ts, nodes)
    rel_dict = gen_data_alu_rel(data_aluminum_rel, modelyears)

    # Merge the per-type dicts into a single {parameter name: DataFrame} map
    results_aluminum = combine_df_dictionaries(
        const_dict, ts_dict, rel_dict, demand_dict
    )

    return results_aluminum
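
# A hedged sketch of applying the generated data to a scenario; the helper
# name is hypothetical, and ``scen`` is assumed to be a checked-in Scenario
# with the required sets. Scenario.transact() and add_par() are standard
# message_ix/ixmp API.
def _example_build_aluminum(scen: message_ix.Scenario) -> None:
    results = gen_data_aluminum(scen)
    with scen.transact("add aluminum parametrization"):
        for par_name, df in results.items():
            scen.add_par(par_name, df)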
def gen_mock_demand_aluminum(scenario: message_ix.Scenario) -> pd.DataFrame:
    s_info = ScenarioInfo(scenario)
    nodes = s_info.N
    nodes.remove("World")

    # Demand at the product level (IAI Global Aluminum Cycle 2018)
    # Globally: 82.4 Mt (domestic production + imports)
    # AFR: no data
    # CPA - China: 28.2 Mt
    # EEU / 2 + WEU / 2 = Europe: 12.5 Mt
    # FSU: no data
    # LAM - South America: 2.5 Mt
    # MEA - Middle East: 2 Mt
    # NAM - North America: 14.1 Mt
    # PAO - Japan: 3 Mt
    # PAS / 2 + SAS / 2 - Other Asia: 11.5 Mt
    # The remaining 8.612 Mt is shared between AFR and FSU.
    # These values are used as 2020 data.
    # For R12, the CPA demand is split between R12_RCPA and R12_CHN in shares
    # of 0.1 and 0.9.

    # The order of the values:
    # r = ['R12_AFR', 'R12_RCPA', 'R12_EEU', 'R12_FSU', 'R12_LAM', 'R12_MEA',
    #      'R12_NAM', 'R12_PAO', 'R12_PAS', 'R12_SAS', 'R12_WEU', 'R12_CHN']

    if "R12_CHN" in nodes:
        nodes.remove("R12_GLB")
        sheet_n = "data_R12"
        region_set = "R12_"
        d = [3, 2, 6, 5, 2.5, 2, 13.6, 3, 4.8, 4.8, 6, 26]
    else:
        nodes.remove("R11_GLB")
        sheet_n = "data_R11"
        region_set = "R11_"
        d = [3, 28, 6, 5, 2.5, 2, 13.6, 3, 4.8, 4.8, 6]

    # SSP2 R11 baseline GDP projection
    gdp_growth = pd.read_excel(
        package_data_path("material", "other", "iamc_db ENGAGE baseline GDP PPP.xlsx"),
        sheet_name=sheet_n,
    )

    gdp_growth = gdp_growth.loc[
        (gdp_growth["Scenario"] == "baseline") & (gdp_growth["Region"] != "World")
    ].drop(["Model", "Variable", "Unit", "Notes", 2000, 2005], axis=1)

    gdp_growth["Region"] = region_set + gdp_growth["Region"]

    demand2020_al = (
        pd.DataFrame({"Region": nodes, "Val": d})
        .join(gdp_growth.set_index("Region"), on="Region")
        .rename(columns={"Region": "node"})
    )

    # Scale the 2020 demand with each region's GDP growth trajectory
    demand2020_al.iloc[:, 3:] = (
        demand2020_al.iloc[:, 3:]
        .div(demand2020_al[2020], axis=0)
        .multiply(demand2020_al["Val"], axis=0)
    )

    demand2020_al = pd.melt(
        demand2020_al.drop(["Val", "Scenario"], axis=1),
        id_vars=["node"],
        var_name="year",
        value_name="value",
    )

    return demand2020_al
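
# Toy illustration (made-up numbers, hypothetical helper) of the scaling
# applied above: each region's GDP trajectory is indexed to its 2020 value
# and multiplied by the 2020 demand taken from the list ``d``.
def _example_demand_scaling() -> float:
    gdp_2020, gdp_2030 = 10.0, 14.0  # hypothetical GDP (PPP) values
    demand_2020 = 3.0  # Mt, as in ``d``
    return demand_2020 * gdp_2030 / gdp_2020  # -> 4.2 Mt in 2030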