# Source code for message_ix_models.model.material.data_cement

from collections import defaultdict

import message_ix
import pandas as pd
from message_ix import make_df

from message_ix_models import ScenarioInfo
from message_ix_models.model.material.data_util import (
    calculate_ini_new_cap,
    read_sector_data,
    read_timeseries,
)
from message_ix_models.model.material.material_demand import material_demand_calc
from message_ix_models.model.material.util import get_ssp_from_context, read_config
from message_ix_models.util import (
    broadcast,
    nodes_ex_world,
    package_data_path,
    same_node,
)


def gen_mock_demand_cement(scenario: message_ix.Scenario) -> pd.DataFrame:
    """Generate mock cement demand by scaling 2020 estimates with GDP growth.

    Regional 2020 demand is the sum of two hard-coded components: the production of
    the countries listed individually in the USGS table, and the remainder of the
    global total allocated by regional percentage shares. The 2020 values are then
    projected to the other periods in proportion to each region's GDP trajectory,
    read from the "baseline" scenario rows of
    ``iamc_db ENGAGE baseline GDP PPP.xlsx``.

    Parameters
    ----------
    scenario :
        Scenario whose node set selects the regional aggregation: R12 if it contains
        "R12_CHN", otherwise R11.

    Returns
    -------
    pandas.DataFrame
        Long-format data with columns "node", "year" and "value" (values presumably
        in Mt/year — the hard-coded inputs are not labelled with units in code).

    Raises
    ------
    ValueError
        If "World" or the ``*_GLB`` node is missing from the scenario, or if the
        remaining nodes are not exactly the regions for which 2020 demand is defined.
    """
    nodes = ScenarioInfo(scenario).N
    nodes.remove("World")

    # 2019 production by country (USGS)
    # p43 of https://pubs.usgs.gov/periodicals/mcs2020/mcs2020-cement.pdf
    #
    # The rest (~900 Mt) is allocated by % values in
    # http://www.cembureau.eu/media/clkdda45/activity-report-2019.pdf
    #
    # The three lists in each branch are aligned element by element.
    if "R12_CHN" in nodes:
        nodes.remove("R12_GLB")
        sheet_n = "data_R12"
        region_set = "R12_"

        # For R12, the R11 "CPA" demand is split 10 % / 90 % between RCPA and CHN
        regions = [
            "AFR",
            "RCPA",
            "EEU",
            "FSU",
            "LAM",
            "MEA",
            "NAM",
            "PAO",
            "PAS",
            "SAS",
            "WEU",
            "CHN",
        ]
        demand2020_top = [76, 229.5, 0, 57, 55, 60, 89, 54, 129, 320, 51, 2065.5]
        demand2020_rest = [
            4100 * 0.051 - 76,
            (4100 * 0.14 - 155) * 0.2 * 0.1,
            4100 * 0.064 * 0.5,
            4100 * 0.026 - 57,
            4100 * 0.046 * 0.5 - 55,
            (4100 * 0.14 - 155) * 0.2,
            4100 * 0.046 * 0.5,
            12,
            4100 * 0.003,
            (4100 * 0.14 - 155) * 0.6,
            4100 * 0.064 * 0.5 - 51,
            (4100 * 0.14 - 155) * 0.2 * 0.9,
        ]
    else:
        nodes.remove("R11_GLB")
        sheet_n = "data_R11"
        region_set = "R11_"

        regions = [
            "AFR",
            "CPA",
            "EEU",
            "FSU",
            "LAM",
            "MEA",
            "NAM",
            "PAO",
            "PAS",
            "SAS",
            "WEU",
        ]
        demand2020_top = [76, 2295, 0, 57, 55, 60, 89, 54, 129, 320, 51]
        demand2020_rest = [
            4100 * 0.051 - 76,
            (4100 * 0.14 - 155) * 0.2,
            4100 * 0.064 * 0.5,
            4100 * 0.026 - 57,
            4100 * 0.046 * 0.5 - 55,
            (4100 * 0.14 - 155) * 0.2,
            4100 * 0.046 * 0.5,
            12,
            4100 * 0.003,
            (4100 * 0.14 - 155) * 0.6,
            4100 * 0.064 * 0.5 - 51,
        ]

    # Key the 2020 demand by node name so the assignment does not depend on the
    # order in which the scenario happens to store its nodes
    demand_by_node = {
        region_set + region: top + rest
        for region, top, rest in zip(regions, demand2020_top, demand2020_rest)
    }
    if len(nodes) != len(demand_by_node) or set(nodes) != set(demand_by_node):
        raise ValueError(
            f"Scenario nodes {sorted(nodes)} do not match the regions with 2020 "
            f"cement demand data {sorted(demand_by_node)}"
        )
    d = [demand_by_node[node] for node in nodes]

    # Baseline GDP projection (originally described as "SSP2 R11"; the sheet read
    # depends on the regional aggregation chosen above)
    gdp_growth = pd.read_excel(
        package_data_path("material", "other", "iamc_db ENGAGE baseline GDP PPP.xlsx"),
        sheet_name=sheet_n,
    )

    gdp_growth = gdp_growth.loc[
        (gdp_growth["Scenario"] == "baseline") & (gdp_growth["Region"] != "World")
    ].drop(["Model", "Variable", "Unit", "Notes", 2000, 2005], axis=1)

    # Prefix the region codes so that they match the scenario's node names
    gdp_growth["Region"] = region_set + gdp_growth["Region"]

    # NOTE: an earlier variant derived 2010 demand from the IMAGE data set
    # (CEMENT.BvR2010.xlsx); it is not used.

    demand2020_cement = (
        pd.DataFrame({"Region": nodes, "value": d})
        .join(gdp_growth.set_index("Region"), on="Region")
        .rename(columns={"Region": "node"})
    )

    # Columns 0-2 are expected to be "node", "value" and "Scenario"; the remaining
    # ones are the period columns of the GDP sheet (TODO confirm against the file).
    # Rescale each GDP trajectory so that its 2020 entry equals the 2020 demand.
    demand2020_cement.iloc[:, 3:] = (
        demand2020_cement.iloc[:, 3:]
        .div(demand2020_cement[2020], axis=0)
        .multiply(demand2020_cement["value"], axis=0)
    )

    # NOTE: if 2020 cement demand from buildings becomes available, all periods
    # could be scaled down here by the 2020 ratio before reshaping; this is
    # currently not done.

    # Reshape from one column per period to long format
    demand2020_cement = pd.melt(
        demand2020_cement.drop(["value", "Scenario"], axis=1),
        id_vars=["node"],
        var_name="year",
        value_name="value",
    )

    return demand2020_cement


def gen_data_cement(
    scenario: message_ix.Scenario, dry_run: bool = False
) -> dict[str, pd.DataFrame]:
    """Generate data for materials representation of cement industry.

    Parameters
    ----------
    scenario :
        Scenario for which data is generated. It is used to obtain the node list and
        the vintage/activity years, and is passed on to the data readers and the
        demand calculation.
    dry_run :
        Not used by this function; kept so the signature stays unchanged.

    Returns
    -------
    dict of (str -> pandas.DataFrame)
        One data frame per parameter name. In addition to the parameters found in
        ``Global_cement_MESSAGE.xlsx`` this includes "demand", "addon_conversion"
        and "initial_new_capacity_up".
    """
    # Load configuration
    context = read_config()
    config = context["material"]["cement"]
    ssp = get_ssp_from_context(context)

    # Information about scenario, e.g. node, year
    s_info = ScenarioInfo(scenario)
    context.datafile = "Global_steel_cement_MESSAGE.xlsx"

    # Techno-economic assumptions
    data_cement = read_sector_data(scenario, "cement", "Global_cement_MESSAGE.xlsx")
    # Special treatment for time-dependent parameters
    data_cement_ts = read_timeseries(scenario, "cement", "Global_cement_MESSAGE.xlsx")
    tec_ts = set(data_cement_ts.technology)  # set of tecs with time-varying data

    # Lists of data frames, to be concatenated together at end
    results = defaultdict(list)

    yv_ya = s_info.yv_ya
    yv_ya = yv_ya.loc[yv_ya.year_vtg >= 1980]

    # Do not parametrize GLB region the same way
    nodes = nodes_ex_world(s_info.N)

    # Index values for time-series parameters; these supply their own years
    common_ts = dict(
        time="year",
        time_origin="year",
        time_dest="year",
    )
    # Index values for time-independent parameters and for the add-on data below.
    # Defined once here so that the add-on section does not depend on a variable
    # left over from the last loop iteration.
    common = dict(
        year_vtg=yv_ya.year_vtg,
        year_act=yv_ya.year_act,
        time="year",
        time_origin="year",
        time_dest="year",
    )

    # For each technology there are different input and output combinations
    # Iterate over technologies
    for t in config["technology"]["add"]:
        t = t.id

        # Special treatment for time-varying params
        if t in tec_ts:
            ts_rows = data_cement_ts.loc[data_cement_ts["technology"] == t]

            for p in set(ts_rows["parameter"]):
                rows = ts_rows.loc[ts_rows["parameter"] == p]
                ts_args = dict(
                    technology=t,
                    value=rows["value"],
                    unit="t",
                    year_vtg=rows["year"],
                    year_act=rows["year"],
                    mode=rows["mode"],
                    **common_ts,
                )

                if p == "var_cost":
                    # var_cost is applied to every node
                    df = make_df(p, **ts_args).pipe(broadcast, node_loc=nodes)
                else:
                    # Other parameters use the region given in the data
                    df = make_df(p, node_loc=rows["region"], **ts_args)

                results[p].append(df)

        # Iterate over time-independent parameters
        tec_rows = data_cement.loc[data_cement["technology"] == t]
        for par in tec_rows["parameter"].unique():
            # Obtain the parameter name and, after "|", commodity/level/emission/mode
            split = par.split("|")
            param_name = split[0]

            par_rows = tec_rows.loc[tec_rows["parameter"] == par]
            val = par_rows["value"]
            regions = par_rows["region"]

            for rg in regions:
                # Value from the first row for this region
                args = dict(
                    technology=t,
                    value=val[regions[regions == rg].index[0]],
                    unit="t",
                    node_loc=rg,
                    **common,
                )

                # For the parameters which include index names
                if len(split) > 1:
                    if param_name in ("input", "output"):
                        # Assign commodity, level and mode names
                        df = make_df(
                            param_name,
                            commodity=split[1],
                            level=split[2],
                            mode=split[3],
                            **args,
                        ).pipe(same_node)
                    elif param_name == "emission_factor":
                        # Assign the emission type and mode
                        df = make_df(
                            param_name,
                            emission=split[1],
                            mode=split[2],
                            **args,
                        )
                    else:
                        # e.g. time-independent var_cost
                        df = make_df(param_name, mode=split[1], **args)
                # Parameters with only parameter name
                else:
                    df = make_df(param_name, **args)

                # A single region entry is applied to all nodes
                if len(regions) == 1:
                    df["node_loc"] = None
                    df = df.pipe(broadcast, node_loc=nodes).pipe(same_node)

                results[param_name].append(df)

    # Create external demand param
    parname = "demand"
    df_demand = material_demand_calc.derive_demand("cement", scenario, ssp=ssp)
    results[parname].append(df_demand)

    # Add CCS as addon
    parname = "addon_conversion"

    technology_1 = ["clinker_dry_cement"]
    df_1 = make_df(
        parname, mode="M1", type_addon="dry_ccs_cement", value=1, unit="-", **common
    ).pipe(broadcast, node=nodes, technology=technology_1)

    technology_2 = ["clinker_wet_cement"]
    df_2 = make_df(
        parname, mode="M1", type_addon="wet_ccs_cement", value=1, unit="-", **common
    ).pipe(broadcast, node=nodes, technology=technology_2)

    results[parname].append(df_1)
    results[parname].append(df_2)

    # Concatenate to one data frame per parameter
    results = {par_name: pd.concat(dfs) for par_name, dfs in results.items()}

    # Values for the CCS clinker technologies, computed from the demand data
    results["initial_new_capacity_up"] = pd.concat(
        [
            calculate_ini_new_cap(
                df_demand=df_demand.copy(deep=True),
                technology="clinker_dry_ccs_cement",
                material="cement",
            ),
            calculate_ini_new_cap(
                df_demand=df_demand.copy(deep=True),
                technology="clinker_wet_ccs_cement",
                material="cement",
            ),
        ]
    )

    return results