Source code for message_ix_models.model.material.data_ammonia

"""
Data and parameter generation for the ammonia fertilizer sector in
the MESSAGEix-Materials model.

This module provides functions to read, process, and generate parameter data
for ammonia fertilizer technologies, demand, trade, and related constraints.
"""

from typing import TYPE_CHECKING, Any, Union, cast

import numpy as np
import pandas as pd
from message_ix import make_df

from message_ix_models import ScenarioInfo
from message_ix_models.model.material.demand import gen_demand_petro
from message_ix_models.model.material.util import (
    get_ssp_from_context,
    maybe_remove_water_tec,
    read_config,
)
from message_ix_models.util import (
    broadcast,
    merge_data,
    nodes_ex_world,
    package_data_path,
    same_node,
)

if TYPE_CHECKING:
    from message_ix import Scenario

    from message_ix_models.types import ParameterData

CONVERSION_FACTOR_NH3_N = 17 / 14
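# Note: 17/14 is the molar-mass ratio of NH3 (~17 g/mol) to N (~14 g/mol);
# multiplying a nitrogen quantity by this factor gives the corresponding
# ammonia quantity.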

ssp_mode_map = {
    "SSP1": "CTS core",
    "SSP2": "RTS core",
    "SSP3": "RTS high",
    "SSP4": "CTS high",
    "SSP5": "RTS high",
    "LED": "CTS core",  # TODO: move to even lower projection
}

iea_elasticity_map = {
    "CTS core": (0.3, 0.3),
    "CTS high": (0.48, 0.48),
    "RTS core": (0.3, 0.3),
    "RTS high": (0.48, 0.48),
}
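
# Illustrative note (not part of the original module): the two maps above are
# combined in gen_resid_demand_NH3() to select GDP elasticities for the active
# SSP, roughly as follows:
#
#     elast_2020, elast_2030 = iea_elasticity_map[ssp_mode_map["SSP2"]]
#     # -> (0.3, 0.3); the first value applies around 2020, the second around 2030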


def gen_all_NH3_fert(scenario: "Scenario", dry_run: bool = False) -> "ParameterData":
    """Generate all MESSAGEix parameter data for the ammonia fertilizer sector.

    Parameters
    ----------
    scenario :
        Scenario instance to build ammonia fertilizer model on.
    dry_run :
        If True, do not perform any file writing or scenario modification.
    """
    s_info = ScenarioInfo(scenario)
    return {
        **gen_data(scenario, s_info),
        **gen_data_rel(s_info),
        **gen_data_ts(s_info),
        **gen_demand(),
        **gen_land_input(scenario),
        **gen_resid_demand_NH3(scenario),
    }

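# Illustrative usage sketch (an assumption, not part of the module): the mapping
# of parameter names to DataFrames returned by gen_all_NH3_fert() could be added
# to an existing message_ix Scenario `scen`, e.g.:
#
#     data = gen_all_NH3_fert(scen)
#     with scen.transact("add ammonia fertilizer data"):
#         for par_name, df in data.items():
#             scen.add_par(par_name, df)
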
def broadcast_years(
    df_new: pd.DataFrame, max_lt: int, act_years: pd.Series, vtg_years: pd.Series
) -> pd.DataFrame:
    """Broadcast parameter DataFrame over scenario years and vintages.

    Parameters
    ----------
    df_new :
        DataFrame to broadcast.
    max_lt :
        Maximum technical lifetime.
    act_years :
        Years in modelling horizon.
    vtg_years :
        Vintage model years.
    """
    if "year_act" in df_new.columns:
        df_new = df_new.pipe(same_node).pipe(broadcast, year_act=act_years)

        if "year_vtg" in df_new.columns:
            df_new = df_new.pipe(
                broadcast,
                year_vtg=np.linspace(0, int(max_lt / 5), int(max_lt / 5 + 1), dtype=int),
            )
            df_new["year_vtg"] = df_new["year_act"] - 5 * df_new["year_vtg"]

            # remove years that are not in scenario set
            df_new = df_new[~df_new["year_vtg"].isin([2065, 2075, 2085, 2095, 2105])]
    else:
        if "year_vtg" in df_new.columns:
            df_new = df_new.pipe(same_node).pipe(broadcast, year_vtg=vtg_years)
    return df_new

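# Worked example (illustrative): with max_lt = 20 and a row with year_act = 2030,
# np.linspace(0, 4, 5) yields offsets 0..4, so year_vtg becomes 2030, 2025, 2020,
# 2015 and 2010; vintages outside the scenario year set (2065, 2075, 2085, 2095,
# 2105) are then dropped.
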
def gen_data(
    scenario: "Scenario",
    s_info: ScenarioInfo,
    lower_costs: bool = False,
) -> "ParameterData":
    """Generate time-independent parameter data for ammonia fertilizer technologies.

    Parameters
    ----------
    scenario :
        Scenario instance to generate ammonia model data for.
    s_info :
        Scenario information used to derive model regions and years.
    lower_costs :
        Switch to apply experimental lower costs for certain regions.
    """
    nodes = nodes_ex_world(s_info.N)

    df = pd.read_csv(
        package_data_path(
            "material",
            "ammonia",
            "data_R12.csv",
        ),
    )

    par_dict = {key: value for (key, value) in df.groupby("parameter")}
    for i in par_dict.keys():
        par_dict[i] = par_dict[i].dropna(axis=1)

    vtg_years = s_info.yv_ya[s_info.yv_ya.year_vtg > 2000]["year_vtg"]
    act_years = s_info.yv_ya[s_info.yv_ya.year_vtg > 2000]["year_act"]
    max_lt = par_dict["technical_lifetime"].value.max()
    vtg_years = vtg_years.drop_duplicates()
    act_years = act_years.drop_duplicates()

    for par_name in par_dict.keys():
        df = par_dict[par_name]
        # remove "default" node name to broadcast with all scenario regions later
        df["node_loc"] = df["node_loc"].apply(lambda x: None if x == "default" else x)
        df_new = make_df(par_name, **df)
        # split df into df with default values and df with regionalized values
        df_new_no_reg = df_new[df_new["node_loc"].isna()]
        df_new_reg = df_new[~df_new["node_loc"].isna()]
        # broadcast regions to default parameter values
        df_new_no_reg = df_new_no_reg.pipe(broadcast, node_loc=nodes)
        df_new = pd.concat([df_new_reg, df_new_no_reg])
        # broadcast scenario years
        df_new = broadcast_years(df_new, max_lt, act_years, vtg_years)
        # set import/export node_dest/origin to GLB for input/output
        set_exp_imp_nodes(df_new)
        par_dict[par_name] = df_new

    if lower_costs:
        par_dict = experiment_lower_CPA_SAS_costs(par_dict)

    df_lifetime = par_dict["technical_lifetime"]
    dict_lifetime = (
        df_lifetime.loc[:, ["technology", "value"]]
        .set_index("technology")
        .to_dict()["value"]
    )

    class missingdict(dict):
        def __missing__(self, key):
            return 1

    dict_lifetime = missingdict(dict_lifetime)
    for i in par_dict.keys():
        if ("year_vtg" in par_dict[i].columns) & ("year_act" in par_dict[i].columns):
            df_temp = par_dict[i]
            df_temp["lifetime"] = df_temp["technology"].map(dict_lifetime)
            df_temp = df_temp[
                (df_temp["year_act"] - df_temp["year_vtg"]) <= df_temp["lifetime"]
            ]
            par_dict[i] = df_temp.drop("lifetime", axis="columns")

    pars = ["inv_cost", "fix_cost"]
    tec_list = [
        "biomass_NH3",
        "electr_NH3",
        "gas_NH3",
        "coal_NH3",
        "fueloil_NH3",
        "biomass_NH3_ccs",
        "gas_NH3_ccs",
        "coal_NH3_ccs",
        "fueloil_NH3_ccs",
    ]
    cost_conv = pd.read_csv(
        package_data_path("material", "ammonia", "cost_conv_nh3.csv"),
        index_col=0,
        dtype={
            "year": int,
            "convergence": float,
        },
    )
    for p in pars:
        conv_cost_df = pd.DataFrame()
        df = par_dict[p]
        for tec in tec_list:
            year_col = "year_vtg" if p == "inv_cost" else "year_act"

            df_tecs = df[df["technology"] == tec]
            df_tecs = df_tecs.merge(cost_conv, left_on=year_col, right_index=True)
            df_tecs_nam = df_tecs[df_tecs["node_loc"] == "R12_NAM"]
            df_tecs = df_tecs.merge(
                df_tecs_nam[[year_col, "value"]], left_on=year_col, right_on=year_col
            )
            df_tecs["diff"] = df_tecs["value_x"] - df_tecs["value_y"]
            df_tecs["diff"] = df_tecs["diff"] * (1 - df_tecs["convergence"])
            df_tecs["new_val"] = df_tecs["value_x"] - df_tecs["diff"]
            df_tecs["value"] = df_tecs["new_val"]
            conv_cost_df = pd.concat([conv_cost_df, make_df(p, **df_tecs)])
        par_dict[p] = pd.concat([df[~df["technology"].isin(tec_list)], conv_cost_df])

    # HACK: quick fix to enable compatibility with water build
    maybe_remove_water_tec(scenario, par_dict)

    # add 2020 and 2025 CCS bounds
    merge_data(par_dict, gen_ccs_bounds(s_info))
    return par_dict

def gen_data_rel(s_info: ScenarioInfo) -> "ParameterData":
    """Generate relation parameter data for ammonia fertilizer sector.

    Parameters
    ----------
    s_info :
        Scenario information to infer model regions and years.
    """
    nodes = s_info.N
    if "World" in nodes:
        nodes.pop(nodes.index("World"))
    if "R12_GLB" in nodes:
        nodes.pop(nodes.index("R12_GLB"))

    df = pd.read_csv(
        package_data_path(
            "material",
            "ammonia",
            "relations_R12.csv",
        ),
    )
    df.groupby("parameter")
    par_dict = {key: value for (key, value) in df.groupby("parameter")}
    # for i in par_dict.keys():
    #     par_dict[i] = par_dict[i].dropna(axis=1)

    act_years = s_info.yv_ya[s_info.yv_ya.year_vtg > 2000]["year_act"]
    act_years = act_years.drop_duplicates()

    for par_name in par_dict.keys():
        df = par_dict[par_name]
        # remove "default" node name to broadcast with all scenario regions later
        df["node_rel"] = df["node_rel"].apply(lambda x: None if x == "all" else x)
        df_dict = cast(dict[str, Any], df.to_dict())
        df = make_df(par_name, **df_dict)
        # split df into df with default values and df with regionalized values
        df_all_regs = df[df["node_rel"].isna()]
        df_single_regs = df.copy(deep=True).loc[~df["node_rel"].isna()]

        # broadcast regions to default parameter values
        df_all_regs = df_all_regs.pipe(broadcast, node_rel=nodes)

        def same_node_if_nan(df):
            if df["node_loc"] == "same":
                df["node_loc"] = df["node_rel"]
            return df

        if "node_loc" in df_all_regs.columns:
            df_all_regs = df_all_regs.apply(lambda x: same_node_if_nan(x), axis=1)

        if "node_loc" in df_single_regs.columns:
            df_single_regs["node_loc"] = df_single_regs["node_loc"].apply(
                lambda x: None if x == "all" else x
            )
            df_new_reg_all_regs = df_single_regs.copy(deep=True).loc[
                df_single_regs["node_loc"].isna()
            ]
            df_new_reg_all_regs = df_new_reg_all_regs.pipe(broadcast, node_loc=nodes)
            df_single_regs = pd.concat(
                [
                    df_single_regs[~df_single_regs["node_loc"].isna()],
                    df_new_reg_all_regs,
                ]
            )

        df = pd.concat([df_single_regs, df_all_regs])

        # broadcast scenario years
        if "year_rel" in df.columns:
            df = df.pipe(broadcast, year_rel=act_years)
            if "year_act" in df.columns:
                df["year_act"] = df["year_rel"]

        # set import/export node_dest/origin to GLB for input/output
        set_exp_imp_nodes(df)

        par_dict[par_name] = df
    return par_dict

def gen_data_ts(
    s_info: ScenarioInfo,
) -> "ParameterData":
    """Generate time-series parameter data for ammonia fertilizer sector.

    Parameters
    ----------
    s_info :
        Scenario information to infer model regions.
    """
    # s_info.yv_ya
    nodes = s_info.N
    if "World" in nodes:
        nodes.pop(nodes.index("World"))
    if "R12_GLB" in nodes:
        nodes.pop(nodes.index("R12_GLB"))

    df = pd.read_csv(
        package_data_path(
            "material",
            "ammonia",
            "timeseries_R12.csv",
        ),
    )
    df["year_act"] = df["year_act"].astype("Int64")
    df["year_vtg"] = df["year_vtg"].astype("Int64")
    par_dict = {key: value for (key, value) in df.groupby("parameter")}
    for i in par_dict.keys():
        par_dict[i] = par_dict[i].dropna(axis=1)

    for par_name in par_dict.keys():
        df = par_dict[par_name]
        # remove "default" node name to broadcast with all scenario regions later
        df["node_loc"] = df["node_loc"].apply(lambda x: None if x == "default" else x)
        df_new = make_df(par_name, **df)
        # split df into df with default values and df with regionalized values
        df_new_no_reg = df_new[df_new["node_loc"].isna()]
        df_new_reg = df_new[~df_new["node_loc"].isna()]
        # broadcast regions to default parameter values
        df_new_no_reg = df_new_no_reg.pipe(broadcast, node_loc=nodes)
        df_new = pd.concat([df_new_reg, df_new_no_reg])
        # set import/export node_dest/origin to GLB for input/output
        set_exp_imp_nodes(df_new)
        par_dict[par_name] = df_new

    # convert floats
    par_dict["historical_activity"] = par_dict["historical_activity"].astype(
        {"year_act": "int32"}
    )
    par_dict["historical_new_capacity"] = par_dict["historical_new_capacity"].astype(
        {"year_vtg": "int32"}
    )
    par_dict["bound_activity_lo"] = par_dict["bound_activity_lo"].astype(
        {"year_act": "int32"}
    )
    return par_dict

def set_exp_imp_nodes(df: pd.DataFrame) -> None:
    """Helper to set import/export node_dest/origin to GLB for input/output technologies.

    Parameters
    ----------
    df :
        DataFrame with technology and node columns.
    """
    if "node_dest" in df.columns:
        df.loc[df["technology"].str.contains("export"), "node_dest"] = "R12_GLB"
    if "node_origin" in df.columns:
        df.loc[df["technology"].str.contains("import"), "node_origin"] = "R12_GLB"

def read_demand() -> dict[str, Union[pd.DataFrame, pd.Series]]:
    """Read and clean demand and trade data for ammonia fertilizer."""
    N_demand_GLO = pd.read_csv(
        package_data_path(
            "material",
            "ammonia",
            "NFertilizer_demand.csv",
        ),
    )
    N_demand_GLO = N_demand_GLO.set_axis(
        [int(i) if i.isdigit() else i for i in N_demand_GLO.columns], axis=1
    )

    # NH3 feedstock share by region in 2010
    # (from http://ietd.iipnetwork.org/content/ammonia#benchmarks)
    feedshare_GLO = pd.read_csv(
        package_data_path(
            "material",
            "ammonia",
            "NH3_feedstock_share.csv",
        ),
        skiprows=14,
    )

    # Read techno-economic parameters (from the old TE sheet, stored as csv)
    te_params = pd.read_csv(
        package_data_path("material", "ammonia", "old_TE_sheet.csv"),
        nrows=72,
    )
    te_params = te_params.set_axis(
        [int(i) if i.isdigit() else i for i in te_params.columns], axis=1
    )
    n_inputs_per_tech = 12  # Number of input params per technology

    input_fuel = te_params[2010][
        list(range(4, te_params.shape[0], n_inputs_per_tech))
    ].reset_index(drop=True)

    capacity_factor = te_params[2010][
        list(range(11, te_params.shape[0], n_inputs_per_tech))
    ].reset_index(drop=True)

    # Regional N demand in 2010
    ND = N_demand_GLO.loc[N_demand_GLO.Scenario == "NoPolicy", ["Region", 2010]]
    ND = ND[ND.Region != "World"]
    ND.Region = "R12_" + ND.Region
    ND = ND.set_index("Region")

    # Derive total energy (GWa) of NH3 production (based on demand 2010)
    N_energy = feedshare_GLO[feedshare_GLO.Region != "R12_GLB"].join(ND, on="Region")
    N_energy = pd.concat(
        [
            N_energy.Region,
            N_energy[["gas_pct", "coal_pct", "oil_pct"]].multiply(
                N_energy[2010], axis="index"
            ),
        ],
        axis=1,
    )
    N_energy.gas_pct *= input_fuel[2] * CONVERSION_FACTOR_NH3_N  # NH3 / N
    N_energy.coal_pct *= input_fuel[3] * CONVERSION_FACTOR_NH3_N
    N_energy.oil_pct *= input_fuel[4] * CONVERSION_FACTOR_NH3_N

    N_energy = pd.concat(
        [N_energy.Region, N_energy.sum(axis=1, numeric_only=True)], axis=1
    ).rename(columns={0: "totENE", "Region": "node"})  # GWa

    # N_trade_R12 = pd.read_csv(
    #     package_data_path("material", "ammonia", "trade.FAO.R12.csv"), index_col=0
    # )
    N_trade_R12 = pd.read_csv(
        package_data_path(
            "material",
            "ammonia",
            "NFertilizer_trade.csv",
        ),
    )
    N_trade_R12.region = "R12_" + N_trade_R12.region
    N_trade_R12.quantity = N_trade_R12.quantity
    N_trade_R12.unit = "t"
    N_trade_R12 = N_trade_R12.assign(time="year")
    N_trade_R12 = N_trade_R12.rename(
        columns={
            "quantity": "value",
            "region": "node_loc",
            "year": "year_act",
        }
    )

    df = N_trade_R12.loc[N_trade_R12.year_act == 2010,]
    df = df.pivot(index="node_loc", columns="type", values="value")
    NP = pd.DataFrame({"netimp": df["import"] - df.export, "demand": ND[2010]})
    NP["prod"] = NP.demand - NP.netimp

    # NH3_trade_R12 = pd.read_csv(
    #     package_data_path(
    #         "material", "ammonia", "NH3_trade_BACI_R12_aggregation.csv"
    #     )
    # )  # , index_col=0)
    NH3_trade_R12 = pd.read_csv(
        package_data_path(
            "material",
            "ammonia",
            "NH3_trade_R12_aggregated.csv",
        ),
    )
    NH3_trade_R12.region = "R12_" + NH3_trade_R12.region
    NH3_trade_R12.quantity = NH3_trade_R12.quantity / 1e6
    NH3_trade_R12.unit = "t"
    NH3_trade_R12 = NH3_trade_R12.assign(time="year")
    NH3_trade_R12 = NH3_trade_R12.rename(
        columns={
            "quantity": "value",
            "region": "node_loc",
            "year": "year_act",
        }
    )

    # Derive total energy (GWa) of NH3 production (based on demand 2010)
    N_feed = feedshare_GLO[feedshare_GLO.Region != "R11_GLB"].join(NP, on="Region")
    N_feed = pd.concat(
        [
            N_feed.Region,
            N_feed[["gas_pct", "coal_pct", "oil_pct"]].multiply(
                N_feed["prod"], axis="index"
            ),
        ],
        axis=1,
    )
    N_feed.gas_pct *= input_fuel[2] * 17 / 14
    N_feed.coal_pct *= input_fuel[3] * 17 / 14
    N_feed.oil_pct *= input_fuel[4] * 17 / 14

    N_feed = pd.concat(
        [N_feed.Region, N_feed.sum(axis=1, numeric_only=True)], axis=1
    ).rename(columns={0: "totENE", "Region": "node"})

    # Process the regional historical activities
    fs_GLO = feedshare_GLO.copy()
    fs_GLO.insert(1, "bio_pct", 0.0)
    fs_GLO.insert(2, "elec_pct", 0.0)
    # 17/14 NH3:N ratio, to get NH3 activity based on N demand
    # => No NH3 loss assumed during production
    fs_GLO.iloc[:, 1:6] = input_fuel[5] * fs_GLO.iloc[:, 1:6]
    fs_GLO.insert(6, "NH3_to_N", 1)

    # Share of feedstocks for NH3 production
    # (based on 2010 => Assumed fixed for any past years)
    feedshare = fs_GLO.sort_values(["Region"]).set_index("Region").drop("R12_GLB")

    # Get historical N demand from SSP2-nopolicy (may need to vary for diff scenarios)
    N_demand_raw = N_demand_GLO[N_demand_GLO["Region"] != "World"].copy()
    N_demand_raw["Region"] = "R12_" + N_demand_raw["Region"]
    N_demand_raw = N_demand_raw.set_index("Region")
    N_demand = (
        N_demand_raw.loc[
            (N_demand_raw.Scenario == "NoPolicy")
            # & (N_demand_raw.Region != "World")
        ]
        # .reset_index()
        .loc[:, 2010]
    )  # 2010 tot N demand

    # N_demand = N_demand.repeat(6)
    # act2010 = (feedshare.values.flatten() * N_demand).reset_index(drop=True)

    return {
        "act2010": feedshare.mul(N_demand, axis=0),
        "feedshare_GLO": feedshare_GLO,
        "ND": ND,
        "N_energy": N_energy,
        "feedshare": feedshare,
        # "act2010": act2010,
        "capacity_factor": capacity_factor,
        "N_feed": N_feed,
        "N_trade_R12": N_trade_R12,
        "NH3_trade_R12": NH3_trade_R12,
    }

def gen_demand() -> "ParameterData":
    """Generate adjusted i_feed demand with explicit ammonia demand."""
    N_energy = read_demand()["N_feed"]  # updated feed with imports accounted

    demand_fs_org = pd.read_csv(
        package_data_path("material", "ammonia", "demand_i_feed_R12.csv"),
    )

    df = demand_fs_org.loc[demand_fs_org.year == 2010, :].join(
        N_energy.set_index("node"), on="node"
    )
    sh = pd.DataFrame(
        {
            "node": demand_fs_org.loc[demand_fs_org.year == 2010, "node"],
            "r_feed": df.totENE / df.value,
        }
    )  # share of NH3 energy among total feedstock (no trade assumed)
    df = demand_fs_org.join(sh.set_index("node"), on="node")
    df.value *= 1 - df.r_feed  # Carve out the same % from tot i_feed values
    df = df.drop("r_feed", axis=1)
    df = df.drop("Unnamed: 0", axis=1)
    # TODO: refactor with a more sophisticated solution to reduce i_feed
    df.loc[df["value"] < 0, "value"] = 0  # temporary solution to avoid negative values

    return {"demand": df}

def gen_resid_demand_NH3(scenario: "Scenario") -> "ParameterData":
    """Generate residual demand for ammonia excluding fertilizer use.

    Parameters
    ----------
    scenario :
        Scenario to generate residual demand for.
    """
    context = read_config()
    ssp = get_ssp_from_context(context)
    default_gdp_elasticity_2020, default_gdp_elasticity_2030 = iea_elasticity_map[
        ssp_mode_map[ssp]
    ]
    df_2025 = pd.read_csv(package_data_path("material", "ammonia", "demand_2025.csv"))
    df_residual = gen_demand_petro(
        scenario, "NH3", default_gdp_elasticity_2020, default_gdp_elasticity_2030
    )
    df_residual = df_residual[df_residual["year"] != 2025]
    df_residual = pd.concat([df_2025, df_residual])
    return {"demand": df_residual}

def gen_land_input(scenario: "Scenario") -> "ParameterData":
    """Read fertilizer demand from GLOBIOM soft-link and return as land input parameter.

    Parameters
    ----------
    scenario :
        Scenario to query fertilizer demand from.
    """
    df = scenario.par("land_output", {"commodity": "Fertilizer Use|Nitrogen"})
    df["level"] = "final_material"
    return {"land_input": df}

def experiment_lower_CPA_SAS_costs(
    par_dict: dict[str, pd.DataFrame],
) -> dict[str, pd.DataFrame]:
    """Lower coal- and fueloil-based production costs for CPA and SAS regions.

    Parameters
    ----------
    par_dict :
        Dictionary with parameter data including cost parameters.
    """
    cost_list = ["inv_cost", "fix_cost"]
    scaler = {
        "R12_RCPA": [0.66 * 0.91, 0.75 * 0.9],
        "R12_CHN": [0.66 * 0.91, 0.75 * 0.9],
        "R12_SAS": [0.59, 1],
    }
    tec_list = ["fueloil_NH3", "coal_NH3"]
    for c in cost_list:
        df = par_dict[c]
        for k in scaler.keys():
            df_tmp = df.loc[df["node_loc"] == k]
            for e, t in enumerate(tec_list):
                df_tmp.loc[df_tmp["technology"] == t, "value"] = df_tmp.loc[
                    df_tmp["technology"] == t, "value"
                ].mul(scaler[k][e])
                df_tmp.loc[df_tmp["technology"] == t + "_ccs", "value"] = df_tmp.loc[
                    df_tmp["technology"] == t + "_ccs", "value"
                ].mul(scaler[k][e])
            df.loc[df["node_loc"] == k] = df_tmp
        par_dict[c] = df
    return par_dict

def gen_ccs_bounds(s_info) -> "ParameterData":
    """Generate bounds for CCS technologies for 2020 and 2025.

    Parameters
    ----------
    s_info :
        Scenario information used to determine the nodes and years.
    """
    common = {
        "technology": [
            "biomass_NH3_ccs",
            "gas_NH3_ccs",
            "coal_NH3_ccs",
            "fueloil_NH3_ccs",
        ],
        "mode": "M1",
        "time": "year",
        "unit": "???",
        "value": 0,
    }
    df = (
        make_df("bound_activity_up", **common)
        .pipe(broadcast, node_loc=nodes_ex_world(s_info.N))
        .pipe(broadcast, year_act=[2020, 2025])
    )
    return {"bound_activity_up": df}