"""Prepare data for water use for cooling & energy technologies."""
import numpy as np
import pandas as pd
from message_ix import Scenario, make_df
from message_ix_models import Context
from message_ix_models.model.water.data.demands import read_water_availability
from message_ix_models.model.water.utils import map_yv_ya_lt
from message_ix_models.util import (
broadcast,
minimum_version,
package_data_path,
same_node,
same_time,
)
@minimum_version("message_ix 3.7")
def map_basin_region_wat(context: "Context") -> pd.DataFrame:
    """Calculate the share of water availability of each basin in its parent region.

    The parent region is either a global MESSAGE region or a single country,
    depending on ``context.type_reg``.

    Parameters
    ----------
    context : .Context
        Read for ``time``, ``regions``, ``RCP``, ``REL``, ``type_reg``,
        ``map_ISO_c`` and ``context["water build info"]``.

    Returns
    -------
    data : pandas.DataFrame
        Long-format data with the columns ``mode``, ``date``, ``MSGREG``,
        ``share``, ``year`` and ``time``, plus the basin column. The basin
        column is named ``region`` when ``"year"`` is in ``context.time`` and
        ``node`` otherwise. Only years in ``info.Y`` are kept.
    """
    info = context["water build info"]

    # Annual and sub-annual runs share one pipeline; they differ only in the
    # availability file, the name of the basin column and the "time" labels.
    annual = "year" in context.time
    qtot_file = (
        f"qtot_5y_{context.RCP}_{context.REL}_{context.regions}.csv"
        if annual
        else f"qtot_5y_m_{context.RCP}_{context.REL}_{context.regions}.csv"
    )
    basin_col = "region" if annual else "node"

    # Basin delineation, used to label the rows of the availability table
    df_x = pd.read_csv(
        package_data_path(
            "water", "delineation", f"basins_by_region_simpl_{context.regions}.csv"
        )
    )

    # Water availability; the data is spatially and temporally aggregated from GHMs
    df_sw = pd.read_csv(package_data_path("water", "availability", qtot_file))
    df_sw.drop(columns=["Unnamed: 0"], inplace=True)

    # Both tables carry the default row index, so this assignment pairs them
    # row by row.
    df_sw["BCU_name"] = df_x["BCU_name"]
    df_sw["MSGREG"] = (
        context.map_ISO_c[context.regions]
        if context.type_reg == "country"
        else f"{context.regions}_" + df_sw["BCU_name"].str.split("|").str[-1]
    )
    df_sw = df_sw.set_index(["MSGREG", "BCU_name"])

    # Ratio of water availability in each basin to the total of its region
    df_sw = df_sw.groupby(["MSGREG"]).apply(lambda x: x / x.sum())
    df_sw.reset_index(level=0, drop=True, inplace=True)
    df_sw.reset_index(inplace=True)

    # Basin node ("B...") and the matching mode ("M...") labels
    df_sw["Region"] = "B" + df_sw["BCU_name"].astype(str)
    df_sw["Mode"] = df_sw["Region"].replace(regex=["^B"], value="M")
    df_sw.drop(columns=["BCU_name"], inplace=True)
    df_sw.set_index(["MSGREG", "Region", "Mode"], inplace=True)

    # Wide (one column per date) to long format
    df_sw = df_sw.stack().reset_index(level=0).reset_index()
    df_sw.columns = pd.Index([basin_col, "mode", "date", "MSGREG", "share"])
    df_sw.sort_values([basin_col, "date", "MSGREG", "share"], inplace=True)

    dates = pd.DatetimeIndex(df_sw["date"])
    df_sw["year"] = dates.year
    df_sw["time"] = "year" if annual else dates.month

    df_sw = df_sw[df_sw["year"].isin(info.Y)]
    df_sw.reset_index(drop=True, inplace=True)
    return df_sw
def add_water_supply(context: "Context") -> dict[str, pd.DataFrame]:
    """Add water supply infrastructure.

    This function links the water supply based on different settings and options.
    It defines the supply linkages for freshwater, groundwater and salinewater.

    With ``context.nexus_set == "cooling"`` only region-level ``output`` data is
    produced. With ``"nexus"`` the basin-level parameters are produced as well.
    Any other value yields an empty dictionary.

    Parameters
    ----------
    context : .Context

    Returns
    -------
    data : dict of (str -> pandas.DataFrame)
        Keys are MESSAGE parameter names such as 'input', 'fix_cost'.
        Values are data frames ready for :meth:`~.Scenario.add_par`.
        Years in the data include the model horizon indicated by
        ``context["water build info"]``, plus the additional years 2010 and 2015.
    """
    results = {}

    # Reference to the water configuration
    info = context["water build info"]

    # Load the scenario described by the context
    scen = Scenario(context.get_platform(), **context.core.scenario_info)

    fut_year = info.Y
    year_wat = (2010, 2015, *info.Y)
    sub_time = context.time
    print(sub_time)
    # First model year of the scenario, passed to map_yv_ya_lt() below
    first_year = scen.firstmodelyear
    print(" future year = ", fut_year)
    print(" year_wat = ", year_wat)

    # Basin delineation
    FILE = f"basins_by_region_simpl_{context.regions}.csv"
    PATH = package_data_path("water", "delineation", FILE)
    df_node = pd.read_csv(PATH)
    # Assign node ("B..."), mode ("M...") and parent-region labels
    df_node["node"] = "B" + df_node["BCU_name"].astype(str)
    df_node["mode"] = "M" + df_node["BCU_name"].astype(str)
    df_node["region"] = (
        context.map_ISO_c[context.regions]
        if context.type_reg == "country"
        else f"{context.regions}_" + df_node["REGION"].astype(str)
    )
    # Energy MESSAGE region names
    node_region = df_node["region"].unique()

    # Groundwater energy intensity
    FILE1 = f"gw_energy_intensity_depth_{context.regions}.csv"
    PATH1 = package_data_path("water", "availability", FILE1)
    df_gwt = pd.read_csv(PATH1)
    df_gwt["region"] = (
        context.map_ISO_c[context.regions]
        if context.type_reg == "country"
        else f"{context.regions}_" + df_gwt["REGION"].astype(str)
    )

    # Historical new capacity of ground- and surface-water extraction
    FILE2 = f"historical_new_cap_gw_sw_km3_year_{context.regions}.csv"
    PATH2 = package_data_path("water", "availability", FILE2)
    df_hist = pd.read_csv(PATH2)
    df_hist["BCU_name"] = "B" + df_hist["BCU_name"].astype(str)

    if context.nexus_set == "cooling":
        # Surface-, ground- and saline-water supply at the regional level
        results["output"] = pd.concat(
            [
                _regional_supply_output(
                    "extract_surfacewater",
                    "water_supply",
                    "freshwater",
                    year_wat,
                    node_region,
                ),
                _regional_supply_output(
                    "extract_groundwater",
                    "water_supply",
                    "freshwater",
                    year_wat,
                    node_region,
                ),
                _regional_supply_output(
                    "extract_salinewater",
                    "saline_supply",
                    "saline_ppl",
                    year_wat,
                    node_region,
                ),
            ]
        )
    elif context.nexus_set == "nexus":
        results.update(
            _nexus_supply_data(
                context,
                df_node,
                df_gwt,
                df_hist,
                node_region,
                year_wat,
                sub_time,
                first_year,
            )
        )

    return results


def _regional_supply_output(technology, level, commodity, years, regions):
    """Build annual, region-level ``output`` rows for one extraction technology.

    Every entry of *years* is used as both vintage and activity year, and
    ``node_loc`` is broadcast over *regions*.
    """
    return (
        make_df(
            "output",
            technology=technology,
            value=1,
            unit="km3",
            year_vtg=years,
            year_act=years,
            level=level,
            commodity=commodity,
            mode="M1",
            time="year",
            time_dest="year",
            time_origin="year",
        )
        .pipe(broadcast, node_loc=regions)
        .pipe(same_node)
    )


def _nexus_supply_data(
    context, df_node, df_gwt, df_hist, node_region, year_wat, sub_time, first_year
):
    """Build the basin-level supply parameters for ``nexus_set == "nexus"``."""
    results = {}

    basins = df_node["node"]
    regions = df_node["region"]
    modes = df_node["mode"]
    times = pd.Series(sub_time)

    # These two values feed both map_yv_ya_lt() and "technical_lifetime";
    # presumably map_yv_ya_lt's second argument is a lifetime in years.
    sw_lifetime = 50
    gw_lifetime = 20
    yv_ya_sw = map_yv_ya_lt(year_wat, sw_lifetime, first_year)
    yv_ya_gw = map_yv_ya_lt(year_wat, gw_lifetime, first_year)

    # --- input -----------------------------------------------------------------
    # Slack technologies balancing basin availability with demands
    slack_inp = [
        make_df(
            "input",
            technology=tec,
            value=1,
            unit="-",
            level="water_avail_basin",
            commodity=commodity,
            mode="M1",
            year_vtg=year_wat,
            year_act=year_wat,
        )
        .pipe(broadcast, node_loc=basins, time=times)
        .pipe(same_node)
        .pipe(same_time)
        for tec, commodity in (
            ("return_flow", "surfacewater_basin"),
            ("gw_recharge", "groundwater_basin"),
        )
    ]
    # Dummy technology linking basin-level supply to the energy regions
    basin_to_reg_inp = (
        make_df(
            "input",
            technology="basin_to_reg",
            value=1,
            unit="-",
            level="water_supply_basin",
            commodity="freshwater_basin",
            mode=modes,
            node_origin=basins,
            node_loc=regions,
        )
        .pipe(broadcast, year_vtg=year_wat, time=times)
        .pipe(same_time)
    )
    dummy_inp = pd.concat([*slack_inp, basin_to_reg_inp])
    # These technologies are only active in their vintage year
    dummy_inp["year_act"] = dummy_inp["year_vtg"]

    # Water withdrawn from the basin by surface- and groundwater extraction
    water_inp = [
        make_df(
            "input",
            technology=tec,
            value=1,
            unit="-",
            level="water_avail_basin",
            commodity=commodity,
            mode="M1",
            node_origin=basins,
            node_loc=basins,
        )
        .pipe(broadcast, yv_ya, time=times)
        .pipe(same_time)
        for tec, commodity, yv_ya in (
            ("extract_surfacewater", "surfacewater_basin", yv_ya_sw),
            ("extract_groundwater", "groundwater_basin", yv_ya_gw),
        )
    ]

    # Electricity drawn from the parent region for extraction.
    # Surface water: low: 0.001141553, mid: 0.018835616, high: 0.03652968
    gw_electr = df_gwt["GW_per_km3_per_year"] + 0.043464579
    electr_inp = [
        make_df(
            "input",
            technology=tec,
            value=value,
            unit="-",
            level="final",
            commodity="electr",
            mode="M1",
            time_origin="year",
            node_origin=regions,
            node_loc=basins,
        ).pipe(broadcast, yv_ya, time=times)
        for tec, value, yv_ya in (
            ("extract_surfacewater", 0.018835616, yv_ya_sw),
            ("extract_groundwater", gw_electr, yv_ya_gw),
            # fossil groundwater: twice as much as normal groundwater
            ("extract_gw_fossil", gw_electr * 2, yv_ya_gw),
        )
    ]

    inp = pd.concat([dummy_inp, *water_inp, *electr_inp])

    if context.type_reg == "global":
        # NOTE(review): for "extract_gw_fossil" rows node_loc is a basin node
        # ("B" + BCU_name), so the comparison with "R11_SAS" never matches and
        # this scaling is currently a no-op. It probably meant to test
        # node_origin (the parent region) - confirm the intent before changing.
        inp.loc[
            (inp["technology"].str.contains("extract_gw_fossil"))
            & (inp["year_act"] == 2020)
            & (inp["node_loc"] == "R11_SAS"),
            "value",
        ] *= 0.5

    results["input"] = inp

    # --- historical_new_capacity -------------------------------------------------
    results["historical_new_capacity"] = pd.concat(
        [
            make_df(
                "historical_new_capacity",
                node_loc=df_hist["BCU_name"],
                technology=tec,
                value=df_hist[column] / 5,  # n period
                unit="km3/year",
                year_vtg=2015,
            )
            for tec, column in (
                ("extract_surfacewater", "hist_cap_sw_km3_year"),
                ("extract_groundwater", "hist_cap_gw_km3_year"),
            )
        ]
    )

    # --- output ------------------------------------------------------------------
    # Fresh water delivered to the basin by each extraction technology.
    # make_df() only keeps the columns of the "output" parameter, so no
    # time_origin is passed here.
    basin_out = [
        make_df(
            "output",
            technology=tec,
            value=1,
            unit="-",
            level="water_supply_basin",
            commodity="freshwater_basin",
            mode="M1",
            node_loc=basins,
            node_dest=basins,
        )
        .pipe(broadcast, yv_ya, time=times)
        .pipe(same_time)
        for tec, yv_ya in (
            ("extract_surfacewater", yv_ya_sw),
            ("extract_groundwater", yv_ya_gw),
            ("extract_gw_fossil", yv_ya_gw),
        )
    ]
    # Saline water supply for regions
    saline_out = _regional_supply_output(
        "extract_salinewater", "saline_supply", "saline_ppl", year_wat, node_region
    )
    # Dummy technology linking basin-level supply to the energy regions
    basin_to_reg_out = make_df(
        "output",
        technology="basin_to_reg",
        value=1,
        unit="-",
        level="water_supply",
        commodity="freshwater",
        time_dest="year",
        node_loc=regions,
        node_dest=regions,
        mode=modes,
    ).pipe(broadcast, year_vtg=year_wat, time=times)
    # Set year_act only on the frame that lacks it. Assigning it on the
    # concatenated frame would overwrite the (year_vtg, year_act) pairs of the
    # extraction technologies and collapse them into duplicate rows, leaving
    # their "input" rows for later activity years without a matching "output".
    basin_to_reg_out["year_act"] = basin_to_reg_out["year_vtg"]

    results["output"] = pd.concat([*basin_out, saline_out, basin_to_reg_out])

    # --- var_cost ----------------------------------------------------------------
    # Dummy variable cost for the dummy water-to-energy technology
    var = make_df(
        "var_cost",
        technology="basin_to_reg",
        mode=modes,
        node_loc=regions,
        value=20,
        unit="-",
    ).pipe(broadcast, year_vtg=year_wat, time=times)
    var["year_act"] = var["year_vtg"]
    results["var_cost"] = var

    # --- share_mode_up -----------------------------------------------------------
    # Share of each basin in the water availability of its region
    df_sw = map_basin_region_wat(context)
    results["share_mode_up"] = make_df(
        "share_mode_up",
        shares="share_basin",
        technology="basin_to_reg",
        mode=df_sw["mode"],
        node_share=df_sw["MSGREG"],
        time=df_sw["time"],
        value=df_sw["share"],
        unit="%",
        year_act=df_sw["year"],
    )

    # --- technical_lifetime ------------------------------------------------------
    results["technical_lifetime"] = pd.concat(
        [
            make_df("technical_lifetime", technology=tec, value=lifetime, unit="y")
            .pipe(broadcast, year_vtg=year_wat, node_loc=basins)
            .pipe(same_node)
            for tec, lifetime in (
                ("extract_surfacewater", sw_lifetime),
                ("extract_groundwater", gw_lifetime),
                ("extract_gw_fossil", gw_lifetime),
            )
        ]
    )

    # --- inv_cost ----------------------------------------------------------------
    results["inv_cost"] = pd.concat(
        [
            make_df("inv_cost", technology=tec, value=cost, unit="USD/km3").pipe(
                broadcast, year_vtg=year_wat, node_loc=basins
            )
            for tec, cost in (
                ("extract_surfacewater", 155.57),
                ("extract_groundwater", 54.52),
                # assumed much higher than normal groundwater
                ("extract_gw_fossil", 54.52 * 150),
            )
        ]
    )

    # --- fix_cost ----------------------------------------------------------------
    results["fix_cost"] = make_df(
        "fix_cost",
        technology="extract_gw_fossil",
        value=300,  # assumed
        unit="USD/km3",
    ).pipe(broadcast, yv_ya_gw, node_loc=basins)

    return results
def add_e_flow(context: "Context") -> dict[str, pd.DataFrame]:
    """Add environmental flows.

    This function bounds the available water and allocates the environmental
    flows. Environmental flow bounds are calculated using the Variable Monthly
    Flow (VMF) method. The VMF method is applied to wet and dry seasonal runoff
    values. These wet and dry seasonal values are then aggregated to annual
    values. Environmental flows are incorporated in the model as lower bounds on
    the activity of the 'return_flow' technology, which ensures that a certain
    amount of water remains in the basin.

    The bounds are only produced when ``context.SDG`` is not ``"baseline"``;
    otherwise an empty dictionary is returned.

    Parameters
    ----------
    context : .Context

    Returns
    -------
    data : dict of (str -> pandas.DataFrame)
        Either empty or holding the single key 'bound_activity_lo', with a data
        frame ready for :meth:`~.Scenario.add_par`. Only years from 2025 onwards
        that are part of ``context["water build info"]`` are included; values
        for 2110 are copied from 2100.
    """
    results = {}
    info = context["water build info"]

    # Surface-water availability; the groundwater table is not needed here
    df_sw, _ = read_water_availability(context)

    # Basin delineation, used below to index the rows of the e-flow table
    df_x = pd.read_csv(
        package_data_path(
            "water", "delineation", f"basins_by_region_simpl_{context.regions}.csv"
        )
    )

    # Surface-water availability per basin, used to cap the environmental flow
    dmd_df = make_df(
        "demand",
        node="B" + df_sw["Region"].astype(str),
        commodity="surfacewater_basin",
        level="water_avail_basin",
        year=df_sw["year"],
        time=df_sw["time"],
        value=df_sw["value"],
        unit="km3/year",
    )
    dmd_df = dmd_df[dmd_df["year"] >= 2025].reset_index(drop=True)
    # Negative (and missing) values are replaced by 0
    dmd_df["value"] = dmd_df["value"].where(dmd_df["value"] >= 0, 0)

    # Annual and sub-annual runs differ only in the input file and "time" labels
    annual = "year" in context.time
    eflow_file = (
        f"e-flow_{context.RCP}_{context.regions}.csv"
        if annual
        else f"e-flow_5y_m_{context.RCP}_{context.regions}.csv"
    )
    # The data is spatially and temporally aggregated from GHMs
    df_env = pd.read_csv(package_data_path("water", "availability", eflow_file))
    df_env.drop(["Unnamed: 0"], axis=1, inplace=True)

    # NOTE(review): this assigns the positional row index of the delineation
    # file, not the BCU names, so "Region" (and node_loc below) becomes
    # "B<row number>". Confirm that this matches the node naming produced by
    # read_water_availability() before changing it, because the cap further
    # down pairs both tables by sorted row position.
    df_env.index = df_x["BCU_name"].index

    # Wide (one column per date) to long format
    df_env = df_env.stack().reset_index()
    df_env.columns = pd.Index(["Region", "years", "value"])
    df_env.sort_values(["Region", "years", "value"], inplace=True)
    df_env.fillna(0, inplace=True)
    df_env.reset_index(drop=True, inplace=True)

    dates = pd.DatetimeIndex(df_env["years"])
    df_env["year"] = dates.year
    df_env["time"] = "year" if annual else dates.month

    # Re-use the 2100 values for 2110. The "year" column must be assigned:
    # ``.loc["year"] = 2110`` would instead append a row labelled "year" with
    # 2110 in every column and leave the copied rows at 2100.
    df_env2210 = df_env[df_env["year"] == 2100].copy()
    df_env2210["year"] = 2110
    df_env = pd.concat([df_env, df_env2210])
    df_env = df_env[df_env["year"].isin(info.Y)]

    if context.SDG != "baseline":
        # Lower bound on return_flow activity enforces the environmental flow
        eflow_df = make_df(
            "bound_activity_lo",
            node_loc="B" + df_env["Region"].astype(str),
            technology="return_flow",
            year_act=df_env["year"],
            mode="M1",
            time=df_env["time"],
            value=df_env["value"],
            unit="km3/year",
        )
        eflow_df["value"] = eflow_df["value"].where(eflow_df["value"] >= 0, 0)
        eflow_df = eflow_df[eflow_df["year_act"] >= 2025].reset_index(drop=True)

        # Bring both tables into the same order so rows can be paired by position
        dmd_df.sort_values(by=["node", "year"], inplace=True)
        dmd_df.reset_index(drop=True, inplace=True)
        eflow_df.sort_values(by=["node_loc", "year_act"], inplace=True)
        eflow_df.reset_index(drop=True, inplace=True)

        # Cap the environmental flow at 70% of the available surface water
        eflow_df["value"] = np.where(
            eflow_df["value"] >= 0.7 * dmd_df["value"],
            0.7 * dmd_df["value"],
            eflow_df["value"],
        )

        results["bound_activity_lo"] = eflow_df

    return results