"""
Calculate distances between pairs of ports
"""
import math
import os
from itertools import combinations
import pandas as pd
from scgraph.geographs.marnet import marnet_geograph
from message_ix_models.util import package_data_path
[docs]
def haversine_distance(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
    """
    Calculate the great circle distance between two points
    on the earth (specified in decimal degrees) using the Haversine formula.

    Args:
        lat1: Latitude of the first point
        lon1: Longitude of the first point
        lat2: Latitude of the second point
        lon2: Longitude of the second point
    Outputs:
        Distance in kilometers
    """
    # Convert decimal degrees to radians
    lat1, lon1, lat2, lon2 = map(math.radians, (lat1, lon1, lat2, lon2))

    # Haversine formula
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = (
        math.sin(dlat / 2) ** 2
        + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2) ** 2
    )
    # Mathematically a <= 1, but floating-point rounding can push it a hair
    # above 1 for (near-)antipodal points, which would make math.asin raise
    # ValueError. Clamp before taking the inverse sine.
    a = min(1.0, a)
    c = 2 * math.asin(math.sqrt(a))

    # Radius of earth in kilometers
    r = 6371
    return c * r
[docs]
def calculate_port_distances(df: pd.DataFrame) -> pd.DataFrame:
    """
    Calculate distances between all pairs of ports in a DataFrame.

    Every unordered pair of ports is routed once with
    ``marnet_geograph.get_shortest_path`` and then reported in both directions
    (Port1 -> Port2 and Port2 -> Port1) with the same distance.

    Args:
        df: DataFrame containing Port, Latitude, Longitude columns
    Outputs:
        DataFrame with columns 'Port1', 'Port2', 'Distance_km' and a fresh
        0..n-1 index. Empty (but with those columns) when fewer than two
        ports have valid coordinates.
    Raises:
        ValueError: if a required column is missing, or if no row has both
            a latitude and a longitude.
    """
    output_columns = ["Port1", "Port2", "Distance_km"]

    # Check if required columns exist
    required_columns = ["Port", "Latitude", "Longitude"]
    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        raise ValueError(f"Missing required columns: {missing_columns}")

    # Remove rows with missing coordinates
    ports_clean = df.dropna(subset=["Latitude", "Longitude"])
    if ports_clean.empty:
        raise ValueError("No valid coordinate data found in the file")

    # Work on plain row records instead of index values: the frame's index
    # labels are generally not positional (rows may have been filtered or
    # dropped), so pairing labels and then looking rows up by position would
    # select the wrong ports or run out of bounds.
    ports = ports_clean[required_columns].to_dict("records")

    # Get all combinations of ports (without repetition)
    port_pairs = list(combinations(ports, 2))
    print(f"Calculating distances for {len(port_pairs)} port pairs...")

    # Calculate distances between all port combinations
    distances = []
    for port1, port2 in port_pairs:
        route = marnet_geograph.get_shortest_path(
            origin_node={
                "latitude": port1["Latitude"],
                "longitude": port1["Longitude"],
            },
            destination_node={
                "latitude": port2["Latitude"],
                "longitude": port2["Longitude"],
            },
        )
        distances.append(
            {
                "Port1": port1["Port"],
                "Port2": port2["Port"],
                # NOTE(review): assumes get_shortest_path reports "length" in
                # kilometers with its default settings -- confirm.
                "Distance_km": round(route["length"], 2),
            }
        )

    # Explicit columns keep the schema intact even when there are no pairs
    forward = pd.DataFrame(distances, columns=output_columns)

    # Concatenate other direction too
    backward = forward.rename(columns={"Port1": "Port2", "Port2": "Port1"})
    return pd.concat([forward, backward], ignore_index=True)
[docs]
def calculate_distance(regional_specification: str = "R12"):
    """
    Run distance calculation.

    Reads the "node_ports" sheet of distances.xlsx, keeps the rows whose
    "Regionalization" equals ``regional_specification``, computes the pairwise
    port distances, attaches the node of each port and writes the result to
    ``<regional_specification>_distances.csv`` in the same directory.

    Args:
        regional_specification: MESSAGE regional specification (e.g., "R12")
    Outputs:
        CSV file in data/bilateralize/distances/ that includes
        distances for regional specification, with columns
        Node1, Port1, Node2, Port2, Distance_km
    Raises:
        ValueError: if the sheet has no rows for ``regional_specification``.
    """
    # Directory holding both the input workbook and the output CSV
    distance_dir = os.path.abspath(
        os.path.join(package_data_path("bilateralize"), "distances")
    )

    node_ports = pd.read_excel(
        os.path.join(distance_dir, "distances.xlsx"), sheet_name="node_ports"
    )

    # Filtering keeps the original row labels; reset them so that the filtered
    # frame has a clean 0..n-1 index for downstream row lookups.
    node_ports = node_ports[
        node_ports["Regionalization"] == regional_specification
    ].reset_index(drop=True)
    if node_ports.empty:
        raise ValueError(
            f"No ports found for regional specification {regional_specification!r}"
        )

    # Calculate distances
    df = calculate_port_distances(node_ports)

    # Add regions back: look up the node of each side's port. The lookup is
    # renamed per side so the merge does not leave stray "Port" columns behind.
    node_lookup = node_ports[["Node", "Port"]]
    for i in ("1", "2"):
        df = df.merge(
            node_lookup.rename(columns={"Node": "Node" + i, "Port": "Port" + i}),
            on="Port" + i,
            how="left",
        )

    df = df[["Node1", "Port1", "Node2", "Port2", "Distance_km"]]
    df.to_csv(
        os.path.join(distance_dir, regional_specification + "_distances.csv"),
        index=False,
    )