Source code for message_ix_models.tools.bilateralize.calculate_distance

"""
Calculate distances between pairs of ports
"""

import math
import os
from itertools import combinations

import pandas as pd
from scgraph.geographs.marnet import marnet_geograph

from message_ix_models.util import package_data_path


def haversine_distance(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
    """
    Calculate the great circle distance between two points
    on the earth (specified in decimal degrees) using the Haversine formula.

    Args:
        lat1: Latitude of the first point
        lon1: Longitude of the first point
        lat2: Latitude of the second point
        lon2: Longitude of the second point

    Outputs:
        Distance in kilometers
    """
    # Convert decimal degrees to radians
    lat1, lon1, lat2, lon2 = map(math.radians, [lat1, lon1, lat2, lon2])

    # Haversine formula
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = (
        math.sin(dlat / 2) ** 2
        + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2) ** 2
    )

    # Mathematically 0 <= a <= 1, but floating-point rounding can push it a few
    # ulps outside that range (typically for antipodal points), which would make
    # math.sqrt/math.asin raise "math domain error". Clamp with explicit
    # comparisons so that a NaN input still propagates as NaN.
    if a > 1.0:
        a = 1.0
    elif a < 0.0:
        a = 0.0

    c = 2 * math.asin(math.sqrt(a))

    # Radius of earth in kilometers
    r = 6371
    return c * r
def calculate_port_distances(df: pd.DataFrame) -> pd.DataFrame:
    """
    Calculate maritime distances between all combinations of ports.

    Rows with a missing latitude or longitude are ignored. Every unordered pair
    of the remaining ports is routed once through ``marnet_geograph`` and the
    result is reported in both directions (A -> B and B -> A).

    Args:
        df: DataFrame containing Port, Latitude, Longitude columns

    Outputs:
        DataFrame with columns 'Port1', 'Port2', 'Distance_km'. It is empty
        (but still has these columns) when fewer than two ports have valid
        coordinates.

    Raises:
        ValueError: if a required column is missing, or if no row has both a
            latitude and a longitude.
    """
    result_columns = ["Port1", "Port2", "Distance_km"]

    # Check if required columns exist
    required_columns = ["Port", "Latitude", "Longitude"]
    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        raise ValueError(f"Missing required columns: {missing_columns}")

    # Remove rows with missing coordinates
    ports_clean = df.dropna(subset=["Latitude", "Longitude"])
    if ports_clean.empty:
        raise ValueError("No valid coordinate data found in the file")

    # Work on plain (port, latitude, longitude) tuples instead of looking rows
    # up again by index: the frame's index labels are not guaranteed to be
    # 0..n-1 (rows may have been filtered or dropped), so mixing labels with
    # positional ``.iloc`` access would pick the wrong rows or raise IndexError.
    ports = list(ports_clean[required_columns].itertuples(index=False, name=None))

    # Get all combinations of ports (without repetition)
    port_combinations = list(combinations(ports, 2))
    print(f"Calculating distances for {len(port_combinations)} port pairs...")

    # Nothing to route: return a well-formed empty table so callers can still
    # select or merge on the documented columns.
    if not port_combinations:
        return pd.DataFrame(columns=result_columns)

    # Calculate distances between all port combinations
    distances = []
    for (name1, lat1, lon1), (name2, lat2, lon2) in port_combinations:
        route = marnet_geograph.get_shortest_path(
            origin_node={"latitude": lat1, "longitude": lon1},
            destination_node={"latitude": lat2, "longitude": lon2},
        )
        distances.append(
            {
                "Port1": name1,
                "Port2": name2,
                "Distance_km": round(route["length"], 2),
            }
        )

    # Create DataFrame with results
    outdf1 = pd.DataFrame(distances, columns=result_columns)

    # Concatenate other direction too (swap origin and destination)
    outdf2 = outdf1.rename(columns={"Port1": "Port2", "Port2": "Port1"})
    outdf = pd.concat([outdf1, outdf2])

    return outdf
def calculate_distance(regional_specification: str = "R12") -> None:
    """
    Run distance calculation.

    Reads the ``node_ports`` sheet of ``distances.xlsx``, keeps the rows whose
    ``Regionalization`` equals ``regional_specification``, computes the
    distances between all of their ports and writes the result, labelled with
    the node of each port, to ``<regional_specification>_distances.csv`` in the
    same directory.

    Args:
        regional_specification: MESSAGE regional specification (e.g., "R12")

    Outputs:
        CSV file in data/bilateralize/distances/ that includes distances for
        regional specification

    Raises:
        ValueError: if the sheet has no rows for ``regional_specification``.
    """
    # Specify the path to the directory holding the input and output files
    csv_path = os.path.abspath(
        os.path.join(
            os.path.dirname(package_data_path("bilateralize")),
            "bilateralize",
            "distances",
        )
    )

    infile = pd.read_excel(
        os.path.join(csv_path, "distances.xlsx"), sheet_name="node_ports"
    )
    infile = infile[infile["Regionalization"] == regional_specification]
    if infile.empty:
        raise ValueError(
            "No ports found for regional specification "
            f"{regional_specification!r} in sheet 'node_ports'"
        )
    # Filtering keeps the sheet's original row labels; renumber the rows 0..n-1
    # so downstream processing sees a clean, contiguous index.
    infile = infile.reset_index(drop=True)

    # Calculate distances
    df = calculate_port_distances(infile)

    # Add regions back: attach the node of the origin port ("1") and of the
    # destination port ("2"). Renaming the lookup columns first keeps the merge
    # from leaving helper "Port" columns behind.
    for i in ["1", "2"]:
        node_lookup = infile[["Node", "Port"]].rename(
            columns={"Node": "Node" + i, "Port": "Port" + i}
        )
        df = df.merge(node_lookup, on="Port" + i, how="left")

    df = df[["Node1", "Port1", "Node2", "Port2", "Distance_km"]]

    df.to_csv(
        os.path.join(csv_path, regional_specification + "_distances.csv"), index=False
    )