Shortest Pathsยถ
Definition
Hypergraph shortest paths measure distance via hyperedges; order/size can change step costs.
You will learn
Compute hypergraph shortest paths and analyze how order affects distance.
Overviewยถ
Compute shortest paths in hypergraphs and inspect path statistics.
Compare order/size effects on path structure.
Setupยถ
[ ]:
import matplotlib as mpl
mpl.rcParams.update({
"figure.figsize": (6, 4),
"figure.dpi": 120,
"savefig.dpi": 150,
})
[ ]:
import pickle as pk
import numpy as np
import pandas as pd
fname = ""
shortest_paths_ho_min = pk.load(open(fname, 'rb'))
_REDS = {}
for i, dicto in shortest_paths_ho_min.items():
reds = {}
for j, spl in dicto.items():
if i==j:
continue
# print(i, j, spl)
# print(spl['redundancies'])
reds[j] = spl['redundancies']
l = len(spl['redundancies'])
if l>0:
if l not in _REDS:
_REDS[l] = [spl['redundancies']]
else:
_REDS[l].append(spl['redundancies'])
REDS = {}
for k, v in _REDS.items():
REDS[k] = {'mean': np.array(v).mean(axis=1).mean(axis=0) if np.array(v).ndim > 1 else np.array(v).mean(axis=0),
'std': np.array(v).mean(axis=1).std(axis=0) if np.array(v).ndim > 1 else np.array(v).std(axis=0)
}
df_redund = pd.DataFrame(REDS).T.reset_index()
df_redund.rename(columns={'index': 'spl'}, inplace=True)
# print(df_redund.mean()[['mean', 'std']])
df = pd.concat([df, pd.DataFrame(df_redund.mean()[['mean', 'std']]).T.rename(index={0:ds})])
AVGS = {}
PERC_PATHS_WITH_NO_REDUNDANCIES = {}
PERC_OF_PATH_WITH_REDUNDANCY = {}
for k, v in _REDS.items():
# average number of redundant steps per path length
AVGS[k] = np.array(v).sum(axis=1).mean()
# percentage of paths that have no redundant steps
percentagees = pd.Series(np.array(v).sum(axis=1)).value_counts(normalize=True)
if 0 in percentagees.index:
PERC_PATHS_WITH_NO_REDUNDANCIES[k] = percentagees[0]
else:
PERC_PATHS_WITH_NO_REDUNDANCIES[k] = 0
# (np.array(arr)!=0).sum() / len(arr)
fname = f"{root}/HODATASETS/{ds}/paths_static/" + "{f}.pck"
# THIS ONLY EXTRACTS THE LAST FEATURE OF THE SHORTEST PATHS DICT WHICH IS IN FACT STANDARD TO ALL 3 OPTIONS
pk.dump(AVGS, open(fname.format(f='avg_num_redundancies_per_path_len'), 'wb'))
pk.dump(PERC_PATHS_WITH_NO_REDUNDANCIES, open(fname.format(f='percentage_of_paths_with_no_redundancies'), 'wb'))
# # A MORE PROCESSED VERSION FOR PLOTTING
# # pk.dump(REDS, open(fname.format(f='avg_redundancies_per_length'), 'wb'))
print(f"{ds}: Done")
[ ]:
# USING CALC_PROP_OF_EACH_PATH_IS_DYAD
datasets_to_plot = ['InVS15', 'SFHH', 'LH10', 'Thiers13', 'Highschool', 'Workplace', 'Hospital', 'Copenhagen']
import pickle as pk
from hypergraphx.measures.shortest_paths import calc_prop_of_each_path_is_dyad
root = "/Users/bln1/Documents/phd/Y1/shortest-paths"
ds = 'Copenhagen'
temp_df_for_props = {}
for ds in (sorted(datasets_to_plot)):
if ds == 'InVS15':
continue
print(f"{ds.upper()}", end='\r')
all_sp_max = pk.load(open(f"{root}/HODATASETS/{ds}/paths_static/all_shortest_paths_ho_max.pck", 'rb'))
all_sp_mean = pk.load(open(f"{root}/HODATASETS/{ds}/paths_static/all_shortest_paths_ho_mean.pck", 'rb'))
all_sp_min = pk.load(open(f"{root}/HODATASETS/{ds}/paths_static/all_shortest_paths_ho_min.pck", 'rb'))
splho = pk.load(open(f"{root}/HODATASETS/{ds}/paths_static/spl_ho.pck", 'rb'))
prop_dy_max = calc_prop_of_each_path_is_dyad(shortest_paths_ho=all_sp_max, SPL_ho=splho)
prop_dy_mean = calc_prop_of_each_path_is_dyad(shortest_paths_ho=all_sp_mean, SPL_ho=splho)
prop_dy_min = calc_prop_of_each_path_is_dyad(shortest_paths_ho=all_sp_min, SPL_ho=splho)
temp = prop_dy_min.set_index('spl').join(prop_dy_mean.set_index('spl'), on='spl', lsuffix='_min', rsuffix='_mean').join(prop_dy_max.set_index('spl'), on='spl', rsuffix='_max')
temp.rename(columns={'pd_min': 'proportion_min', 'pd_mean': 'proportion_mean', 'pd': 'proportion_max'}, inplace=True)
temp.reset_index(inplace=True)
temp_df_for_props[ds] = temp
temp.plot(kind='bar', x='spl', y=['proportion_min', 'proportion_mean', 'proportion_max'], color=['seagreen', 'orange', 'blue'],
width=0.8,
title=f'Dyadic proportion of paths for {ds}', legend=True, alpha=0.8)
[ ]:
import pickle as pk
import os
from hypergraphx.measures.temporal.temporal_shortest_paths import embed_time_series_for_dataset
from hypergraphx.measures.temporal.temporal_shortest_paths import calc_shortest_fastest_paths_temporal_hypergraphs
import pandas as pd
from matplotlib.ticker import MaxNLocator
import matplotlib.pyplot as plt
import numpy as np
verbose = True
root = None
dsdir = None
dataset = None
[ ]:
# 1. always FIRST embed statically
# Load Time series
if verbose: print("1. LOAD TIME SERIES")
TS = pk.load(open(f'{root}/processed/TN_{dataset}.pck', 'rb'))
static_node_to_integer_lbl_map, integer_lbl_to_static_node_map, V, Vt0 = embed_time_series_for_dataset(TS=TS, dataset=dataset, root=root, verbose=verbose)
[ ]:
# 2. For each regime, and each hyperedge selection strategy, run the full pipeline
for regime in ['HO', 'DY', 'HOonly']:
for option in ['min', 'max', 'mean']:
if verbose: print(f"\n\n\nREGIME: {regime}, STRATEGY: {option.upper()}\n")
outputs = calc_shortest_fastest_paths_temporal_hypergraphs(root=root,
dataset_name=dataset,
verbose=verbose,
option=option,
regime=regime)
PATH_LEN_ARRAY_regime_option = outputs[0]
AVG_ORD_ARRAY_regime_option = outputs[1]
PATH_TIME_ARRAY_regime_option = outputs[2]
DYADIC_PROP_ARRAY_regime_option = outputs[3]
BEST_TIME_ARRAY_regime_option = outputs[4]
[ ]:
# EXAMPLE of PLOTTING
datasets_to_plot = None #insert here a list
root = "your/root/directory" #insert here your root directory
fastest_or_shortest_flag = 'F' #F for fastest, S for shortest
nrows = 4
ncols = 6
fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(ncols*5, 5*nrows))
axes = axes.ravel()
AVG_ORD_ALL_OPTIONS = {}
for ds, ax in zip(datasets_to_plot, axes):
ax.set_title(f"{ds.upper()}", fontsize=18)
PL_HO_all = pk.load(open(f"{root}/{ds}/paths_temporal/{fastest_or_shortest_flag}PL_HO_mean.pck", 'rb'))
SAO_mean = pk.load(open(f"{root}/{ds}/paths_temporal/{fastest_or_shortest_flag}AO_HO_mean.pck", 'rb'))
SAO_max = pk.load(open(f"{root}/{ds}/paths_temporal/{fastest_or_shortest_flag}AO_HO_max.pck", 'rb'))
SAO_min = pk.load(open(f"{root}/{ds}/paths_temporal/{fastest_or_shortest_flag}AO_HO_min.pck", 'rb'))
# pd.Series
avg_ord_all_options_df = pd.DataFrame(np.vstack([
PL_HO_all.ravel() ,
SAO_min.ravel(),
SAO_mean.ravel() ,
SAO_max.ravel(),
]),
index=['path_len', 'avg_ord_min', 'avg_ord_mean', 'avg_ord_max']).dropna(axis=1)
# avg_ord_all_options_df = avg_ord_all_options_df.T[['path_len_mean', 'avg_ord_mean', 'avg_ord_max', 'avg_ord_min']].rename(columns={'path_len_mean':'path_len'})
avg_ord_all_options_df = avg_ord_all_options_df.T
avg_ord_all_options_df['path_len'] = avg_ord_all_options_df['path_len'].astype(int)
AVG_ORD_ALL_OPTIONS[ds] = dict(avg_ord_all_options_df.mean(axis=0)[['avg_ord_min', 'avg_ord_mean', 'avg_ord_max']].round(2))
avg_ord_all_options_df = avg_ord_all_options_df.groupby('path_len').agg({'avg_ord_min': ['mean', 'std'],
'avg_ord_mean': ['mean', 'std'],
'avg_ord_max': ['mean', 'std'],
}).round(3)
avg_ord_all_options_df.reset_index(inplace=True)
avg_ord_all_options_df.columns = ['path_len', 'avg_ord_min', 'avg_ord_min_std', 'avg_ord_mean', 'avg_ord_mean_std', 'avg_ord_max', 'avg_ord_max_std']
avg_ord_all_options_df['path_len'] = avg_ord_all_options_df['path_len'].astype(int)
avg_ord_all_options_df.plot(kind='line', style='--o', x='path_len', y=['avg_ord_min', 'avg_ord_mean', 'avg_ord_max'],
color=['seagreen', 'orange', 'blue'],
label=['Min', 'Mean', 'Max'],
xlabel='Path length', ylabel='',
ax=ax)
for option, col in zip(['min', 'mean', 'max'], ['seagreen', 'orange', 'blue']):
ax.fill_between(avg_ord_all_options_df['path_len'],
avg_ord_all_options_df[f'avg_ord_{option}'] - avg_ord_all_options_df[f'avg_ord_{option}_std'],
avg_ord_all_options_df[f'avg_ord_{option}'] + avg_ord_all_options_df[f'avg_ord_{option}_std'],
color=col, alpha=0.1)
plt.suptitle(f"Average order for {'fastest' if fastest_or_shortest_flag=='F' else 'shortest'} paths", fontsize=24)
plt.tight_layout(pad=2.5)
[ ]:
# PLOTTING REDUNDANCY INFO
figheight = 5
regime='HO'
df = pd.DataFrame()
DATAROOT = "/Users/bln1/Documents/phd/Y1/shortest-paths/HODATASETS"
for ds in datasets_to_plot:
dsdir = f"{root}/HODATASETS/{ds}"
file_Fname= f"{dsdir}/paths_temporal/redundancy_F_{ds}_{regime}.pck"
if not os.path.isfile(file_Fname):
print("File not found: " + file_Fname)
continue
red_info_F = pk.load(open(file_Fname, 'rb'))
file_Sname= f"{dsdir}/paths_temporal/redundancy_S_{ds}_{regime}.pck"
assert os.path.isfile(file_Sname), "The fastest path file was found, but not the shortest path file. Something is wrong."
red_info_S = pk.load(open(file_Sname, 'rb'))
fig, axes = plt.subplots(1, 2, figsize=(figheight*2.2, figheight*0.75), dpi=100)
ax = axes[0]
red_info_F.index = red_info_F.index.astype(int)
red_info_F.plot(kind='line', style='--o', y='avg_num_redund_F_mean',
ax=ax,
label='Fastest',
title='Average number of redundancies',
legend=False,
markersize=3)
ax.fill_between(red_info_F.index.astype(int),
red_info_F['avg_num_redund_F_mean'] - red_info_F['avg_num_redund_F_std'],
red_info_F['avg_num_redund_F_mean'] + red_info_F['avg_num_redund_F_std'],
color='b',
alpha=0.1,
)
axes[0].set_xlabel(r"$\ell$")
ax.set_ylim(0)
ax = axes[1]
df = red_info_F['perc_of_path_is_redund_F_mean']
xvals = df.index.astype(int)
df.index = df.index.astype(int)
df.plot(
kind='bar',
color=['royalblue'], #, 'seagreen'
alpha=0.8,
title='% Steps with a redundancy present',
ax=axes[1], legend=False)
axes[0].xaxis.set_major_locator(MaxNLocator(integer=True))
axes[1].set_xlabel(r"$\ell$")
fig.tight_layout(pad=1.0)