Shortest Paths¶

Definition

Hypergraph shortest paths measure distance via hyperedges; order/size can change step costs.

You will learn

Compute hypergraph shortest paths and analyze how order affects distance.

Overview¶

Compute shortest paths in hypergraphs and inspect path statistics.
Compare order/size effects on path structure.

Setup¶

[ ]:

import matplotlib as mpl

mpl.rcParams.update({
    "figure.figsize": (6, 4),
    "figure.dpi": 120,
    "savefig.dpi": 150,
})

[ ]:

import pickle as pk
import numpy as np
import pandas as pd

fname = ""
shortest_paths_ho_min = pk.load(open(fname, 'rb'))

_REDS = {}

for i, dicto in shortest_paths_ho_min.items():

    reds = {}
    for j, spl in dicto.items():
        if i==j:
            continue
        # print(i, j, spl)
        # print(spl['redundancies'])
        reds[j] = spl['redundancies']
        l = len(spl['redundancies'])
        if l>0:
            if l not in _REDS:
                _REDS[l] = [spl['redundancies']]
            else:
                _REDS[l].append(spl['redundancies'])

REDS = {}
for k, v in _REDS.items():
    REDS[k] = {'mean': np.array(v).mean(axis=1).mean(axis=0) if np.array(v).ndim > 1 else np.array(v).mean(axis=0),
            'std': np.array(v).mean(axis=1).std(axis=0) if np.array(v).ndim > 1 else np.array(v).std(axis=0)
            }

df_redund = pd.DataFrame(REDS).T.reset_index()
df_redund.rename(columns={'index': 'spl'}, inplace=True)

# print(df_redund.mean()[['mean', 'std']])

df = pd.concat([df, pd.DataFrame(df_redund.mean()[['mean', 'std']]).T.rename(index={0:ds})])

AVGS = {}
PERC_PATHS_WITH_NO_REDUNDANCIES = {}
PERC_OF_PATH_WITH_REDUNDANCY = {}
for k, v in _REDS.items():
    # average number of redundant steps per path length
    AVGS[k] = np.array(v).sum(axis=1).mean()
    # percentage of paths that have no redundant steps
    percentagees = pd.Series(np.array(v).sum(axis=1)).value_counts(normalize=True)
    if 0 in percentagees.index:
        PERC_PATHS_WITH_NO_REDUNDANCIES[k] = percentagees[0]
    else:
        PERC_PATHS_WITH_NO_REDUNDANCIES[k] = 0

    # (np.array(arr)!=0).sum() / len(arr)


fname = f"{root}/HODATASETS/{ds}/paths_static/" + "{f}.pck"
# THIS ONLY EXTRACTS THE LAST FEATURE OF THE SHORTEST PATHS DICT WHICH IS IN FACT STANDARD TO ALL 3 OPTIONS
pk.dump(AVGS, open(fname.format(f='avg_num_redundancies_per_path_len'), 'wb'))
pk.dump(PERC_PATHS_WITH_NO_REDUNDANCIES, open(fname.format(f='percentage_of_paths_with_no_redundancies'), 'wb'))

# # A MORE PROCESSED VERSION FOR PLOTTING
# # pk.dump(REDS, open(fname.format(f='avg_redundancies_per_length'), 'wb'))
print(f"{ds}: Done")

[ ]:

# USING CALC_PROP_OF_EACH_PATH_IS_DYAD

datasets_to_plot = ['InVS15', 'SFHH', 'LH10', 'Thiers13', 'Highschool', 'Workplace', 'Hospital', 'Copenhagen']
import pickle as pk
from hypergraphx.measures.shortest_paths import calc_prop_of_each_path_is_dyad


root = "/Users/bln1/Documents/phd/Y1/shortest-paths"
ds = 'Copenhagen'


temp_df_for_props = {}

for ds in (sorted(datasets_to_plot)):
    if ds == 'InVS15':
        continue
    print(f"{ds.upper()}", end='\r')

    all_sp_max = pk.load(open(f"{root}/HODATASETS/{ds}/paths_static/all_shortest_paths_ho_max.pck", 'rb'))
    all_sp_mean = pk.load(open(f"{root}/HODATASETS/{ds}/paths_static/all_shortest_paths_ho_mean.pck", 'rb'))
    all_sp_min = pk.load(open(f"{root}/HODATASETS/{ds}/paths_static/all_shortest_paths_ho_min.pck", 'rb'))
    splho = pk.load(open(f"{root}/HODATASETS/{ds}/paths_static/spl_ho.pck", 'rb'))


    prop_dy_max = calc_prop_of_each_path_is_dyad(shortest_paths_ho=all_sp_max, SPL_ho=splho)
    prop_dy_mean = calc_prop_of_each_path_is_dyad(shortest_paths_ho=all_sp_mean, SPL_ho=splho)
    prop_dy_min = calc_prop_of_each_path_is_dyad(shortest_paths_ho=all_sp_min, SPL_ho=splho)

    temp = prop_dy_min.set_index('spl').join(prop_dy_mean.set_index('spl'), on='spl', lsuffix='_min', rsuffix='_mean').join(prop_dy_max.set_index('spl'), on='spl', rsuffix='_max')
    temp.rename(columns={'pd_min': 'proportion_min', 'pd_mean': 'proportion_mean', 'pd': 'proportion_max'}, inplace=True)
    temp.reset_index(inplace=True)

    temp_df_for_props[ds] = temp



temp.plot(kind='bar', x='spl', y=['proportion_min', 'proportion_mean', 'proportion_max'], color=['seagreen', 'orange', 'blue'],
          width=0.8,
          title=f'Dyadic proportion of paths for {ds}', legend=True, alpha=0.8)

[ ]:

import pickle as pk
import os
from hypergraphx.measures.temporal.temporal_shortest_paths import embed_time_series_for_dataset
from hypergraphx.measures.temporal.temporal_shortest_paths import calc_shortest_fastest_paths_temporal_hypergraphs
import pandas as pd
from matplotlib.ticker import MaxNLocator

import matplotlib.pyplot as plt
import numpy as np


verbose = True
root = None
dsdir = None
dataset = None

[ ]:

# 1. always FIRST embed statically

# Load Time series
if verbose: print("1. LOAD TIME SERIES")
TS = pk.load(open(f'{root}/processed/TN_{dataset}.pck', 'rb'))

static_node_to_integer_lbl_map, integer_lbl_to_static_node_map, V, Vt0 = embed_time_series_for_dataset(TS=TS, dataset=dataset, root=root, verbose=verbose)

[ ]:

# 2. For each regime, and each hyperedge selection strategy, run the full pipeline

for regime in ['HO', 'DY', 'HOonly']:
    for option in ['min', 'max', 'mean']:
        if verbose: print(f"\n\n\nREGIME: {regime}, STRATEGY: {option.upper()}\n")

        outputs = calc_shortest_fastest_paths_temporal_hypergraphs(root=root,
                                                        dataset_name=dataset,
                                                        verbose=verbose,
                                                        option=option,
                                                        regime=regime)
        PATH_LEN_ARRAY_regime_option = outputs[0]
        AVG_ORD_ARRAY_regime_option = outputs[1]
        PATH_TIME_ARRAY_regime_option = outputs[2]
        DYADIC_PROP_ARRAY_regime_option = outputs[3]
        BEST_TIME_ARRAY_regime_option = outputs[4]

[ ]:

# EXAMPLE of PLOTTING


datasets_to_plot = None         #insert here a list
root = "your/root/directory"  #insert here your root directory

fastest_or_shortest_flag = 'F' #F for fastest, S for shortest

nrows = 4
ncols = 6

fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(ncols*5, 5*nrows))
axes = axes.ravel()

AVG_ORD_ALL_OPTIONS = {}

for ds, ax in zip(datasets_to_plot, axes):
    ax.set_title(f"{ds.upper()}", fontsize=18)

    PL_HO_all = pk.load(open(f"{root}/{ds}/paths_temporal/{fastest_or_shortest_flag}PL_HO_mean.pck", 'rb'))
    SAO_mean = pk.load(open(f"{root}/{ds}/paths_temporal/{fastest_or_shortest_flag}AO_HO_mean.pck", 'rb'))
    SAO_max = pk.load(open(f"{root}/{ds}/paths_temporal/{fastest_or_shortest_flag}AO_HO_max.pck", 'rb'))
    SAO_min = pk.load(open(f"{root}/{ds}/paths_temporal/{fastest_or_shortest_flag}AO_HO_min.pck", 'rb'))

    # pd.Series
    avg_ord_all_options_df = pd.DataFrame(np.vstack([
                            PL_HO_all.ravel() ,
                            SAO_min.ravel(),
                            SAO_mean.ravel() ,
                            SAO_max.ravel(),
            ]),
                index=['path_len', 'avg_ord_min', 'avg_ord_mean', 'avg_ord_max']).dropna(axis=1)


    # avg_ord_all_options_df = avg_ord_all_options_df.T[['path_len_mean', 'avg_ord_mean', 'avg_ord_max', 'avg_ord_min']].rename(columns={'path_len_mean':'path_len'})

    avg_ord_all_options_df = avg_ord_all_options_df.T
    avg_ord_all_options_df['path_len'] = avg_ord_all_options_df['path_len'].astype(int)

    AVG_ORD_ALL_OPTIONS[ds] = dict(avg_ord_all_options_df.mean(axis=0)[['avg_ord_min', 'avg_ord_mean', 'avg_ord_max']].round(2))

    avg_ord_all_options_df = avg_ord_all_options_df.groupby('path_len').agg({'avg_ord_min': ['mean', 'std'],
                                                    'avg_ord_mean': ['mean', 'std'],
                                                    'avg_ord_max': ['mean', 'std'],
                                                    }).round(3)

    avg_ord_all_options_df.reset_index(inplace=True)
    avg_ord_all_options_df.columns = ['path_len', 'avg_ord_min', 'avg_ord_min_std', 'avg_ord_mean', 'avg_ord_mean_std', 'avg_ord_max', 'avg_ord_max_std']

    avg_ord_all_options_df['path_len'] = avg_ord_all_options_df['path_len'].astype(int)


    avg_ord_all_options_df.plot(kind='line', style='--o', x='path_len', y=['avg_ord_min', 'avg_ord_mean', 'avg_ord_max'],
                                color=['seagreen', 'orange', 'blue'],
                                label=['Min', 'Mean', 'Max'],
                                xlabel='Path length', ylabel='',
                                ax=ax)


    for option, col in zip(['min', 'mean', 'max'], ['seagreen', 'orange', 'blue']):
        ax.fill_between(avg_ord_all_options_df['path_len'],
                        avg_ord_all_options_df[f'avg_ord_{option}'] - avg_ord_all_options_df[f'avg_ord_{option}_std'],
                        avg_ord_all_options_df[f'avg_ord_{option}'] + avg_ord_all_options_df[f'avg_ord_{option}_std'],
                        color=col, alpha=0.1)



plt.suptitle(f"Average order for {'fastest' if fastest_or_shortest_flag=='F' else 'shortest'} paths", fontsize=24)

plt.tight_layout(pad=2.5)

[ ]:

# PLOTTING REDUNDANCY INFO

figheight = 5
regime='HO'

df = pd.DataFrame()

DATAROOT = "/Users/bln1/Documents/phd/Y1/shortest-paths/HODATASETS"


for ds in datasets_to_plot:
    dsdir = f"{root}/HODATASETS/{ds}"
    file_Fname= f"{dsdir}/paths_temporal/redundancy_F_{ds}_{regime}.pck"

    if not os.path.isfile(file_Fname):
        print("File not found: " + file_Fname)
        continue
    red_info_F = pk.load(open(file_Fname, 'rb'))

    file_Sname= f"{dsdir}/paths_temporal/redundancy_S_{ds}_{regime}.pck"
    assert os.path.isfile(file_Sname), "The fastest path file was found, but not the shortest path file. Something is wrong."
    red_info_S = pk.load(open(file_Sname, 'rb'))


    fig, axes = plt.subplots(1, 2, figsize=(figheight*2.2, figheight*0.75), dpi=100)
    ax = axes[0]
    red_info_F.index = red_info_F.index.astype(int)
    red_info_F.plot(kind='line', style='--o', y='avg_num_redund_F_mean',
                    ax=ax,
                    label='Fastest',
                    title='Average number of redundancies',
                    legend=False,
                    markersize=3)
    ax.fill_between(red_info_F.index.astype(int),
                    red_info_F['avg_num_redund_F_mean'] - red_info_F['avg_num_redund_F_std'],
                    red_info_F['avg_num_redund_F_mean'] + red_info_F['avg_num_redund_F_std'],
                    color='b',
                    alpha=0.1,
                    )


    axes[0].set_xlabel(r"$\ell$")
    ax.set_ylim(0)

    ax = axes[1]


    df = red_info_F['perc_of_path_is_redund_F_mean']
    xvals = df.index.astype(int)
    df.index = df.index.astype(int)
    df.plot(
            kind='bar',
            color=['royalblue'],  #, 'seagreen'
            alpha=0.8,
            title='% Steps with a redundancy present',
            ax=axes[1], legend=False)
    axes[0].xaxis.set_major_locator(MaxNLocator(integer=True))
    axes[1].set_xlabel(r"$\ell$")


    fig.tight_layout(pad=1.0)