{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Shortest Paths\n" ], "id": "6abf659d" }, { "cell_type": "raw", "metadata": { "raw_mimetype": "text/restructuredtext" }, "source": [ ".. admonition:: Definition\n", "\n", " Hypergraph shortest paths measure distance via hyperedges; order/size can change step costs.\n" ] }, { "cell_type": "raw", "metadata": { "raw_mimetype": "text/restructuredtext" }, "source": [ ".. admonition:: You will learn\n", "\n", " Compute hypergraph shortest paths and analyze how order affects distance.\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Overview\n", "\n", "- Compute shortest paths in hypergraphs and inspect path statistics.\n", "- Compare order/size effects on path structure.\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Setup\n" ] }, { "cell_type": "code", "metadata": {}, "execution_count": null, "outputs": [], "source": [ "import matplotlib as mpl\n", "\n", "mpl.rcParams.update({\n", " \"figure.figsize\": (6, 4),\n", " \"figure.dpi\": 120,\n", " \"savefig.dpi\": 150,\n", "})\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import pickle as pk\n", "import numpy as np\n", "import pandas as pd\n", "\n", "fname = \"\"\n", "shortest_paths_ho_min = pk.load(open(fname, 'rb'))\n", "\n", "_REDS = {}\n", "\n", "for i, dicto in shortest_paths_ho_min.items():\n", "\n", " reds = {}\n", " for j, spl in dicto.items():\n", " if i==j:\n", " continue\n", " # print(i, j, spl)\n", " # print(spl['redundancies'])\n", " reds[j] = spl['redundancies']\n", " l = len(spl['redundancies'])\n", " if l>0:\n", " if l not in _REDS:\n", " _REDS[l] = [spl['redundancies']]\n", " else:\n", " _REDS[l].append(spl['redundancies'])\n", "\n", "REDS = {}\n", "for k, v in _REDS.items():\n", " REDS[k] = {'mean': np.array(v).mean(axis=1).mean(axis=0) if np.array(v).ndim > 1 else np.array(v).mean(axis=0),\n", " 'std': np.array(v).mean(axis=1).std(axis=0) if np.array(v).ndim > 1 else np.array(v).std(axis=0)\n", " }\n", "\n", "df_redund = pd.DataFrame(REDS).T.reset_index()\n", "df_redund.rename(columns={'index': 'spl'}, inplace=True)\n", "\n", "# print(df_redund.mean()[['mean', 'std']])\n", "\n", "df = pd.concat([df, pd.DataFrame(df_redund.mean()[['mean', 'std']]).T.rename(index={0:ds})])\n", "\n", "AVGS = {}\n", "PERC_PATHS_WITH_NO_REDUNDANCIES = {}\n", "PERC_OF_PATH_WITH_REDUNDANCY = {}\n", "for k, v in _REDS.items():\n", " # average number of redundant steps per path length\n", " AVGS[k] = np.array(v).sum(axis=1).mean()\n", " # percentage of paths that have no redundant steps\n", " percentagees = pd.Series(np.array(v).sum(axis=1)).value_counts(normalize=True)\n", " if 0 in percentagees.index:\n", " PERC_PATHS_WITH_NO_REDUNDANCIES[k] = percentagees[0]\n", " else: \n", " PERC_PATHS_WITH_NO_REDUNDANCIES[k] = 0\n", "\n", " # (np.array(arr)!=0).sum() / len(arr)\n", "\n", "\n", "fname = f\"{root}/HODATASETS/{ds}/paths_static/\" + \"{f}.pck\"\n", "# THIS ONLY EXTRACTS THE LAST FEATURE OF THE SHORTEST PATHS DICT WHICH IS IN FACT STANDARD TO ALL 3 OPTIONS\n", "pk.dump(AVGS, open(fname.format(f='avg_num_redundancies_per_path_len'), 'wb'))\n", "pk.dump(PERC_PATHS_WITH_NO_REDUNDANCIES, open(fname.format(f='percentage_of_paths_with_no_redundancies'), 'wb'))\n", "\n", "# # A MORE PROCESSED VERSION FOR PLOTTING\n", "# # pk.dump(REDS, open(fname.format(f='avg_redundancies_per_length'), 'wb'))\n", "print(f\"{ds}: Done\")\n" ], "id": "65c60e08" }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# USING CALC_PROP_OF_EACH_PATH_IS_DYAD \n", "\n", "datasets_to_plot = ['InVS15', 'SFHH', 'LH10', 'Thiers13', 'Highschool', 'Workplace', 'Hospital', 'Copenhagen']\n", "import pickle as pk\n", "from hypergraphx.measures.shortest_paths import calc_prop_of_each_path_is_dyad\n", "\n", "\n", "root = \"/Users/bln1/Documents/phd/Y1/shortest-paths\"\n", "ds = 'Copenhagen'\n", "\n", "\n", "temp_df_for_props = {}\n", "\n", "for ds in (sorted(datasets_to_plot)):\n", " if ds == 'InVS15':\n", " continue\n", " print(f\"{ds.upper()}\", end='\\r')\n", "\n", " all_sp_max = pk.load(open(f\"{root}/HODATASETS/{ds}/paths_static/all_shortest_paths_ho_max.pck\", 'rb'))\n", " all_sp_mean = pk.load(open(f\"{root}/HODATASETS/{ds}/paths_static/all_shortest_paths_ho_mean.pck\", 'rb'))\n", " all_sp_min = pk.load(open(f\"{root}/HODATASETS/{ds}/paths_static/all_shortest_paths_ho_min.pck\", 'rb'))\n", " splho = pk.load(open(f\"{root}/HODATASETS/{ds}/paths_static/spl_ho.pck\", 'rb'))\n", "\n", "\n", " prop_dy_max = calc_prop_of_each_path_is_dyad(shortest_paths_ho=all_sp_max, SPL_ho=splho)\n", " prop_dy_mean = calc_prop_of_each_path_is_dyad(shortest_paths_ho=all_sp_mean, SPL_ho=splho)\n", " prop_dy_min = calc_prop_of_each_path_is_dyad(shortest_paths_ho=all_sp_min, SPL_ho=splho)\n", "\n", " temp = prop_dy_min.set_index('spl').join(prop_dy_mean.set_index('spl'), on='spl', lsuffix='_min', rsuffix='_mean').join(prop_dy_max.set_index('spl'), on='spl', rsuffix='_max')\n", " temp.rename(columns={'pd_min': 'proportion_min', 'pd_mean': 'proportion_mean', 'pd': 'proportion_max'}, inplace=True)\n", " temp.reset_index(inplace=True)\n", " \n", " temp_df_for_props[ds] = temp\n", "\n", "\n", "\n", "temp.plot(kind='bar', x='spl', y=['proportion_min', 'proportion_mean', 'proportion_max'], color=['seagreen', 'orange', 'blue'], \n", " width=0.8, \n", " title=f'Dyadic proportion of paths for {ds}', legend=True, alpha=0.8)" ], "id": "6dcb49c4" }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import pickle as pk\n", "import os\n", "from hypergraphx.measures.temporal.temporal_shortest_paths import embed_time_series_for_dataset\n", "from hypergraphx.measures.temporal.temporal_shortest_paths import calc_shortest_fastest_paths_temporal_hypergraphs\n", "import pandas as pd\n", "from matplotlib.ticker import MaxNLocator\n", "\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "\n", "\n", "verbose = True\n", "root = None\n", "dsdir = None\n", "dataset = None" ], "id": "70cddb3e" }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# 1. always FIRST embed statically\n", "\n", "# Load Time series\n", "if verbose: print(\"1. LOAD TIME SERIES\")\n", "TS = pk.load(open(f'{root}/processed/TN_{dataset}.pck', 'rb'))\n", "\n", "static_node_to_integer_lbl_map, integer_lbl_to_static_node_map, V, Vt0 = embed_time_series_for_dataset(TS=TS, dataset=dataset, root=root, verbose=verbose)\n", " \n", " " ], "id": "7b246ab8" }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# 2. For each regime, and each hyperedge selection strategy, run the full pipeline\n", "\n", "for regime in ['HO', 'DY', 'HOonly']:\n", " for option in ['min', 'max', 'mean']:\n", " if verbose: print(f\"\\n\\n\\nREGIME: {regime}, STRATEGY: {option.upper()}\\n\")\n", "\n", " outputs = calc_shortest_fastest_paths_temporal_hypergraphs(root=root,\n", " dataset_name=dataset,\n", " verbose=verbose, \n", " option=option, \n", " regime=regime)\n", " PATH_LEN_ARRAY_regime_option = outputs[0]\n", " AVG_ORD_ARRAY_regime_option = outputs[1]\n", " PATH_TIME_ARRAY_regime_option = outputs[2]\n", " DYADIC_PROP_ARRAY_regime_option = outputs[3]\n", " BEST_TIME_ARRAY_regime_option = outputs[4]" ], "id": "4177e059" }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# EXAMPLE of PLOTTING\n", "\n", "\n", "datasets_to_plot = None #insert here a list\n", "root = \"your/root/directory\" #insert here your root directory\n", "\n", "fastest_or_shortest_flag = 'F' #F for fastest, S for shortest\n", "\n", "nrows = 4\n", "ncols = 6 \n", "\n", "fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(ncols*5, 5*nrows))\n", "axes = axes.ravel()\n", "\n", "AVG_ORD_ALL_OPTIONS = {}\n", "\n", "for ds, ax in zip(datasets_to_plot, axes):\n", " ax.set_title(f\"{ds.upper()}\", fontsize=18)\n", "\n", " PL_HO_all = pk.load(open(f\"{root}/{ds}/paths_temporal/{fastest_or_shortest_flag}PL_HO_mean.pck\", 'rb'))\n", " SAO_mean = pk.load(open(f\"{root}/{ds}/paths_temporal/{fastest_or_shortest_flag}AO_HO_mean.pck\", 'rb'))\n", " SAO_max = pk.load(open(f\"{root}/{ds}/paths_temporal/{fastest_or_shortest_flag}AO_HO_max.pck\", 'rb'))\n", " SAO_min = pk.load(open(f\"{root}/{ds}/paths_temporal/{fastest_or_shortest_flag}AO_HO_min.pck\", 'rb'))\n", "\n", " # pd.Series\n", " avg_ord_all_options_df = pd.DataFrame(np.vstack([\n", " PL_HO_all.ravel() , \n", " SAO_min.ravel(),\n", " SAO_mean.ravel() ,\n", " SAO_max.ravel(), \n", " ]), \n", " index=['path_len', 'avg_ord_min', 'avg_ord_mean', 'avg_ord_max']).dropna(axis=1)\n", "\n", "\n", " # avg_ord_all_options_df = avg_ord_all_options_df.T[['path_len_mean', 'avg_ord_mean', 'avg_ord_max', 'avg_ord_min']].rename(columns={'path_len_mean':'path_len'})\n", "\n", " avg_ord_all_options_df = avg_ord_all_options_df.T\n", " avg_ord_all_options_df['path_len'] = avg_ord_all_options_df['path_len'].astype(int)\n", "\n", " AVG_ORD_ALL_OPTIONS[ds] = dict(avg_ord_all_options_df.mean(axis=0)[['avg_ord_min', 'avg_ord_mean', 'avg_ord_max']].round(2))\n", "\n", " avg_ord_all_options_df = avg_ord_all_options_df.groupby('path_len').agg({'avg_ord_min': ['mean', 'std'], \n", " 'avg_ord_mean': ['mean', 'std'], \n", " 'avg_ord_max': ['mean', 'std'], \n", " }).round(3)\n", "\n", " avg_ord_all_options_df.reset_index(inplace=True)\n", " avg_ord_all_options_df.columns = ['path_len', 'avg_ord_min', 'avg_ord_min_std', 'avg_ord_mean', 'avg_ord_mean_std', 'avg_ord_max', 'avg_ord_max_std']\n", "\n", " avg_ord_all_options_df['path_len'] = avg_ord_all_options_df['path_len'].astype(int)\n", " \n", "\n", " avg_ord_all_options_df.plot(kind='line', style='--o', x='path_len', y=['avg_ord_min', 'avg_ord_mean', 'avg_ord_max'], \n", " color=['seagreen', 'orange', 'blue'], \n", " label=['Min', 'Mean', 'Max'], \n", " xlabel='Path length', ylabel='', \n", " ax=ax)\n", "\n", " \n", " for option, col in zip(['min', 'mean', 'max'], ['seagreen', 'orange', 'blue']):\n", " ax.fill_between(avg_ord_all_options_df['path_len'],\n", " avg_ord_all_options_df[f'avg_ord_{option}'] - avg_ord_all_options_df[f'avg_ord_{option}_std'],\n", " avg_ord_all_options_df[f'avg_ord_{option}'] + avg_ord_all_options_df[f'avg_ord_{option}_std'],\n", " color=col, alpha=0.1)\n", " \n", " \n", "\n", "plt.suptitle(f\"Average order for {'fastest' if fastest_or_shortest_flag=='F' else 'shortest'} paths\", fontsize=24)\n", "\n", "plt.tight_layout(pad=2.5)\n", "\n" ], "id": "8c8e70cd" }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# PLOTTING REDUNDANCY INFO\n", "\n", "figheight = 5\n", "regime='HO'\n", "\n", "df = pd.DataFrame()\n", "\n", "DATAROOT = \"/Users/bln1/Documents/phd/Y1/shortest-paths/HODATASETS\"\n", "\n", "\n", "for ds in datasets_to_plot:\n", " dsdir = f\"{root}/HODATASETS/{ds}\" \n", " file_Fname= f\"{dsdir}/paths_temporal/redundancy_F_{ds}_{regime}.pck\"\n", "\n", " if not os.path.isfile(file_Fname):\n", " print(\"File not found: \" + file_Fname)\n", " continue\n", " red_info_F = pk.load(open(file_Fname, 'rb'))\n", "\n", " file_Sname= f\"{dsdir}/paths_temporal/redundancy_S_{ds}_{regime}.pck\"\n", " assert os.path.isfile(file_Sname), \"The fastest path file was found, but not the shortest path file. Something is wrong.\"\n", " red_info_S = pk.load(open(file_Sname, 'rb'))\n", "\n", "\n", " fig, axes = plt.subplots(1, 2, figsize=(figheight*2.2, figheight*0.75), dpi=100)\n", " ax = axes[0]\n", " red_info_F.index = red_info_F.index.astype(int)\n", " red_info_F.plot(kind='line', style='--o', y='avg_num_redund_F_mean', \n", " ax=ax, \n", " label='Fastest', \n", " title='Average number of redundancies', \n", " legend=False,\n", " markersize=3)\n", " ax.fill_between(red_info_F.index.astype(int),\n", " red_info_F['avg_num_redund_F_mean'] - red_info_F['avg_num_redund_F_std'],\n", " red_info_F['avg_num_redund_F_mean'] + red_info_F['avg_num_redund_F_std'],\n", " color='b', \n", " alpha=0.1, \n", " )\n", "\n", " \n", " axes[0].set_xlabel(r\"$\\ell$\")\n", " ax.set_ylim(0)\n", "\n", " ax = axes[1]\n", "\n", "\n", " df = red_info_F['perc_of_path_is_redund_F_mean']\n", " xvals = df.index.astype(int)\n", " df.index = df.index.astype(int)\n", " df.plot(\n", " kind='bar',\n", " color=['royalblue'], #, 'seagreen' \n", " alpha=0.8, \n", " title='% Steps with a redundancy present', \n", " ax=axes[1], legend=False)\n", " axes[0].xaxis.set_major_locator(MaxNLocator(integer=True))\n", " axes[1].set_xlabel(r\"$\\ell$\")\n", "\n", "\n", " fig.tight_layout(pad=1.0)\n", "\n", "\n" ], "id": "9a602d77" } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "name": "python", "version": "3.11.5" } }, "nbformat": 4, "nbformat_minor": 5 }