{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "# Shortest Paths\n"
      ],
      "id": "6abf659d"
    },
    {
      "cell_type": "raw",
      "metadata": {
        "raw_mimetype": "text/restructuredtext"
      },
      "source": [
        ".. admonition:: Definition\n",
        "\n",
        "   Hypergraph shortest paths measure distance via hyperedges; order/size can change step costs.\n"
      ]
    },
    {
      "cell_type": "raw",
      "metadata": {
        "raw_mimetype": "text/restructuredtext"
      },
      "source": [
        ".. admonition:: You will learn\n",
        "\n",
        "   Compute hypergraph shortest paths and analyze how order affects distance.\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Overview\n",
        "\n",
        "- Compute shortest paths in hypergraphs and inspect path statistics.\n",
        "- Compare order/size effects on path structure.\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Setup\n"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {},
      "execution_count": null,
      "outputs": [],
      "source": [
        "import matplotlib as mpl\n",
        "\n",
        "mpl.rcParams.update({\n",
        "    \"figure.figsize\": (6, 4),\n",
        "    \"figure.dpi\": 120,\n",
        "    \"savefig.dpi\": 150,\n",
        "})\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "import pickle as pk\n",
        "import numpy as np\n",
        "import pandas as pd\n",
        "\n",
        "fname = \"\"\n",
        "shortest_paths_ho_min = pk.load(open(fname, 'rb'))\n",
        "\n",
        "_REDS = {}\n",
        "\n",
        "for i, dicto in shortest_paths_ho_min.items():\n",
        "\n",
        "    reds = {}\n",
        "    for j, spl in dicto.items():\n",
        "        if i==j:\n",
        "            continue\n",
        "        # print(i, j, spl)\n",
        "        # print(spl['redundancies'])\n",
        "        reds[j] = spl['redundancies']\n",
        "        l = len(spl['redundancies'])\n",
        "        if l>0:\n",
        "            if l not in _REDS:\n",
        "                _REDS[l] = [spl['redundancies']]\n",
        "            else:\n",
        "                _REDS[l].append(spl['redundancies'])\n",
        "\n",
        "REDS = {}\n",
        "for k, v in _REDS.items():\n",
        "    REDS[k] = {'mean': np.array(v).mean(axis=1).mean(axis=0) if np.array(v).ndim > 1 else np.array(v).mean(axis=0),\n",
        "            'std': np.array(v).mean(axis=1).std(axis=0) if np.array(v).ndim > 1 else np.array(v).std(axis=0)\n",
        "            }\n",
        "\n",
        "df_redund = pd.DataFrame(REDS).T.reset_index()\n",
        "df_redund.rename(columns={'index': 'spl'}, inplace=True)\n",
        "\n",
        "# print(df_redund.mean()[['mean', 'std']])\n",
        "\n",
        "df = pd.concat([df, pd.DataFrame(df_redund.mean()[['mean', 'std']]).T.rename(index={0:ds})])\n",
        "\n",
        "AVGS = {}\n",
        "PERC_PATHS_WITH_NO_REDUNDANCIES = {}\n",
        "PERC_OF_PATH_WITH_REDUNDANCY = {}\n",
        "for k, v in _REDS.items():\n",
        "    # average number of redundant steps per path length\n",
        "    AVGS[k] = np.array(v).sum(axis=1).mean()\n",
        "    # percentage of paths that have no redundant steps\n",
        "    percentagees = pd.Series(np.array(v).sum(axis=1)).value_counts(normalize=True)\n",
        "    if 0 in percentagees.index:\n",
        "        PERC_PATHS_WITH_NO_REDUNDANCIES[k] = percentagees[0]\n",
        "    else: \n",
        "        PERC_PATHS_WITH_NO_REDUNDANCIES[k] = 0\n",
        "\n",
        "    # (np.array(arr)!=0).sum() / len(arr)\n",
        "\n",
        "\n",
        "fname = f\"{root}/HODATASETS/{ds}/paths_static/\" + \"{f}.pck\"\n",
        "# THIS ONLY EXTRACTS THE LAST FEATURE OF THE SHORTEST PATHS DICT WHICH IS IN FACT STANDARD TO ALL 3 OPTIONS\n",
        "pk.dump(AVGS, open(fname.format(f='avg_num_redundancies_per_path_len'), 'wb'))\n",
        "pk.dump(PERC_PATHS_WITH_NO_REDUNDANCIES, open(fname.format(f='percentage_of_paths_with_no_redundancies'), 'wb'))\n",
        "\n",
        "# # A MORE PROCESSED VERSION FOR PLOTTING\n",
        "# # pk.dump(REDS, open(fname.format(f='avg_redundancies_per_length'), 'wb'))\n",
        "print(f\"{ds}: Done\")\n"
      ],
      "id": "65c60e08"
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# USING CALC_PROP_OF_EACH_PATH_IS_DYAD \n",
        "\n",
        "datasets_to_plot = ['InVS15', 'SFHH', 'LH10', 'Thiers13', 'Highschool', 'Workplace', 'Hospital', 'Copenhagen']\n",
        "import pickle as pk\n",
        "from hypergraphx.measures.shortest_paths import calc_prop_of_each_path_is_dyad\n",
        "\n",
        "\n",
        "root = \"/Users/bln1/Documents/phd/Y1/shortest-paths\"\n",
        "ds = 'Copenhagen'\n",
        "\n",
        "\n",
        "temp_df_for_props = {}\n",
        "\n",
        "for ds in (sorted(datasets_to_plot)):\n",
        "    if ds == 'InVS15':\n",
        "        continue\n",
        "    print(f\"{ds.upper()}\", end='\\r')\n",
        "\n",
        "    all_sp_max = pk.load(open(f\"{root}/HODATASETS/{ds}/paths_static/all_shortest_paths_ho_max.pck\", 'rb'))\n",
        "    all_sp_mean = pk.load(open(f\"{root}/HODATASETS/{ds}/paths_static/all_shortest_paths_ho_mean.pck\", 'rb'))\n",
        "    all_sp_min = pk.load(open(f\"{root}/HODATASETS/{ds}/paths_static/all_shortest_paths_ho_min.pck\", 'rb'))\n",
        "    splho = pk.load(open(f\"{root}/HODATASETS/{ds}/paths_static/spl_ho.pck\", 'rb'))\n",
        "\n",
        "\n",
        "    prop_dy_max = calc_prop_of_each_path_is_dyad(shortest_paths_ho=all_sp_max, SPL_ho=splho)\n",
        "    prop_dy_mean = calc_prop_of_each_path_is_dyad(shortest_paths_ho=all_sp_mean, SPL_ho=splho)\n",
        "    prop_dy_min = calc_prop_of_each_path_is_dyad(shortest_paths_ho=all_sp_min, SPL_ho=splho)\n",
        "\n",
        "    temp = prop_dy_min.set_index('spl').join(prop_dy_mean.set_index('spl'), on='spl', lsuffix='_min', rsuffix='_mean').join(prop_dy_max.set_index('spl'), on='spl', rsuffix='_max')\n",
        "    temp.rename(columns={'pd_min': 'proportion_min', 'pd_mean': 'proportion_mean', 'pd': 'proportion_max'}, inplace=True)\n",
        "    temp.reset_index(inplace=True)\n",
        "    \n",
        "    temp_df_for_props[ds] = temp\n",
        "\n",
        "\n",
        "\n",
        "temp.plot(kind='bar', x='spl', y=['proportion_min', 'proportion_mean', 'proportion_max'], color=['seagreen', 'orange', 'blue'], \n",
        "          width=0.8, \n",
        "          title=f'Dyadic proportion of paths for {ds}', legend=True, alpha=0.8)"
      ],
      "id": "6dcb49c4"
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "import pickle as pk\n",
        "import os\n",
        "from hypergraphx.measures.temporal.temporal_shortest_paths import embed_time_series_for_dataset\n",
        "from hypergraphx.measures.temporal.temporal_shortest_paths import calc_shortest_fastest_paths_temporal_hypergraphs\n",
        "import pandas as pd\n",
        "from matplotlib.ticker import MaxNLocator\n",
        "\n",
        "import matplotlib.pyplot as plt\n",
        "import numpy as np\n",
        "\n",
        "\n",
        "verbose = True\n",
        "root = None\n",
        "dsdir = None\n",
        "dataset = None"
      ],
      "id": "70cddb3e"
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# 1. always FIRST embed statically\n",
        "\n",
        "# Load Time series\n",
        "if verbose: print(\"1. LOAD TIME SERIES\")\n",
        "TS = pk.load(open(f'{root}/processed/TN_{dataset}.pck', 'rb'))\n",
        "\n",
        "static_node_to_integer_lbl_map, integer_lbl_to_static_node_map, V, Vt0 = embed_time_series_for_dataset(TS=TS, dataset=dataset, root=root, verbose=verbose)\n",
        "            \n",
        "       "
      ],
      "id": "7b246ab8"
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# 2. For each regime, and each hyperedge selection strategy, run the full pipeline\n",
        "\n",
        "for regime in ['HO', 'DY', 'HOonly']:\n",
        "    for option in ['min', 'max', 'mean']:\n",
        "        if verbose: print(f\"\\n\\n\\nREGIME: {regime}, STRATEGY: {option.upper()}\\n\")\n",
        "\n",
        "        outputs = calc_shortest_fastest_paths_temporal_hypergraphs(root=root,\n",
        "                                                        dataset_name=dataset,\n",
        "                                                        verbose=verbose, \n",
        "                                                        option=option, \n",
        "                                                        regime=regime)\n",
        "        PATH_LEN_ARRAY_regime_option = outputs[0]\n",
        "        AVG_ORD_ARRAY_regime_option = outputs[1]\n",
        "        PATH_TIME_ARRAY_regime_option = outputs[2]\n",
        "        DYADIC_PROP_ARRAY_regime_option = outputs[3]\n",
        "        BEST_TIME_ARRAY_regime_option = outputs[4]"
      ],
      "id": "4177e059"
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# EXAMPLE of PLOTTING\n",
        "\n",
        "\n",
        "datasets_to_plot = None         #insert here a list\n",
        "root = \"your/root/directory\"  #insert here your root directory\n",
        "\n",
        "fastest_or_shortest_flag = 'F' #F for fastest, S for shortest\n",
        "\n",
        "nrows = 4\n",
        "ncols = 6 \n",
        "\n",
        "fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(ncols*5, 5*nrows))\n",
        "axes = axes.ravel()\n",
        "\n",
        "AVG_ORD_ALL_OPTIONS = {}\n",
        "\n",
        "for ds, ax in zip(datasets_to_plot, axes):\n",
        "    ax.set_title(f\"{ds.upper()}\", fontsize=18)\n",
        "\n",
        "    PL_HO_all = pk.load(open(f\"{root}/{ds}/paths_temporal/{fastest_or_shortest_flag}PL_HO_mean.pck\", 'rb'))\n",
        "    SAO_mean = pk.load(open(f\"{root}/{ds}/paths_temporal/{fastest_or_shortest_flag}AO_HO_mean.pck\", 'rb'))\n",
        "    SAO_max = pk.load(open(f\"{root}/{ds}/paths_temporal/{fastest_or_shortest_flag}AO_HO_max.pck\", 'rb'))\n",
        "    SAO_min = pk.load(open(f\"{root}/{ds}/paths_temporal/{fastest_or_shortest_flag}AO_HO_min.pck\", 'rb'))\n",
        "\n",
        "    # pd.Series\n",
        "    avg_ord_all_options_df = pd.DataFrame(np.vstack([\n",
        "                            PL_HO_all.ravel() , \n",
        "                            SAO_min.ravel(),\n",
        "                            SAO_mean.ravel() ,\n",
        "                            SAO_max.ravel(), \n",
        "            ]), \n",
        "                index=['path_len', 'avg_ord_min', 'avg_ord_mean', 'avg_ord_max']).dropna(axis=1)\n",
        "\n",
        "\n",
        "    # avg_ord_all_options_df = avg_ord_all_options_df.T[['path_len_mean', 'avg_ord_mean', 'avg_ord_max', 'avg_ord_min']].rename(columns={'path_len_mean':'path_len'})\n",
        "\n",
        "    avg_ord_all_options_df = avg_ord_all_options_df.T\n",
        "    avg_ord_all_options_df['path_len'] = avg_ord_all_options_df['path_len'].astype(int)\n",
        "\n",
        "    AVG_ORD_ALL_OPTIONS[ds] = dict(avg_ord_all_options_df.mean(axis=0)[['avg_ord_min', 'avg_ord_mean', 'avg_ord_max']].round(2))\n",
        "\n",
        "    avg_ord_all_options_df = avg_ord_all_options_df.groupby('path_len').agg({'avg_ord_min': ['mean', 'std'], \n",
        "                                                    'avg_ord_mean': ['mean', 'std'], \n",
        "                                                    'avg_ord_max': ['mean', 'std'], \n",
        "                                                    }).round(3)\n",
        "\n",
        "    avg_ord_all_options_df.reset_index(inplace=True)\n",
        "    avg_ord_all_options_df.columns = ['path_len', 'avg_ord_min', 'avg_ord_min_std', 'avg_ord_mean', 'avg_ord_mean_std', 'avg_ord_max', 'avg_ord_max_std']\n",
        "\n",
        "    avg_ord_all_options_df['path_len'] = avg_ord_all_options_df['path_len'].astype(int)\n",
        "    \n",
        "\n",
        "    avg_ord_all_options_df.plot(kind='line', style='--o', x='path_len', y=['avg_ord_min', 'avg_ord_mean', 'avg_ord_max'], \n",
        "                                color=['seagreen', 'orange', 'blue'], \n",
        "                                label=['Min', 'Mean', 'Max'], \n",
        "                                xlabel='Path length', ylabel='', \n",
        "                                ax=ax)\n",
        "\n",
        "        \n",
        "    for option, col in zip(['min', 'mean', 'max'], ['seagreen', 'orange', 'blue']):\n",
        "        ax.fill_between(avg_ord_all_options_df['path_len'],\n",
        "                        avg_ord_all_options_df[f'avg_ord_{option}'] - avg_ord_all_options_df[f'avg_ord_{option}_std'],\n",
        "                        avg_ord_all_options_df[f'avg_ord_{option}'] + avg_ord_all_options_df[f'avg_ord_{option}_std'],\n",
        "                        color=col, alpha=0.1)\n",
        "        \n",
        "    \n",
        "\n",
        "plt.suptitle(f\"Average order for {'fastest' if fastest_or_shortest_flag=='F' else 'shortest'} paths\", fontsize=24)\n",
        "\n",
        "plt.tight_layout(pad=2.5)\n",
        "\n"
      ],
      "id": "8c8e70cd"
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# PLOTTING REDUNDANCY INFO\n",
        "\n",
        "figheight = 5\n",
        "regime='HO'\n",
        "\n",
        "df = pd.DataFrame()\n",
        "\n",
        "DATAROOT = \"/Users/bln1/Documents/phd/Y1/shortest-paths/HODATASETS\"\n",
        "\n",
        "\n",
        "for ds in datasets_to_plot:\n",
        "    dsdir = f\"{root}/HODATASETS/{ds}\"  \n",
        "    file_Fname= f\"{dsdir}/paths_temporal/redundancy_F_{ds}_{regime}.pck\"\n",
        "\n",
        "    if not os.path.isfile(file_Fname):\n",
        "        print(\"File not found: \" + file_Fname)\n",
        "        continue\n",
        "    red_info_F = pk.load(open(file_Fname, 'rb'))\n",
        "\n",
        "    file_Sname= f\"{dsdir}/paths_temporal/redundancy_S_{ds}_{regime}.pck\"\n",
        "    assert os.path.isfile(file_Sname), \"The fastest path file was found, but not the shortest path file. Something is wrong.\"\n",
        "    red_info_S = pk.load(open(file_Sname, 'rb'))\n",
        "\n",
        "\n",
        "    fig, axes = plt.subplots(1, 2, figsize=(figheight*2.2, figheight*0.75), dpi=100)\n",
        "    ax = axes[0]\n",
        "    red_info_F.index = red_info_F.index.astype(int)\n",
        "    red_info_F.plot(kind='line', style='--o', y='avg_num_redund_F_mean', \n",
        "                    ax=ax, \n",
        "                    label='Fastest', \n",
        "                    title='Average number of redundancies', \n",
        "                    legend=False,\n",
        "                    markersize=3)\n",
        "    ax.fill_between(red_info_F.index.astype(int),\n",
        "                    red_info_F['avg_num_redund_F_mean'] - red_info_F['avg_num_redund_F_std'],\n",
        "                    red_info_F['avg_num_redund_F_mean'] + red_info_F['avg_num_redund_F_std'],\n",
        "                    color='b', \n",
        "                    alpha=0.1, \n",
        "                    )\n",
        "\n",
        "    \n",
        "    axes[0].set_xlabel(r\"$\\ell$\")\n",
        "    ax.set_ylim(0)\n",
        "\n",
        "    ax = axes[1]\n",
        "\n",
        "\n",
        "    df = red_info_F['perc_of_path_is_redund_F_mean']\n",
        "    xvals = df.index.astype(int)\n",
        "    df.index = df.index.astype(int)\n",
        "    df.plot(\n",
        "            kind='bar',\n",
        "            color=['royalblue'],  #, 'seagreen' \n",
        "            alpha=0.8, \n",
        "            title='% Steps with a redundancy present', \n",
        "            ax=axes[1], legend=False)\n",
        "    axes[0].xaxis.set_major_locator(MaxNLocator(integer=True))\n",
        "    axes[1].set_xlabel(r\"$\\ell$\")\n",
        "\n",
        "\n",
        "    fig.tight_layout(pad=1.0)\n",
        "\n",
        "\n"
      ],
      "id": "9a602d77"
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "name": "python",
      "version": "3.11.5"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 5
}