{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Extract Statistically Validated Hypergraph\n" ], "id": "41a2159e" },
{ "cell_type": "raw", "metadata": { "raw_mimetype": "text/restructuredtext" }, "source": [ ".. admonition:: Definition\n", "\n", " Statistical filtering removes hyperedges that are not significant under a null model.\n" ] },
{ "cell_type": "raw", "metadata": { "raw_mimetype": "text/restructuredtext" }, "source": [ ".. admonition:: You will learn\n", "\n", " Extract statistically validated substructures and compare with the original hypergraph.\n" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Overview\n", "\n", "- Extract statistically validated substructures and cores.\n", "- Compare filtered and original hypergraphs.\n" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Setup\n" ] },
{ "cell_type": "code", "metadata": {}, "execution_count": null, "outputs": [], "source": [ "import matplotlib as mpl\n", "\n", "mpl.rcParams.update({\n", " \"figure.figsize\": (6, 4),\n", " \"figure.dpi\": 120,\n", " \"savefig.dpi\": 150,\n", "})\n" ] },
{ "cell_type": "code", "execution_count": 1, "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "import sys\n", "\n", "import numpy as np\n", "\n", "sys.path.append(\"..\")\n", "from hypergraphx.core import Hypergraph\n", "from hypergraphx.filters import get_svh, get_svc\n", "\n", "np.random.seed(123)" ], "id": "13df5aa6" },
{ "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "def line_to_hyperedge(line):\n", "    \"\"\"Parse one line of whitespace-separated node ids into a list of ints.\n", "\n", "    Splitting on any whitespace tolerates the trailing newline as well as\n", "    repeated or trailing spaces. A line without any node id raises ValueError.\n", "    \"\"\"\n", "    nodes = [int(node) for node in line.split()]\n", "    if not nodes:\n", "        raise ValueError(\"hyperedge line contains no node ids\")\n", "    return nodes\n", "\n", "\n", "def line_to_weight(line):\n", "    \"\"\"Parse a line holding a single integer weight.\"\"\"\n", "    return int(line.strip())" ], "id": "29326425" },
{ "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# Load Justice dataset.\n", "with open(\"./_example_data/justice_data/hyperedges.txt\", \"r\") as hye_file:\n", "    hyes = [tuple(line_to_hyperedge(line)) for line in hye_file]\n", "with open(\"./_example_data/justice_data/weights.txt\", \"r\") as weight_file:\n", "    weights = [line_to_weight(line) for line in weight_file]\n", "\n", "justice = Hypergraph(hyes, weighted=True, weights=weights)" ], "id": "a8a6c83a" },
{ "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "justice_svh = get_svh(justice,\n", " mp=True)" ], "id": "200b9374" },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### Show Statistically Validated Hyperlinks" ], "id": "966b6284" },
{ "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
edgepvaluefdr
1(0, 2)1.186904e-60True
22(1, 6)7.192356e-05True
24(1, 8)2.210969e-13True
49(3, 5)1.153760e-16True
53(4, 7)2.501053e-35True
68(8, 9)2.931209e-06True
73(9, 12)7.456268e-18True
75(9, 14)3.458745e-05True
81(11, 17)6.734703e-05True
85(12, 15)4.004282e-24True
96(13, 19)1.645878e-76True
132(20, 21)5.618608e-06True
133(20, 22)2.346363e-08True
134(20, 23)1.421147e-11True
139(21, 24)9.122267e-07True
160(24, 30)2.371769e-05True
161(24, 31)3.748224e-05True
171(26, 29)1.257113e-53True
181(27, 32)4.729259e-06True
185(28, 30)1.426895e-04True
191(29, 33)4.870316e-19True
193(29, 36)9.671574e-10True
195(30, 31)9.654795e-11True
198(30, 34)4.293666e-20True
205(32, 33)7.718603e-10True
212(34, 35)2.639628e-06True
\n", "
" ], "text/plain": [ " edge pvalue fdr\n", "1 (0, 2) 1.186904e-60 True\n", "22 (1, 6) 7.192356e-05 True\n", "24 (1, 8) 2.210969e-13 True\n", "49 (3, 5) 1.153760e-16 True\n", "53 (4, 7) 2.501053e-35 True\n", "68 (8, 9) 2.931209e-06 True\n", "73 (9, 12) 7.456268e-18 True\n", "75 (9, 14) 3.458745e-05 True\n", "81 (11, 17) 6.734703e-05 True\n", "85 (12, 15) 4.004282e-24 True\n", "96 (13, 19) 1.645878e-76 True\n", "132 (20, 21) 5.618608e-06 True\n", "133 (20, 22) 2.346363e-08 True\n", "134 (20, 23) 1.421147e-11 True\n", "139 (21, 24) 9.122267e-07 True\n", "160 (24, 30) 2.371769e-05 True\n", "161 (24, 31) 3.748224e-05 True\n", "171 (26, 29) 1.257113e-53 True\n", "181 (27, 32) 4.729259e-06 True\n", "185 (28, 30) 1.426895e-04 True\n", "191 (29, 33) 4.870316e-19 True\n", "193 (29, 36) 9.671574e-10 True\n", "195 (30, 31) 9.654795e-11 True\n", "198 (30, 34) 4.293666e-20 True\n", "205 (32, 33) 7.718603e-10 True\n", "212 (34, 35) 2.639628e-06 True" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "order = 2\n", "justice_svh[order].query('fdr')" ], "id": "ca94a85a" },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Extract Statistically Validated Cores" ], "id": "dc3bed6b" },
{ "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "# Load Walmart dataset.\n", "with open(\"./_example_data/walmart_data/walmart-trips-reduced.txt\", \"r\") as hye_file:\n", "    hyes = [tuple(line_to_hyperedge(row)) for row in hye_file]\n", "\n", "# Keep each distinct hyperedge once; this example builds an unweighted hypergraph.\n", "hyes = list(set(hyes))\n", "walmart = Hypergraph(hyes, weighted=False)" ], "id": "5cf0ca81" },
{ "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "walmart_svc = get_svc(walmart, alpha=0.01, max_order=4)" ], "id": "4be81056" },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### Show Statistically Validated Cores" ], "id": "dfe83df5" },
{ "cell_type": "code",
"execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
grouppvaluewfdr
4670(437, 2176, 2625, 5349)6.240333e-264True
6244(329, 525, 1830, 5815)1.121532e-182True
7294(39391, 39392, 39395, 39396)8.279634e-262True
8184(169, 925, 1764, 3555)2.884992e-192True
8187(169, 925, 3555, 12076)1.755229e-192True
...............
174744(937, 34575)5.083782e-113True
205389(7545, 27574)1.114978e-093True
215347(10586, 49880)1.744001e-113True
224440(21358, 30668)2.214484e-103True
226303(962, 22220)8.589182e-103True
\n", "

499 rows \u00d7 4 columns

\n", "
" ], "text/plain": [ " group pvalue w fdr\n", "4670 (437, 2176, 2625, 5349) 6.240333e-26 4 True\n", "6244 (329, 525, 1830, 5815) 1.121532e-18 2 True\n", "7294 (39391, 39392, 39395, 39396) 8.279634e-26 2 True\n", "8184 (169, 925, 1764, 3555) 2.884992e-19 2 True\n", "8187 (169, 925, 3555, 12076) 1.755229e-19 2 True\n", "... ... ... .. ...\n", "174744 (937, 34575) 5.083782e-11 3 True\n", "205389 (7545, 27574) 1.114978e-09 3 True\n", "215347 (10586, 49880) 1.744001e-11 3 True\n", "224440 (21358, 30668) 2.214484e-10 3 True\n", "226303 (962, 22220) 8.589182e-10 3 True\n", "\n", "[499 rows x 4 columns]" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "walmart_svc.query('fdr')" ], "id": "3d97bcd3" } ], "metadata": { "kernelspec": { "display_name": "hgx-installation", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.17" } }, "nbformat": 4, "nbformat_minor": 5 }