{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Extract Statistically Validated Hypergraph\n" ], "id": "41a2159e" }, { "cell_type": "raw", "metadata": { "raw_mimetype": "text/restructuredtext" }, "source": [ ".. admonition:: Definition\n", "\n", " Statistical filtering removes hyperedges that are not significant under a null model.\n" ] }, { "cell_type": "raw", "metadata": { "raw_mimetype": "text/restructuredtext" }, "source": [ ".. admonition:: You will learn\n", "\n", " Extract statistically validated substructures and compare with the original hypergraph.\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Overview\n", "\n", "- Extract statistically validated substructures and cores.\n", "- Compare filtered and original hypergraphs.\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Setup\n" ] }, { "cell_type": "code", "metadata": {}, "execution_count": null, "outputs": [], "source": [ "import matplotlib as mpl\n", "\n", "mpl.rcParams.update({\n", " \"figure.figsize\": (6, 4),\n", " \"figure.dpi\": 120,\n", " \"savefig.dpi\": 150,\n", "})\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "import sys\n", "\n", "import numpy as np\n", "\n", "sys.path.append(\"..\")\n", "from hypergraphx.core import Hypergraph\n", "from hypergraphx.filters import get_svh, get_svc\n", "\n", "np.random.seed(123)" ], "id": "13df5aa6" }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "def line_to_hyperedge(line):\n", "    \"\"\"Parse one line of whitespace-separated node ids into a list of ints.\n", "\n", "    Raises ValueError if the line is blank or contains a non-integer token.\n", "    \"\"\"\n", "    # split() with no argument drops the trailing newline and tolerates\n", "    # repeated or trailing spaces, which split(\" \") turned into bad tokens.\n", "    nodes = line.split()\n", "    if not nodes:\n", "        raise ValueError(\"empty hyperedge line\")\n", "    return [int(node) for node in nodes]\n", "\n", "def line_to_weight(line):\n", "    \"\"\"Parse one line holding a single integer weight.\"\"\"\n", "    return int(line.strip(\"\\n\"))" ], "id": "29326425" }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# Load Justice dataset.\n", "with open(\"./_example_data/justice_data/hyperedges.txt\", \"r\") as 
hye_file:\n", " hyes = list(map(line_to_hyperedge,hye_file.readlines()))\n", "with open(\"./_example_data/justice_data/weights.txt\", \"r\") as weight_file:\n", " weights = list(map(line_to_weight,weight_file.readlines()))\n", "\n", "hyes = [tuple(hye) for hye in hyes]\n", "justice = Hypergraph(hyes, weighted=True, weights=weights)" ], "id": "a8a6c83a" }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "justice_svh = get_svh(justice,\n", " mp=True)" ], "id": "200b9374" }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Show Statistically Validated Hyperlinks" ], "id": "966b6284" }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | edge | \n", "pvalue | \n", "fdr | \n", "
|---|---|---|---|
| 1 | \n", "(0, 2) | \n", "1.186904e-60 | \n", "True | \n", "
| 22 | \n", "(1, 6) | \n", "7.192356e-05 | \n", "True | \n", "
| 24 | \n", "(1, 8) | \n", "2.210969e-13 | \n", "True | \n", "
| 49 | \n", "(3, 5) | \n", "1.153760e-16 | \n", "True | \n", "
| 53 | \n", "(4, 7) | \n", "2.501053e-35 | \n", "True | \n", "
| 68 | \n", "(8, 9) | \n", "2.931209e-06 | \n", "True | \n", "
| 73 | \n", "(9, 12) | \n", "7.456268e-18 | \n", "True | \n", "
| 75 | \n", "(9, 14) | \n", "3.458745e-05 | \n", "True | \n", "
| 81 | \n", "(11, 17) | \n", "6.734703e-05 | \n", "True | \n", "
| 85 | \n", "(12, 15) | \n", "4.004282e-24 | \n", "True | \n", "
| 96 | \n", "(13, 19) | \n", "1.645878e-76 | \n", "True | \n", "
| 132 | \n", "(20, 21) | \n", "5.618608e-06 | \n", "True | \n", "
| 133 | \n", "(20, 22) | \n", "2.346363e-08 | \n", "True | \n", "
| 134 | \n", "(20, 23) | \n", "1.421147e-11 | \n", "True | \n", "
| 139 | \n", "(21, 24) | \n", "9.122267e-07 | \n", "True | \n", "
| 160 | \n", "(24, 30) | \n", "2.371769e-05 | \n", "True | \n", "
| 161 | \n", "(24, 31) | \n", "3.748224e-05 | \n", "True | \n", "
| 171 | \n", "(26, 29) | \n", "1.257113e-53 | \n", "True | \n", "
| 181 | \n", "(27, 32) | \n", "4.729259e-06 | \n", "True | \n", "
| 185 | \n", "(28, 30) | \n", "1.426895e-04 | \n", "True | \n", "
| 191 | \n", "(29, 33) | \n", "4.870316e-19 | \n", "True | \n", "
| 193 | \n", "(29, 36) | \n", "9.671574e-10 | \n", "True | \n", "
| 195 | \n", "(30, 31) | \n", "9.654795e-11 | \n", "True | \n", "
| 198 | \n", "(30, 34) | \n", "4.293666e-20 | \n", "True | \n", "
| 205 | \n", "(32, 33) | \n", "7.718603e-10 | \n", "True | \n", "
| 212 | \n", "(34, 35) | \n", "2.639628e-06 | \n", "True | \n", "
| \n", " | group | \n", "pvalue | \n", "w | \n", "fdr | \n", "
|---|---|---|---|---|
| 4670 | \n", "(437, 2176, 2625, 5349) | \n", "6.240333e-26 | \n", "4 | \n", "True | \n", "
| 6244 | \n", "(329, 525, 1830, 5815) | \n", "1.121532e-18 | \n", "2 | \n", "True | \n", "
| 7294 | \n", "(39391, 39392, 39395, 39396) | \n", "8.279634e-26 | \n", "2 | \n", "True | \n", "
| 8184 | \n", "(169, 925, 1764, 3555) | \n", "2.884992e-19 | \n", "2 | \n", "True | \n", "
| 8187 | \n", "(169, 925, 3555, 12076) | \n", "1.755229e-19 | \n", "2 | \n", "True | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 174744 | \n", "(937, 34575) | \n", "5.083782e-11 | \n", "3 | \n", "True | \n", "
| 205389 | \n", "(7545, 27574) | \n", "1.114978e-09 | \n", "3 | \n", "True | \n", "
| 215347 | \n", "(10586, 49880) | \n", "1.744001e-11 | \n", "3 | \n", "True | \n", "
| 224440 | \n", "(21358, 30668) | \n", "2.214484e-10 | \n", "3 | \n", "True | \n", "
| 226303 | \n", "(962, 22220) | \n", "8.589182e-10 | \n", "3 | \n", "True | \n", "
499 rows \u00d7 4 columns
\n", "