Extract Statistically Validated Hypergraph¶
Definition
Statistical filtering removes hyperedges that are not significant under a null model.
You will learn
Extract statistically validated substructures and compare with the original hypergraph.
Overview¶
Extract statistically validated substructures and cores.
Compare filtered and original hypergraphs.
Setup¶
[ ]:
import matplotlib as mpl
# Notebook-wide Matplotlib defaults: a compact on-screen figure and a
# higher resolution for anything written with savefig.
_figure_defaults = {
    "figure.figsize": (6, 4),
    "figure.dpi": 120,
    "savefig.dpi": 150,
}
mpl.rcParams.update(_figure_defaults)
[1]:
import sys
import numpy as np
# Add the parent directory to the module search path before importing
# hypergraphx below (presumably so the notebook uses the local checkout --
# confirm against the repository layout).
sys.path.append("..")
from hypergraphx.core import Hypergraph
from hypergraphx.filters import get_svh, get_svc
# Seed NumPy's global random generator so repeated runs draw the same numbers.
# NOTE(review): whether get_svh / get_svc consume this generator is not
# visible from this notebook -- confirm.
np.random.seed(123)
[2]:
def line_to_hyperedge(line):
    """Parse one line of a hyperedge file into a list of integer node ids.

    Parameters
    ----------
    line : str
        Whitespace-separated integer node ids, optionally ending with a
        newline.

    Returns
    -------
    list of int
        The node ids in the order they appear on the line.

    Raises
    ------
    ValueError
        If a token is not a valid integer, or if the line contains no
        tokens at all (blank line).
    """
    # split() with no argument splits on any run of whitespace and ignores
    # leading/trailing whitespace, so trailing spaces, repeated spaces and
    # "\r\n" endings no longer produce empty tokens that int() rejects.
    nodes = [int(token) for token in line.split()]
    if not nodes:
        # A blank line already raised ValueError before; keep that contract
        # instead of silently returning an empty hyperedge.
        raise ValueError(f"no node ids found in hyperedge line: {line!r}")
    return nodes
def line_to_weight(line):
    """Convert one line of a weights file into its integer weight.

    Newline characters at either end of ``line`` are removed before the
    remaining text is parsed with ``int``.
    """
    text = line.strip("\n")
    weight = int(text)
    return weight
[3]:
# Load the Justice dataset: hyperedges and their weights live in two
# separate text files (presumably aligned line by line -- confirm against
# the data files).
with open("./_example_data/justice_data/hyperedges.txt", "r") as hye_file:
    # Each row becomes a tuple of node ids.
    hyes = [tuple(line_to_hyperedge(row)) for row in hye_file]

with open("./_example_data/justice_data/weights.txt", "r") as weight_file:
    weights = [line_to_weight(row) for row in weight_file]

justice = Hypergraph(hyes, weighted=True, weights=weights)
[5]:
# Run the statistically-validated-hypergraph filter on the Justice data.
# NOTE(review): mp=True presumably turns on multiprocessing -- confirm in
# the hypergraphx documentation.
justice_svh = get_svh(justice, mp=True)
Show Statistically Validated Hyperlinks¶
[6]:
# Pick the entry of the SVH result stored under key 2 and display only the
# rows whose `fdr` column is true.
order = 2
svh_at_order = justice_svh[order]
svh_at_order.query('fdr')
[6]:
| | edge | pvalue | fdr |
|---|---|---|---|
| 1 | (0, 2) | 1.186904e-60 | True |
| 22 | (1, 6) | 7.192356e-05 | True |
| 24 | (1, 8) | 2.210969e-13 | True |
| 49 | (3, 5) | 1.153760e-16 | True |
| 53 | (4, 7) | 2.501053e-35 | True |
| 68 | (8, 9) | 2.931209e-06 | True |
| 73 | (9, 12) | 7.456268e-18 | True |
| 75 | (9, 14) | 3.458745e-05 | True |
| 81 | (11, 17) | 6.734703e-05 | True |
| 85 | (12, 15) | 4.004282e-24 | True |
| 96 | (13, 19) | 1.645878e-76 | True |
| 132 | (20, 21) | 5.618608e-06 | True |
| 133 | (20, 22) | 2.346363e-08 | True |
| 134 | (20, 23) | 1.421147e-11 | True |
| 139 | (21, 24) | 9.122267e-07 | True |
| 160 | (24, 30) | 2.371769e-05 | True |
| 161 | (24, 31) | 3.748224e-05 | True |
| 171 | (26, 29) | 1.257113e-53 | True |
| 181 | (27, 32) | 4.729259e-06 | True |
| 185 | (28, 30) | 1.426895e-04 | True |
| 191 | (29, 33) | 4.870316e-19 | True |
| 193 | (29, 36) | 9.671574e-10 | True |
| 195 | (30, 31) | 9.654795e-11 | True |
| 198 | (30, 34) | 4.293666e-20 | True |
| 205 | (32, 33) | 7.718603e-10 | True |
| 212 | (34, 35) | 2.639628e-06 | True |
Extract Statistically Validated Cores¶
[7]:
# Load the Walmart dataset: each row of the file is parsed into a tuple of
# node ids, and passing the tuples through a set drops repeated hyperedges.
with open("./_example_data/walmart_data/walmart-trips-reduced.txt", "r") as hye_file:
    hyes = list(set(tuple(line_to_hyperedge(row)) for row in hye_file))

walmart = Hypergraph(hyes, weighted=False)  # unweighted example
[9]:
# Extract the statistically validated cores of the Walmart hypergraph.
# NOTE(review): alpha looks like the significance level and max_order like
# the largest group size tested -- confirm in the hypergraphx documentation.
walmart_svc = get_svc(walmart, alpha=0.01, max_order=4)
Show Statistically Validated Cores¶
[10]:
# Display only the rows whose `fdr` column is true (assumes walmart_svc is a
# pandas DataFrame, as the rendered table below suggests).
walmart_svc.query('fdr')
[10]:
| | group | pvalue | w | fdr |
|---|---|---|---|---|
| 4670 | (437, 2176, 2625, 5349) | 6.240333e-26 | 4 | True |
| 6244 | (329, 525, 1830, 5815) | 1.121532e-18 | 2 | True |
| 7294 | (39391, 39392, 39395, 39396) | 8.279634e-26 | 2 | True |
| 8184 | (169, 925, 1764, 3555) | 2.884992e-19 | 2 | True |
| 8187 | (169, 925, 3555, 12076) | 1.755229e-19 | 2 | True |
| ... | ... | ... | ... | ... |
| 174744 | (937, 34575) | 5.083782e-11 | 3 | True |
| 205389 | (7545, 27574) | 1.114978e-09 | 3 | True |
| 215347 | (10586, 49880) | 1.744001e-11 | 3 | True |
| 224440 | (21358, 30668) | 2.214484e-10 | 3 | True |
| 226303 | (962, 22220) | 8.589182e-10 | 3 | True |
499 rows × 4 columns