Extract Statistically Validated Hypergraph¶

Definition

Statistical filtering removes hyperedges that are not significant under a null model.

You will learn

Extract statistically validated substructures and compare with the original hypergraph.

Overview¶

  • Extract statistically validated substructures and cores.

  • Compare filtered and original hypergraphs.

Setup¶

[ ]:
import matplotlib as mpl

# Notebook-wide plotting defaults: figure size plus on-screen and saved resolution.
mpl.rcParams["figure.figsize"] = (6, 4)
mpl.rcParams["figure.dpi"] = 120
mpl.rcParams["savefig.dpi"] = 150

[1]:
import sys

import numpy as np

# Add the parent directory to the import path before importing hypergraphx.
# NOTE(review): presumably this lets the notebook use a local checkout of the
# package instead of an installed one — confirm.
sys.path.append("..")
from hypergraphx.core import Hypergraph
from hypergraphx.filters import get_svh, get_svc

# Seed NumPy's global random state with a fixed value so reruns start from the
# same state.
np.random.seed(123)
[2]:
def line_to_hyperedge(line):
    """Parse one line of a hyperedge file into a list of integer node ids.

    Parameters
    ----------
    line : str
        Whitespace-separated integer node ids, optionally ending in a newline.

    Returns
    -------
    list of int
        Node ids in the order they appear on the line. A line containing only
        whitespace yields an empty list.

    Raises
    ------
    ValueError
        If any token on the line is not an integer literal.
    """
    # split() with no argument splits on any run of whitespace and discards the
    # trailing newline, so a trailing space or a doubled separator no longer
    # produces an empty token that int() rejects.
    return [int(node) for node in line.split()]

def line_to_weight(line):
    """Parse one line of a weights file into an integer weight."""
    # Drop the line terminator first, then convert what is left.
    text = line.strip("\n")
    weight = int(text)
    return weight
[3]:
# Load Justice dataset: hyperedges from one file, integer weights from another.
# NOTE(review): presumably the i-th weight belongs to the i-th hyperedge — confirm.
with open("./_example_data/justice_data/hyperedges.txt", "r") as hye_file:
    # Each parsed line becomes a tuple of node ids.
    hyes = [tuple(line_to_hyperedge(line)) for line in hye_file]
with open("./_example_data/justice_data/weights.txt", "r") as weight_file:
    weights = [line_to_weight(line) for line in weight_file]

justice = Hypergraph(hyes, weighted=True, weights=weights)
[5]:
# Run the statistically-validated-hypergraph filter on the Justice data.
# NOTE(review): mp=True presumably enables multiprocessing — confirm in get_svh.
justice_svh = get_svh(justice, mp=True)

Extract Statistically Validated Cores¶

[7]:
# Load Walmart dataset, keeping each distinct hyperedge once.
with open("./_example_data/walmart_data/walmart-trips-reduced.txt", "r") as hye_file:
    # Tuples are hashable, so collecting them in a set removes exact duplicates
    # (same nodes in the same order) before the list is built.
    hyes = list({tuple(line_to_hyperedge(line)) for line in hye_file})

walmart = Hypergraph(hyes, weighted=False)  # unweighted example
[9]:
# Extract statistically validated cores from the Walmart hypergraph.
# NOTE(review): alpha is presumably the significance level and max_order the
# largest group size tested — confirm against get_svc.
walmart_svc = get_svc(walmart, alpha=0.01, max_order=4)

Show Statistically Validated Cores¶

[10]:
# Display only the rows selected by the `fdr` column.
# NOTE(review): presumably walmart_svc is a pandas DataFrame and `fdr` marks
# groups that pass the false-discovery-rate correction — confirm.
walmart_svc.query('fdr')
[10]:
group pvalue w fdr
4670 (437, 2176, 2625, 5349) 6.240333e-26 4 True
6244 (329, 525, 1830, 5815) 1.121532e-18 2 True
7294 (39391, 39392, 39395, 39396) 8.279634e-26 2 True
8184 (169, 925, 1764, 3555) 2.884992e-19 2 True
8187 (169, 925, 3555, 12076) 1.755229e-19 2 True
... ... ... ... ...
174744 (937, 34575) 5.083782e-11 3 True
205389 (7545, 27574) 1.114978e-09 3 True
215347 (10586, 49880) 1.744001e-11 3 True
224440 (21358, 30668) 2.214484e-10 3 True
226303 (962, 22220) 8.589182e-10 3 True

499 rows × 4 columns