#21 Add H-Significance Test, beta estimate.

cybercongress · Oct 16, 2018 · 3d42ace · 3d42ace
1 parent 0e21662
commit 3d42ace
Show file tree

Hide file tree

Showing 4 changed files with 240 additions and 0 deletions.
diff --git a/research/ethereum/common/calculate_significance.py b/research/ethereum/common/calculate_significance.py
@@ -0,0 +1,89 @@
+import matplotlib.pyplot as plt
+import numpy as np
+import scipy.optimize as op
+import scipy.sparse
+import scipy.sparse.linalg
+from scipy.sparse import csr_matrix
+
+from common.calculate_spring_rank import calculate_Hamiltonion, calculate_spring_rank
+
+
+# noinspection PyTypeChecker
+def estimate_beta(A: csr_matrix, rank: [float]) -> float:
+    """
+    Use Maximum Likelihood Estimate to estimate inversed temperature beta from a network and its inferred rankings/scores
+    Input:
+        A: graph adjacency matrix where matrix[i,j] is the weight of an edge from node i to j
+        rank: SpringRank inferred scores/rankings of nodes
+    Output:
+        beta: MLE estimate of inversed tempurature
+    """
+
+    def f(beta, A, rnk):
+        n = rnk.size
+        y = 0.
+
+        for i in range(n):
+            for j in range(n):
+                d = rnk[i] - rnk[j]
+                p = (1 + np.exp(-2 * beta * d)) ** (-1)
+                y = y + d * (A[i, j] - (A[i, j] + A[j, i]) * p)
+
+        return y
+
+    normed_rank = rank + abs(min(rank))
+    beta_0 = 0.1
+    return op.fsolve(f, beta_0, args=(A, normed_rank))[0]
+
+
+def test_ranks_significance(A: csr_matrix, n_repetitions=100, plot=True):
+    """
+    Given an adjacency matrix, test if the hierarchical structure is statitically significant compared to null model
+    The null model contains randomized directions of edges while preserving total degree between each pair of nodes
+    Input:
+        A: graph adjacency matrix where matrix[i,j] is the weight of an edge from node i to j
+        n_repetitions: number of null models to generate
+        plot: if True shows histogram of null models' energy distribution
+    Output:
+        p-value: probability of observing the Hamiltonion energy lower than that of the real network if null model is true
+        plot: histogram of energy of null models with dashed line as the energy of real network
+    """
+
+    # place holder for outputs
+    H = np.zeros(n_repetitions)
+    H0 = calculate_Hamiltonion(A, calculate_spring_rank(A)[1])
+
+    # generate null models
+    for i in range(n_repetitions):
+        B = randomize_edge_direction(A)
+        H[i] = calculate_Hamiltonion(B, calculate_spring_rank(B)[1])
+
+    p_val = np.sum(H < H0) / n_repetitions
+
+    # Plot
+    if plot:
+        plt.hist(H)
+        plt.axvline(x=H0, color='r', linestyle='dashed')
+        plt.show()
+
+    return (p_val, H)
+
+
+def randomize_edge_direction(A: csr_matrix) -> csr_matrix:
+    """
+    Randomize directions of edges while preserving the total degree.
+    Used when values in A are not integers
+    Input:
+        A: graph adjacency matrix where A[i,j] is the weight of an edge from node i to j
+    Output:
+        An adjacency matrix representing a null model
+    """
+
+    n = A.shape[0]
+    (r, c, v) = scipy.sparse.find(A)
+    for i in range(v.size):
+        if np.random.random_sample() > 0.5:
+            temp = r[i]
+            r[i] = c[i]
+            c[i] = temp
+    return scipy.sparse.csr_matrix((v, (r, c)), shape=(n, n))
diff --git a/research/ethereum/common/calculate_spring_rank.py b/research/ethereum/common/calculate_spring_rank.py
@@ -4,7 +4,9 @@
 import numpy as np
 import scipy.sparse
 import scipy.sparse.linalg
+
 from networkx import DiGraph
+from scipy.sparse import csr_matrix
 
 from common.adjacency_list_to_graph import print_graph_info, Edge
 from common.graph_to_matrix import build_matrix
@@ -102,3 +104,92 @@ def update_rank(graph: DiGraph, rank: Rank, new_edges: List[Edge]) -> (int, Rank
     rank = dict(zip(nodes, raw_rank))
 
     return iterations, rank
+
+
+def calculate_violations(A: csr_matrix, rank: [float]) -> (float, float):
+    """
+    Calculate number of violations in a graph given SpringRank scores
+    A violaton is an edge going from a lower ranked node to a higher ranked one
+    Input:
+        A: graph adjacency matrix where A[i,j] is the weight of an edge from node i to j
+        rank: SpringRank result
+    Output:
+        number of violations
+        proportion of all edges against violations
+    """
+
+    rank_sort = np.argsort(rank)[::-1]  # Get sorted by rank A indices
+    A_sort = A[rank_sort,][:, rank_sort]  # Create new matrix, where elements sorted by rank desc
+
+    # All elements below main triangle is connection from low-ranked node
+    # to high-ranked node. So to get all violation just sum all elements
+    # below main triangle.
+    viol = scipy.sparse.tril(A_sort, -1).sum()
+
+    m = A_sort.sum()  # All matrix weights sum
+
+    return (viol, viol / m)
+
+
+def calculate_min_violations(A: csr_matrix) -> (float, float):
+    """
+    Calculate the minimum number of violations in a graph for all possible rankings
+    A violaton is an edge going from a lower ranked node to a higher ranked one
+    Minimum number is calculated by summing bidirectional interactions.
+    Input:
+        A: graph adjacency matrix where A[i,j] is the weight of an edge from node i to j
+    Output:
+        minimum number of violations
+        proportion of all edges against minimum violations
+    """
+
+    min_viol = 0
+    for i in range(A.shape[0]):
+        for j in range(i + 1, A.shape[0] - 1):
+            if A[i, j] > 0 and A[j, i] > 0:
+                min_viol = min_viol + min(A[i, j], A[j, i])
+
+    m = A.sum()
+    return (min_viol, min_viol / m)
+
+
+def calculate_system_violated_energy(A: csr_matrix, rank: [float]) -> (float, float):
+    """
+    Calculate number of violations in a graph given SpringRank scores
+    A violaton is an edge going from a lower ranked node to a higher ranked one
+        weighted by the difference between these two nodes
+    Input:
+        A: graph adjacency matrix where A[i,j] is the weight of an edge from node i to j
+        rank: SpringRank scores
+    Output:
+        system violated energy
+        proportion of system energy against system violated energy
+    """
+    i, j, v = scipy.sparse.find(A)  # I,J,V contain the row indices, column indices, and values of the nonzero entries.
+    normed_rank = (rank - min(rank)) / (max(rank) - min(rank))  # normalize
+    wv = 0
+    for e in range(len(v)):  # for all nodes interactions
+        if normed_rank[i[e]] < normed_rank[j[e]]:  # compare ranks of two nodes i and j
+            wv = wv + v[e] * (normed_rank[j[e]] - normed_rank[i[e]])
+
+    H = calculate_Hamiltonion(A, rank)
+    return (wv, wv / H)
+
+
+def calculate_Hamiltonion(A: csr_matrix, rank: [float]) -> float:
+    """
+    Calculate the Hamiltonion of the network
+    Input:
+        A: graph adjacency matrix where matrix[i,j] is the weight of an edge from node i to j
+        rank: SpringRank scores
+    Output:
+        H: Hamiltonion energy of the system
+    """
+
+    H = 0.0
+
+    for i in range(A.shape[0]):
+        for j in range(A.shape[1]):
+            H = H + 0.5 * A[i, j] * (rank[i] - rank[j] - 1) ** 2
+
+    return H
diff --git a/research/ethereum/common/generate_graph.py b/research/ethereum/common/generate_graph.py
@@ -0,0 +1,26 @@
+import numpy as np
+from scipy.sparse import csr_matrix
+from scipy.sparse import lil_matrix
+
+
+def generate_graph(N, beta=1, c=1) -> (csr_matrix, [float]):
+    """
+    Generate a graph.
+    Edges are drawn from a Poisson distribution, assuming all spring constants are 1
+    Input:
+        beta:   inversed temperature (noise)
+        c:      sparsity constant
+    Output:
+        A: an adjacency matrix where A[i,j] is the weight of an edge from node i to j
+        raw_rank:
+    """
+
+    rank = np.random.normal(scale=10, size=N)
+    A = lil_matrix((N, N), dtype=float)
+
+    for i in range(N):
+        for j in range(N):
+            mu = np.exp(-0.5 * beta * (rank[i] - rank[j] - 1) ** 2)
+            A[i, j] = np.random.poisson(c * mu)
+
+    return (A.tocsr(), rank)
diff --git a/research/ethereum/common/test_calculate_rank.py b/research/ethereum/common/test_calculate_rank.py
@@ -0,0 +1,34 @@
+import unittest
+
+from common.calculate_significance import estimate_beta, test_ranks_significance
+from common.calculate_spring_rank import calculate_spring_rank, calculate_Hamiltonion
+from common.generate_graph import generate_graph
+
+
+class GenerateGraphTest(unittest.TestCase):
+
+    def test_generate_graph(self):
+        print("Generating network")
+        origin_beta = 2.1
+        A, origin_raw_rank = generate_graph(N=70, beta=origin_beta, c=1)
+
+        print("Calculating rank")
+        iterations, calculated_raw_rank = calculate_spring_rank(A)
+        print(f"Iterations: {iterations}")
+
+        print("Estimate beta")
+        calculated_from_origin_beta = estimate_beta(A, origin_raw_rank)
+        calculated_beta = estimate_beta(A, calculated_raw_rank)
+        print(f"Calculated betas: '{calculated_from_origin_beta}' and '{calculated_beta}' vs {origin_beta}")
+
+        print("Calculating Energy")
+        calculated_from_origin_energy = calculate_Hamiltonion(A, origin_raw_rank)
+        calculated_energy = calculate_Hamiltonion(A, calculated_raw_rank)
+        print(f"Calculated energies: '{calculated_from_origin_energy}' and '{calculated_energy}'")
+
+        self.assertEqual(True, True)
+
+    def test_graph_significance(self):
+        A, origin_raw_rank = generate_graph(N=50, beta=0.05, c=5)
+        test_ranks_significance(A)
+        self.assertEqual(True, True)