diff --git a/research/ethereum/common/calculate_significance.py b/research/ethereum/common/calculate_significance.py new file mode 100644 index 00000000..b5d761ca --- /dev/null +++ b/research/ethereum/common/calculate_significance.py @@ -0,0 +1,89 @@ +import matplotlib.pyplot as plt +import numpy as np +import scipy.optimize as op +import scipy.sparse +import scipy.sparse.linalg +from scipy.sparse import csr_matrix + +from common.calculate_spring_rank import calculate_Hamiltonion, calculate_spring_rank + + +# noinspection PyTypeChecker +def estimate_beta(A: csr_matrix, rank: [float]) -> float: + """ + Use Maximum Likelihood Estimate to estimate inversed temperature beta from a network and its inferred rankings/scores + Input: + A: graph adjacency matrix where matrix[i,j] is the weight of an edge from node i to j + rank: SpringRank inferred scores/rankings of nodes + Output: + beta: MLE estimate of inversed tempurature + """ + + def f(beta, A, rnk): + n = rnk.size + y = 0. + + for i in range(n): + for j in range(n): + d = rnk[i] - rnk[j] + p = (1 + np.exp(-2 * beta * d)) ** (-1) + y = y + d * (A[i, j] - (A[i, j] + A[j, i]) * p) + + return y + + normed_rank = rank + abs(min(rank)) + beta_0 = 0.1 + return op.fsolve(f, beta_0, args=(A, normed_rank))[0] + + +def test_ranks_significance(A: csr_matrix, n_repetitions=100, plot=True): + """ + Given an adjacency matrix, test if the hierarchical structure is statitically significant compared to null model + The null model contains randomized directions of edges while preserving total degree between each pair of nodes + Input: + A: graph adjacency matrix where matrix[i,j] is the weight of an edge from node i to j + n_repetitions: number of null models to generate + plot: if True shows histogram of null models' energy distribution + Output: + p-value: probability of observing the Hamiltonion energy lower than that of the real network if null model is true + plot: histogram of energy of null models with dashed line as the energy of real network + """ + + # place holder for outputs + H = np.zeros(n_repetitions) + H0 = calculate_Hamiltonion(A, calculate_spring_rank(A)[1]) + + # generate null models + for i in range(n_repetitions): + B = randomize_edge_direction(A) + H[i] = calculate_Hamiltonion(B, calculate_spring_rank(B)[1]) + + p_val = np.sum(H < H0) / n_repetitions + + # Plot + if plot: + plt.hist(H) + plt.axvline(x=H0, color='r', linestyle='dashed') + plt.show() + + return (p_val, H) + + +def randomize_edge_direction(A: csr_matrix) -> csr_matrix: + """ + Randomize directions of edges while preserving the total degree. + Used when values in A are not integers + Input: + A: graph adjacency matrix where A[i,j] is the weight of an edge from node i to j + Output: + An adjacency matrix representing a null model + """ + + n = A.shape[0] + (r, c, v) = scipy.sparse.find(A) + for i in range(v.size): + if np.random.random_sample() > 0.5: + temp = r[i] + r[i] = c[i] + c[i] = temp + return scipy.sparse.csr_matrix((v, (r, c)), shape=(n, n)) diff --git a/research/ethereum/common/calculate_spring_rank.py b/research/ethereum/common/calculate_spring_rank.py index 80e2971a..16210358 100644 --- a/research/ethereum/common/calculate_spring_rank.py +++ b/research/ethereum/common/calculate_spring_rank.py @@ -4,7 +4,9 @@ import numpy as np import scipy.sparse import scipy.sparse.linalg + from networkx import DiGraph +from scipy.sparse import csr_matrix from common.adjacency_list_to_graph import print_graph_info, Edge from common.graph_to_matrix import build_matrix @@ -102,3 +104,92 @@ def update_rank(graph: DiGraph, rank: Rank, new_edges: List[Edge]) -> (int, Rank rank = dict(zip(nodes, raw_rank)) return iterations, rank + + +def calculate_violations(A: csr_matrix, rank: [float]) -> (float, float): + """ + Calculate number of violations in a graph given SpringRank scores + A violaton is an edge going from a lower ranked node to a higher ranked one + Input: + A: graph adjacency matrix where A[i,j] is the weight of an edge from node i to j + rank: SpringRank result + Output: + number of violations + proportion of all edges against violations + """ + + rank_sort = np.argsort(rank)[::-1] # Get sorted by rank A indices + A_sort = A[rank_sort,][:, rank_sort] # Create new matrix, where elements sorted by rank desc + + # All elements below main triangle is connection from low-ranked node + # to high-ranked node. So to get all violation just sum all elements + # below main triangle. + viol = scipy.sparse.tril(A_sort, -1).sum() + + m = A_sort.sum() # All matrix weights sum + + return (viol, viol / m) + + +def calculate_min_violations(A: csr_matrix) -> (float, float): + """ + Calculate the minimum number of violations in a graph for all possible rankings + A violaton is an edge going from a lower ranked node to a higher ranked one + Minimum number is calculated by summing bidirectional interactions. + Input: + A: graph adjacency matrix where A[i,j] is the weight of an edge from node i to j + Output: + minimum number of violations + proportion of all edges against minimum violations + """ + + min_viol = 0 + for i in range(A.shape[0]): + for j in range(i + 1, A.shape[0] - 1): + if A[i, j] > 0 and A[j, i] > 0: + min_viol = min_viol + min(A[i, j], A[j, i]) + + m = A.sum() + return (min_viol, min_viol / m) + + +def calculate_system_violated_energy(A: csr_matrix, rank: [float]) -> (float, float): + """ + Calculate number of violations in a graph given SpringRank scores + A violaton is an edge going from a lower ranked node to a higher ranked one + weighted by the difference between these two nodes + Input: + A: graph adjacency matrix where A[i,j] is the weight of an edge from node i to j + rank: SpringRank scores + Output: + system violated energy + proportion of system energy against system violated energy + """ + i, j, v = scipy.sparse.find(A) # I,J,V contain the row indices, column indices, and values of the nonzero entries. + normed_rank = (rank - min(rank)) / (max(rank) - min(rank)) # normalize + wv = 0 + for e in range(len(v)): # for all nodes interactions + if normed_rank[i[e]] < normed_rank[j[e]]: # compare ranks of two nodes i and j + wv = wv + v[e] * (normed_rank[j[e]] - normed_rank[i[e]]) + + H = calculate_Hamiltonion(A, rank) + return (wv, wv / H) + + +def calculate_Hamiltonion(A: csr_matrix, rank: [float]) -> float: + """ + Calculate the Hamiltonion of the network + Input: + A: graph adjacency matrix where matrix[i,j] is the weight of an edge from node i to j + rank: SpringRank scores + Output: + H: Hamiltonion energy of the system + """ + + H = 0.0 + + for i in range(A.shape[0]): + for j in range(A.shape[1]): + H = H + 0.5 * A[i, j] * (rank[i] - rank[j] - 1) ** 2 + + return H diff --git a/research/ethereum/common/generate_graph.py b/research/ethereum/common/generate_graph.py new file mode 100644 index 00000000..f53e4af1 --- /dev/null +++ b/research/ethereum/common/generate_graph.py @@ -0,0 +1,26 @@ +import numpy as np +from scipy.sparse import csr_matrix +from scipy.sparse import lil_matrix + + +def generate_graph(N, beta=1, c=1) -> (csr_matrix, [float]): + """ + Generate a graph. + Edges are drawn from a Poisson distribution, assuming all spring constants are 1 + Input: + beta: inversed temperature (noise) + c: sparsity constant + Output: + A: an adjacency matrix where A[i,j] is the weight of an edge from node i to j + raw_rank: + """ + + rank = np.random.normal(scale=10, size=N) + A = lil_matrix((N, N), dtype=float) + + for i in range(N): + for j in range(N): + mu = np.exp(-0.5 * beta * (rank[i] - rank[j] - 1) ** 2) + A[i, j] = np.random.poisson(c * mu) + + return (A.tocsr(), rank) diff --git a/research/ethereum/common/test_calculate_rank.py b/research/ethereum/common/test_calculate_rank.py new file mode 100644 index 00000000..36c932d8 --- /dev/null +++ b/research/ethereum/common/test_calculate_rank.py @@ -0,0 +1,34 @@ +import unittest + +from common.calculate_significance import estimate_beta, test_ranks_significance +from common.calculate_spring_rank import calculate_spring_rank, calculate_Hamiltonion +from common.generate_graph import generate_graph + + +class GenerateGraphTest(unittest.TestCase): + + def test_generate_graph(self): + print("Generating network") + origin_beta = 2.1 + A, origin_raw_rank = generate_graph(N=70, beta=origin_beta, c=1) + + print("Calculating rank") + iterations, calculated_raw_rank = calculate_spring_rank(A) + print(f"Iterations: {iterations}") + + print("Estimate beta") + calculated_from_origin_beta = estimate_beta(A, origin_raw_rank) + calculated_beta = estimate_beta(A, calculated_raw_rank) + print(f"Calculated betas: '{calculated_from_origin_beta}' and '{calculated_beta}' vs {origin_beta}") + + print("Calculating Energy") + calculated_from_origin_energy = calculate_Hamiltonion(A, origin_raw_rank) + calculated_energy = calculate_Hamiltonion(A, calculated_raw_rank) + print(f"Calculated energies: '{calculated_from_origin_energy}' and '{calculated_energy}'") + + self.assertEqual(True, True) + + def test_graph_significance(self): + A, origin_raw_rank = generate_graph(N=50, beta=0.05, c=5) + test_ranks_significance(A) + self.assertEqual(True, True)