Skip to content

Commit

Permalink
#21 Add H-Significance Test, beta estimate.
Browse files Browse the repository at this point in the history
  • Loading branch information
hleb-albau committed Oct 16, 2018
1 parent 0e21662 commit 3d42ace
Show file tree
Hide file tree
Showing 4 changed files with 240 additions and 0 deletions.
89 changes: 89 additions & 0 deletions research/ethereum/common/calculate_significance.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import matplotlib.pyplot as plt
import numpy as np
import scipy.optimize as op
import scipy.sparse
import scipy.sparse.linalg
from scipy.sparse import csr_matrix

from common.calculate_spring_rank import calculate_Hamiltonion, calculate_spring_rank


# noinspection PyTypeChecker
def estimate_beta(A: csr_matrix, rank: [float]) -> float:
"""
Use Maximum Likelihood Estimate to estimate inversed temperature beta from a network and its inferred rankings/scores
Input:
A: graph adjacency matrix where matrix[i,j] is the weight of an edge from node i to j
rank: SpringRank inferred scores/rankings of nodes
Output:
beta: MLE estimate of inversed tempurature
"""

def f(beta, A, rnk):
n = rnk.size
y = 0.

for i in range(n):
for j in range(n):
d = rnk[i] - rnk[j]
p = (1 + np.exp(-2 * beta * d)) ** (-1)
y = y + d * (A[i, j] - (A[i, j] + A[j, i]) * p)

return y

normed_rank = rank + abs(min(rank))
beta_0 = 0.1
return op.fsolve(f, beta_0, args=(A, normed_rank))[0]


def test_ranks_significance(A: csr_matrix, n_repetitions=100, plot=True):
"""
Given an adjacency matrix, test if the hierarchical structure is statitically significant compared to null model
The null model contains randomized directions of edges while preserving total degree between each pair of nodes
Input:
A: graph adjacency matrix where matrix[i,j] is the weight of an edge from node i to j
n_repetitions: number of null models to generate
plot: if True shows histogram of null models' energy distribution
Output:
p-value: probability of observing the Hamiltonion energy lower than that of the real network if null model is true
plot: histogram of energy of null models with dashed line as the energy of real network
"""

# place holder for outputs
H = np.zeros(n_repetitions)
H0 = calculate_Hamiltonion(A, calculate_spring_rank(A)[1])

# generate null models
for i in range(n_repetitions):
B = randomize_edge_direction(A)
H[i] = calculate_Hamiltonion(B, calculate_spring_rank(B)[1])

p_val = np.sum(H < H0) / n_repetitions

# Plot
if plot:
plt.hist(H)
plt.axvline(x=H0, color='r', linestyle='dashed')
plt.show()

return (p_val, H)


def randomize_edge_direction(A: csr_matrix) -> csr_matrix:
"""
Randomize directions of edges while preserving the total degree.
Used when values in A are not integers
Input:
A: graph adjacency matrix where A[i,j] is the weight of an edge from node i to j
Output:
An adjacency matrix representing a null model
"""

n = A.shape[0]
(r, c, v) = scipy.sparse.find(A)
for i in range(v.size):
if np.random.random_sample() > 0.5:
temp = r[i]
r[i] = c[i]
c[i] = temp
return scipy.sparse.csr_matrix((v, (r, c)), shape=(n, n))
91 changes: 91 additions & 0 deletions research/ethereum/common/calculate_spring_rank.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@
import numpy as np
import scipy.sparse
import scipy.sparse.linalg

from networkx import DiGraph
from scipy.sparse import csr_matrix

from common.adjacency_list_to_graph import print_graph_info, Edge
from common.graph_to_matrix import build_matrix
Expand Down Expand Up @@ -102,3 +104,92 @@ def update_rank(graph: DiGraph, rank: Rank, new_edges: List[Edge]) -> (int, Rank
rank = dict(zip(nodes, raw_rank))

return iterations, rank


def calculate_violations(A: csr_matrix, rank: [float]) -> (float, float):
"""
Calculate number of violations in a graph given SpringRank scores
A violaton is an edge going from a lower ranked node to a higher ranked one
Input:
A: graph adjacency matrix where A[i,j] is the weight of an edge from node i to j
rank: SpringRank result
Output:
number of violations
proportion of all edges against violations
"""

rank_sort = np.argsort(rank)[::-1] # Get sorted by rank A indices
A_sort = A[rank_sort,][:, rank_sort] # Create new matrix, where elements sorted by rank desc

# All elements below main triangle is connection from low-ranked node
# to high-ranked node. So to get all violation just sum all elements
# below main triangle.
viol = scipy.sparse.tril(A_sort, -1).sum()

m = A_sort.sum() # All matrix weights sum

return (viol, viol / m)


def calculate_min_violations(A: csr_matrix) -> (float, float):
"""
Calculate the minimum number of violations in a graph for all possible rankings
A violaton is an edge going from a lower ranked node to a higher ranked one
Minimum number is calculated by summing bidirectional interactions.
Input:
A: graph adjacency matrix where A[i,j] is the weight of an edge from node i to j
Output:
minimum number of violations
proportion of all edges against minimum violations
"""

min_viol = 0
for i in range(A.shape[0]):
for j in range(i + 1, A.shape[0] - 1):
if A[i, j] > 0 and A[j, i] > 0:
min_viol = min_viol + min(A[i, j], A[j, i])

m = A.sum()
return (min_viol, min_viol / m)


def calculate_system_violated_energy(A: csr_matrix, rank: [float]) -> (float, float):
"""
Calculate number of violations in a graph given SpringRank scores
A violaton is an edge going from a lower ranked node to a higher ranked one
weighted by the difference between these two nodes
Input:
A: graph adjacency matrix where A[i,j] is the weight of an edge from node i to j
rank: SpringRank scores
Output:
system violated energy
proportion of system energy against system violated energy
"""
i, j, v = scipy.sparse.find(A) # I,J,V contain the row indices, column indices, and values of the nonzero entries.
normed_rank = (rank - min(rank)) / (max(rank) - min(rank)) # normalize
wv = 0
for e in range(len(v)): # for all nodes interactions
if normed_rank[i[e]] < normed_rank[j[e]]: # compare ranks of two nodes i and j
wv = wv + v[e] * (normed_rank[j[e]] - normed_rank[i[e]])

H = calculate_Hamiltonion(A, rank)
return (wv, wv / H)


def calculate_Hamiltonion(A: csr_matrix, rank: [float]) -> float:
"""
Calculate the Hamiltonion of the network
Input:
A: graph adjacency matrix where matrix[i,j] is the weight of an edge from node i to j
rank: SpringRank scores
Output:
H: Hamiltonion energy of the system
"""

H = 0.0

for i in range(A.shape[0]):
for j in range(A.shape[1]):
H = H + 0.5 * A[i, j] * (rank[i] - rank[j] - 1) ** 2

return H
26 changes: 26 additions & 0 deletions research/ethereum/common/generate_graph.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import numpy as np
from scipy.sparse import csr_matrix
from scipy.sparse import lil_matrix


def generate_graph(N, beta=1, c=1) -> (csr_matrix, [float]):
"""
Generate a graph.
Edges are drawn from a Poisson distribution, assuming all spring constants are 1
Input:
beta: inversed temperature (noise)
c: sparsity constant
Output:
A: an adjacency matrix where A[i,j] is the weight of an edge from node i to j
raw_rank:
"""

rank = np.random.normal(scale=10, size=N)
A = lil_matrix((N, N), dtype=float)

for i in range(N):
for j in range(N):
mu = np.exp(-0.5 * beta * (rank[i] - rank[j] - 1) ** 2)
A[i, j] = np.random.poisson(c * mu)

return (A.tocsr(), rank)
34 changes: 34 additions & 0 deletions research/ethereum/common/test_calculate_rank.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import unittest

from common.calculate_significance import estimate_beta, test_ranks_significance
from common.calculate_spring_rank import calculate_spring_rank, calculate_Hamiltonion
from common.generate_graph import generate_graph


class GenerateGraphTest(unittest.TestCase):

def test_generate_graph(self):
print("Generating network")
origin_beta = 2.1
A, origin_raw_rank = generate_graph(N=70, beta=origin_beta, c=1)

print("Calculating rank")
iterations, calculated_raw_rank = calculate_spring_rank(A)
print(f"Iterations: {iterations}")

print("Estimate beta")
calculated_from_origin_beta = estimate_beta(A, origin_raw_rank)
calculated_beta = estimate_beta(A, calculated_raw_rank)
print(f"Calculated betas: '{calculated_from_origin_beta}' and '{calculated_beta}' vs {origin_beta}")

print("Calculating Energy")
calculated_from_origin_energy = calculate_Hamiltonion(A, origin_raw_rank)
calculated_energy = calculate_Hamiltonion(A, calculated_raw_rank)
print(f"Calculated energies: '{calculated_from_origin_energy}' and '{calculated_energy}'")

self.assertEqual(True, True)

def test_graph_significance(self):
A, origin_raw_rank = generate_graph(N=50, beta=0.05, c=5)
test_ranks_significance(A)
self.assertEqual(True, True)

0 comments on commit 3d42ace

Please sign in to comment.