From a9844b1e1c9044495851d4e64012471a44179244 Mon Sep 17 00:00:00 2001 From: sama Date: Thu, 10 Jul 2025 01:34:57 -0600 Subject: [PATCH 1/3] added python implementation for katz centrality --- easygraph/functions/centrality/__init__.py | 1 + .../functions/centrality/katz_centrality.py | 58 +++++++++++++++++++ 2 files changed, 59 insertions(+) create mode 100644 easygraph/functions/centrality/katz_centrality.py diff --git a/easygraph/functions/centrality/__init__.py b/easygraph/functions/centrality/__init__.py index 1b54aaec..f37e55fe 100644 --- a/easygraph/functions/centrality/__init__.py +++ b/easygraph/functions/centrality/__init__.py @@ -5,3 +5,4 @@ from .flowbetweenness import * from .laplacian import * from .pagerank import * +from .katz_centrality import * \ No newline at end of file diff --git a/easygraph/functions/centrality/katz_centrality.py b/easygraph/functions/centrality/katz_centrality.py new file mode 100644 index 00000000..6a24dc41 --- /dev/null +++ b/easygraph/functions/centrality/katz_centrality.py @@ -0,0 +1,58 @@ +from easygraph.utils.decorators import not_implemented_for +from easygraph.utils import * + +__all__ = ["katz_centrality"] + +@not_implemented_for("multigraph") +def katz_centrality(G, alpha=0.1, beta=1.0, max_iter=1000, tol=1e-6, normalized=True): + """Compute the Katz centrality of a graph. + + Parameters + ---------- + G : graph + A EasyGraph graph. + alpha : float + Attenuation factor (should be < 1 / largest eigenvalue). + beta : float or dict + Initial centrality (can be scalar or dict of node->value). + max_iter : int + Maximum number of iterations. + tol : float + Error tolerance used to check convergence. + normalized : bool + Whether to normalize the resulting centralities. + + Returns + ------- + centrality : dict + Dictionary of nodes with Katz centrality values. + """ + from collections import defaultdict + + nodes = list(G.nodes) + A = G.adj + centrality = {v: 1.0 for v in nodes} + beta_vec = {v: beta if isinstance(beta, (int, float)) else beta.get(v, 1.0) for v in nodes} + + for i in range(max_iter): + new_centrality = defaultdict(float) + for v in nodes: + for u in A[v]: + new_centrality[v] += centrality[u] + for v in nodes: + new_centrality[v] = alpha * new_centrality[v] + beta_vec[v] + + # Check convergence + err = sum(abs(new_centrality[v] - centrality[v]) for v in nodes) + centrality = new_centrality + if err < tol: + break + else: + raise RuntimeError(f"Katz centrality failed to converge in {max_iter} iterations") + + if normalized: + norm = sum(v**2 for v in centrality.values()) ** 0.5 + for v in centrality: + centrality[v] /= norm + + return centrality From 6a6ca1a41290051701c95d5a975b0af639a5a232 Mon Sep 17 00:00:00 2001 From: sama Date: Thu, 10 Jul 2025 02:40:15 -0600 Subject: [PATCH 2/3] added comments --- .../functions/centrality/katz_centrality.py | 119 ++++++++++++------ 1 file changed, 83 insertions(+), 36 deletions(-) diff --git a/easygraph/functions/centrality/katz_centrality.py b/easygraph/functions/centrality/katz_centrality.py index 6a24dc41..8df8cd1b 100644 --- a/easygraph/functions/centrality/katz_centrality.py +++ b/easygraph/functions/centrality/katz_centrality.py @@ -1,58 +1,105 @@ from easygraph.utils.decorators import not_implemented_for from easygraph.utils import * +import numpy as np __all__ = ["katz_centrality"] @not_implemented_for("multigraph") def katz_centrality(G, alpha=0.1, beta=1.0, max_iter=1000, tol=1e-6, normalized=True): - """Compute the Katz centrality of a graph. + r""" + Compute the Katz centrality for nodes in a graph. + + Katz centrality computes the influence of a node based on the total number + of walks between nodes, attenuated by a factor of their length. It is + defined as the solution to the linear system: + + .. math:: + + x = \alpha A x + \beta + + where: + - \( A \) is the adjacency matrix of the graph, + - \( \alpha \) is a scalar attenuation factor, + - \( \beta \) is the bias vector (typically all ones), + - and \( x \) is the resulting centrality vector. + + The algorithm runs an iterative fixed-point method until convergence. Parameters ---------- - G : graph - A EasyGraph graph. - alpha : float - Attenuation factor (should be < 1 / largest eigenvalue). - beta : float or dict - Initial centrality (can be scalar or dict of node->value). - max_iter : int - Maximum number of iterations. - tol : float - Error tolerance used to check convergence. - normalized : bool - Whether to normalize the resulting centralities. + G : easygraph.Graph + An EasyGraph graph instance. Must be simple (non-multigraph). + + alpha : float, optional (default=0.1) + Attenuation factor, must be smaller than the reciprocal of the largest + eigenvalue of the adjacency matrix to ensure convergence. + + beta : float or dict, optional (default=1.0) + Bias term. Can be a constant scalar applied to all nodes, or a dictionary + mapping node IDs to values. + + max_iter : int, optional (default=1000) + Maximum number of iterations before the algorithm terminates. + + tol : float, optional (default=1e-6) + Convergence tolerance. Iteration stops when the L1 norm of the difference + between successive iterations is below this threshold. + + normalized : bool, optional (default=True) + If True, the result vector will be normalized to unit norm (L2). Returns ------- - centrality : dict - Dictionary of nodes with Katz centrality values. + dict + A dictionary mapping node IDs to Katz centrality scores. + + Raises + ------ + RuntimeError + If the algorithm fails to converge within `max_iter` iterations. + + Examples + -------- + >>> import easygraph as eg + >>> from easygraph import katz_centrality + >>> G = eg.Graph() + >>> G.add_edges_from([(0, 1), (1, 2), (2, 3)]) + >>> katz_centrality(G, alpha=0.05) + {0: 0.370..., 1: 0.447..., 2: 0.447..., 3: 0.370...} """ - from collections import defaultdict + # Create node ordering nodes = list(G.nodes) - A = G.adj - centrality = {v: 1.0 for v in nodes} - beta_vec = {v: beta if isinstance(beta, (int, float)) else beta.get(v, 1.0) for v in nodes} - - for i in range(max_iter): - new_centrality = defaultdict(float) - for v in nodes: - for u in A[v]: - new_centrality[v] += centrality[u] - for v in nodes: - new_centrality[v] = alpha * new_centrality[v] + beta_vec[v] - - # Check convergence - err = sum(abs(new_centrality[v] - centrality[v]) for v in nodes) - centrality = new_centrality - if err < tol: + n = len(nodes) + node_to_index = {node: i for i, node in enumerate(nodes)} + index_to_node = {i: node for i, node in enumerate(nodes)} + + # Build adjacency matrix + A = np.zeros((n, n), dtype=np.float64) + for u in G.nodes: + for v in G.adj[u]: + A[node_to_index[u], node_to_index[v]] = 1.0 + + # Initialize x and beta + x = np.ones(n, dtype=np.float64) + if isinstance(beta, dict): + b = np.array([beta.get(index_to_node[i], 1.0) for i in range(n)]) + else: + b = np.ones(n, dtype=np.float64) * beta + + # Iterative update using vectorized ops + for _ in range(max_iter): + x_new = alpha * A @ x + b + if np.linalg.norm(x_new - x, ord=1) < tol: break + x = x_new else: raise RuntimeError(f"Katz centrality failed to converge in {max_iter} iterations") if normalized: - norm = sum(v**2 for v in centrality.values()) ** 0.5 - for v in centrality: - centrality[v] /= norm + norm = np.linalg.norm(x) + if norm > 0: + x /= norm - return centrality + result = {index_to_node[i]: float(x[i]) for i in range(n)} + return result From 87e8534e628cc05a57a7b59e97980a9f39fb1f3a Mon Sep 17 00:00:00 2001 From: sama Date: Fri, 11 Jul 2025 02:53:04 -0600 Subject: [PATCH 3/3] finished katz centrality --- cpp_easygraph/cpp_easygraph.cpp | 1 + .../functions/centrality/centrality.h | 10 +- .../functions/centrality/katz_centrality.cpp | 120 ++++++++++++++++++ .../functions/centrality/katz_centrality.py | 4 +- 4 files changed, 132 insertions(+), 3 deletions(-) create mode 100644 cpp_easygraph/functions/centrality/katz_centrality.cpp diff --git a/cpp_easygraph/cpp_easygraph.cpp b/cpp_easygraph/cpp_easygraph.cpp index f763d360..f5570d6e 100644 --- a/cpp_easygraph/cpp_easygraph.cpp +++ b/cpp_easygraph/cpp_easygraph.cpp @@ -77,6 +77,7 @@ PYBIND11_MODULE(cpp_easygraph, m) { m.def("cpp_closeness_centrality", &closeness_centrality, py::arg("G"), py::arg("weight") = "weight", py::arg("cutoff") = py::none(), py::arg("sources") = py::none()); m.def("cpp_betweenness_centrality", &betweenness_centrality, py::arg("G"), py::arg("weight") = "weight", py::arg("cutoff") = py::none(),py::arg("sources") = py::none(), py::arg("normalized") = py::bool_(true), py::arg("endpoints") = py::bool_(false)); + m.def("cpp_katz_centrality", &cpp_katz_centrality, py::arg("G"), py::arg("alpha") = 0.1, py::arg("beta") = 1.0, py::arg("max_iter") = 1000, py::arg("tol") = 1e-6, py::arg("normalized") = true); m.def("cpp_k_core", &core_decomposition, py::arg("G")); m.def("cpp_density", &density, py::arg("G")); m.def("cpp_constraint", &constraint, py::arg("G"), py::arg("nodes") = py::none(), py::arg("weight") = py::none(), py::arg("n_workers") = py::none()); diff --git a/cpp_easygraph/functions/centrality/centrality.h b/cpp_easygraph/functions/centrality/centrality.h index 35f5b348..7040618a 100644 --- a/cpp_easygraph/functions/centrality/centrality.h +++ b/cpp_easygraph/functions/centrality/centrality.h @@ -4,4 +4,12 @@ py::object closeness_centrality(py::object G, py::object weight, py::object cutoff, py::object sources); py::object betweenness_centrality(py::object G, py::object weight, py::object cutoff, py::object sources, - py::object normalized, py::object endpoints); \ No newline at end of file + py::object normalized, py::object endpoints); +py::object cpp_katz_centrality( + py::object G, + py::object py_alpha, + py::object py_beta, + py::object py_max_iter, + py::object py_tol, + py::object py_normalized +); \ No newline at end of file diff --git a/cpp_easygraph/functions/centrality/katz_centrality.cpp b/cpp_easygraph/functions/centrality/katz_centrality.cpp new file mode 100644 index 00000000..63e78485 --- /dev/null +++ b/cpp_easygraph/functions/centrality/katz_centrality.cpp @@ -0,0 +1,120 @@ +#include +#include +#include +#include +#include "centrality.h" +#include "../../classes/graph.h" + +namespace py = pybind11; + +py::object cpp_katz_centrality( + py::object G, + py::object py_alpha, + py::object py_beta, + py::object py_max_iter, + py::object py_tol, + py::object py_normalized +) { + try { + Graph& graph = G.cast(); + auto csr = graph.gen_CSR(); + int n = csr->nodes.size(); + + if (n == 0) { + return py::dict(); + } + + // Initialize vectors + std::vector x0(n, 1.0); + std::vector x1(n); + std::vector* x_prev = &x0; + std::vector* x_next = &x1; + + // Process beta parameter + std::vector b(n); + if (py::isinstance(py_beta) || py::isinstance(py_beta)) { + double beta_val = py_beta.cast(); + for (int i = 0; i < n; i++) { + b[i] = beta_val; + } + } else if (py::isinstance(py_beta)) { + py::dict beta_dict = py_beta.cast(); + for (int i = 0; i < n; i++) { + node_t internal_id = csr->nodes[i]; + py::object node_obj = graph.id_to_node[py::cast(internal_id)]; + if (beta_dict.contains(node_obj)) { + b[i] = beta_dict[node_obj].cast(); + } else { + b[i] = 1.0; + } + } + } else { + throw py::type_error("beta must be a float or a dict"); + } + + // Extract parameters + double alpha = py_alpha.cast(); + int max_iter = py_max_iter.cast(); + double tol = py_tol.cast(); + bool normalized = py_normalized.cast(); + + // Iterative updates + int iter = 0; + for (; iter < max_iter; iter++) { + for (int i = 0; i < n; i++) { + double sum = 0.0; + int start = csr->V[i]; + int end = csr->V[i + 1]; + for (int jj = start; jj < end; jj++) { + int j = csr->E[jj]; + sum += (*x_prev)[j]; + } + (*x_next)[i] = alpha * sum + b[i]; + } + + // Check convergence + double change = 0.0; + for (int i = 0; i < n; i++) { + change += std::abs((*x_next)[i] - (*x_prev)[i]); + } + + if (change < tol) { + break; + } + + std::swap(x_prev, x_next); + } + + // Handle convergence failure + if (iter == max_iter) { + throw std::runtime_error("Katz centrality failed to converge in " + std::to_string(max_iter) + " iterations"); + } + + // Normalization + std::vector& x_final = *x_next; + if (normalized) { + double norm = 0.0; + for (double val : x_final) { + norm += val * val; + } + norm = std::sqrt(norm); + if (norm > 0) { + for (int i = 0; i < n; i++) { + x_final[i] /= norm; + } + } + } + + // Prepare results + py::dict result; + for (int i = 0; i < n; i++) { + node_t internal_id = csr->nodes[i]; + py::object node_obj = graph.id_to_node[py::cast(internal_id)]; + result[node_obj] = x_final[i]; + } + + return result; + } catch (const std::exception& e) { + throw std::runtime_error(e.what()); + } +} \ No newline at end of file diff --git a/easygraph/functions/centrality/katz_centrality.py b/easygraph/functions/centrality/katz_centrality.py index 8df8cd1b..9cc5296d 100644 --- a/easygraph/functions/centrality/katz_centrality.py +++ b/easygraph/functions/centrality/katz_centrality.py @@ -1,10 +1,11 @@ -from easygraph.utils.decorators import not_implemented_for from easygraph.utils import * import numpy as np +from easygraph.utils.decorators import * __all__ = ["katz_centrality"] @not_implemented_for("multigraph") +@hybrid("cpp_katz_centrality") def katz_centrality(G, alpha=0.1, beta=1.0, max_iter=1000, tol=1e-6, normalized=True): r""" Compute the Katz centrality for nodes in a graph. @@ -67,7 +68,6 @@ def katz_centrality(G, alpha=0.1, beta=1.0, max_iter=1000, tol=1e-6, normalized= >>> katz_centrality(G, alpha=0.05) {0: 0.370..., 1: 0.447..., 2: 0.447..., 3: 0.370...} """ - # Create node ordering nodes = list(G.nodes) n = len(nodes)