-
Notifications
You must be signed in to change notification settings - Fork 0
/
utils.py
executable file
·139 lines (113 loc) · 5.06 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import numpy as np
import networkx as nx
import scipy.sparse as sp
import torch
import scipy.io as sio
import random
import dgl
from sklearn.metrics import f1_score, roc_auc_score, accuracy_score, precision_score, recall_score
def sparse_to_tuple(sparse_mx, insert_batch=False):
    """Convert a sparse matrix, or a list of them, to tuple representation.

    Each matrix becomes a ``(coords, values, shape)`` triple:

    * ``coords`` has one row per stored entry, holding ``(row, col)``.
    * ``values`` is the matrix's stored data array.
    * ``shape`` is the matrix shape.

    With ``insert_batch=True`` a leading batch dimension is inserted: every
    coordinate row becomes ``(0, row, col)`` and ``shape`` gains a leading 1.

    When ``sparse_mx`` is a list, its elements are replaced in place and the
    same list object is returned.
    """

    def _as_triplet(matrix):
        # Work on the COO view, which exposes row/col/data directly.
        coo = matrix if sp.isspmatrix_coo(matrix) else matrix.tocoo()
        index_rows = [coo.row, coo.col]
        dims = coo.shape
        if insert_batch:
            # A column of zeros places every entry in batch element 0.
            index_rows.insert(0, np.zeros(coo.row.shape[0]))
            dims = (1,) + dims
        return np.vstack(index_rows).transpose(), coo.data, dims

    if not isinstance(sparse_mx, list):
        return _as_triplet(sparse_mx)

    for pos, matrix in enumerate(sparse_mx):
        sparse_mx[pos] = _as_triplet(matrix)
    return sparse_mx
def preprocess_features(features):
    """Row-normalize a feature matrix and convert it to tuple representation.

    Every row is divided by its sum; rows whose sum is zero are left as all
    zeros.

    Args:
        features: sparse feature matrix (the result must support
            ``.todense()``, so a scipy sparse matrix is expected).

    Returns:
        A pair ``(dense, tuple_repr)`` where ``dense`` is the normalized
        matrix from ``.todense()`` and ``tuple_repr`` is the output of
        ``sparse_to_tuple`` for the same normalized matrix.
    """
    rowsum = np.array(features.sum(1))
    # np.power refuses a negative integer exponent on an integer array, so
    # integer (or boolean) row sums are promoted to float first. Float inputs
    # keep their original precision.
    if not np.issubdtype(rowsum.dtype, np.inexact):
        rowsum = rowsum.astype(np.float64)
    # All-zero rows produce 1/0 = inf, which is reset to 0 just below; the
    # divide warning is therefore expected and silenced.
    with np.errstate(divide='ignore'):
        r_inv = np.power(rowsum, -1).flatten()
    r_inv[np.isinf(r_inv)] = 0.
    r_mat_inv = sp.diags(r_inv)
    features = r_mat_inv.dot(features)
    return features.todense(), sparse_to_tuple(features)
def normalize_adj(adj):
    """Symmetrically normalize an adjacency matrix.

    Builds ``d = rowsum ** -0.5`` (with infinite entries, i.e. zero row sums,
    replaced by 0), scales the columns of ``adj`` by ``d``, transposes, and
    scales the columns by ``d`` again. The result is returned in COO format.
    """
    coo = sp.coo_matrix(adj)
    degree = np.array(coo.sum(1))
    inv_sqrt_degree = np.power(degree, -0.5).flatten()
    # Rows with zero sum give inf; zero them so they contribute nothing.
    inv_sqrt_degree[np.isinf(inv_sqrt_degree)] = 0.
    scale = sp.diags(inv_sqrt_degree)
    column_scaled = coo @ scale
    normalized = column_scaled.T @ scale
    return normalized.tocoo()
def dense_to_one_hot(labels_dense, num_classes):
    """Convert scalar class labels to one-hot row vectors.

    Returns a float array of shape ``(labels_dense.shape[0], num_classes)``
    in which, for each row, the entry at that row's label is set to 1.
    """
    sample_count = labels_dense.shape[0]
    one_hot = np.zeros((sample_count, num_classes))
    # Position of each row's first cell in the flattened output, shifted by
    # the label to reach the cell that must be switched on.
    row_starts = np.arange(sample_count) * num_classes
    hot_cells = row_starts + labels_dense.ravel()
    one_hot.flat[hot_cells] = 1
    return one_hot
def load_mat(dataset, train_rate=0.3, val_rate=0.1):
    """Load a .mat dataset and draw a random train/val/test node split.

    Reads ``./dataset/<dataset>.mat``. Anomaly labels, attributes and the
    network are looked up under ``Label``/``Attributes``/``Network`` first and
    under ``gnd``/``X``/``A`` otherwise; class labels come from ``Class``.

    Args:
        dataset: dataset name (file stem under ``./dataset``).
        train_rate: fraction of nodes placed in the training split.
        val_rate: fraction of nodes placed in the validation split.

    Returns:
        ``(adj, feat, labels, idx_train, idx_val, idx_test, ano_labels)``:
        adjacency as CSR, features as LIL, squeezed int64 class labels, three
        lists of node indices, and the squeezed anomaly labels. The split is
        shuffled with the global ``random`` module state.
    """
    mat = sio.loadmat("./dataset/{}.mat".format(dataset))

    def pick(primary, fallback):
        # The file may use either of two key-naming schemes.
        return mat[primary] if primary in mat else mat[fallback]

    anomaly_raw = pick('Label', 'gnd')
    attributes = pick('Attributes', 'X')
    network = pick('Network', 'A')

    adj = sp.csr_matrix(network)
    feat = sp.lil_matrix(attributes)

    labels = np.squeeze(np.array(mat['Class'], dtype=np.int64))
    # Presumably these two datasets store 1-based class ids -- confirm.
    if dataset in ('BlogCatalog', 'Flickr'):
        labels = labels - 1

    ano_labels = np.squeeze(np.array(anomaly_raw))

    num_node = adj.shape[0]
    train_end = int(num_node * train_rate)
    val_end = train_end + int(num_node * val_rate)

    shuffled = list(range(num_node))
    random.shuffle(shuffled)

    idx_train = shuffled[:train_end]
    idx_val = shuffled[train_end:val_end]
    idx_test = shuffled[val_end:]
    return adj, feat, labels, idx_train, idx_val, idx_test, ano_labels
def load_mat_f(dataset):
    """Load a .mat dataset together with a pre-computed node split.

    Reads ``./dataset/<dataset>.mat`` exactly like the random-split loader
    (keys ``Label``/``Attributes``/``Network`` with ``gnd``/``X``/``A`` as
    fallbacks, class labels from ``Class``), but takes the train/val/test
    indices from the text files ``traincand``, ``val`` and ``test`` under
    ``splited_data/<dataset>/``.

    Returns:
        ``(adj, feat, labels, idx_train, idx_val, idx_test, ano_labels)``:
        adjacency as CSR, features as LIL, squeezed int64 class labels, three
        integer index arrays from ``np.loadtxt``, and the squeezed anomaly
        labels.
    """
    mat = sio.loadmat("./dataset/{}.mat".format(dataset))

    def pick(primary, fallback):
        # The file may use either of two key-naming schemes.
        return mat[primary] if primary in mat else mat[fallback]

    anomaly_raw = pick('Label', 'gnd')
    attributes = pick('Attributes', 'X')
    network = pick('Network', 'A')

    adj = sp.csr_matrix(network)
    feat = sp.lil_matrix(attributes)

    labels = np.squeeze(np.array(mat['Class'], dtype=np.int64))
    # Presumably these two datasets store 1-based class ids -- confirm.
    if dataset in ('BlogCatalog', 'Flickr'):
        labels = labels - 1

    ano_labels = np.squeeze(np.array(anomaly_raw))

    split_dir = "splited_data/" + dataset
    idx_train = np.loadtxt(split_dir + "/traincand", dtype=int)
    idx_val = np.loadtxt(split_dir + "/val", dtype=int)
    idx_test = np.loadtxt(split_dir + "/test", dtype=int)
    return adj, feat, labels, idx_train, idx_val, idx_test, ano_labels
def adj_to_dgl_graph(adj):
    """Convert a scipy sparse adjacency matrix to a DGL graph.

    The matrix is first turned into a NetworkX graph and then wrapped in a
    ``dgl.DGLGraph``.

    Args:
        adj: scipy sparse adjacency matrix.

    Returns:
        The ``dgl.DGLGraph`` built from the intermediate NetworkX graph.
    """
    # NetworkX 3.0 removed ``from_scipy_sparse_matrix`` in favour of
    # ``from_scipy_sparse_array`` (available since 2.7). Prefer the new name
    # when it exists and fall back to the legacy one on older releases.
    to_networkx = getattr(nx, "from_scipy_sparse_array", None)
    if to_networkx is None:
        to_networkx = nx.from_scipy_sparse_matrix
    nx_graph = to_networkx(adj)
    # NOTE(review): the DGLGraph constructor is kept as-is because this file
    # also relies on dgl.contrib.sampling, which suggests an older DGL
    # release -- confirm before switching to a newer construction API.
    dgl_graph = dgl.DGLGraph(nx_graph)
    return dgl_graph
def generate_rwr_subgraph(dgl_graph, subgraph_size):
    """Generate one subgraph per node with random walk with restart (RWR).

    For every node ``i`` the function collects the distinct nodes visited by
    a random walk seeded at ``i``, keeps the first ``subgraph_size - 1`` of
    them, and appends ``i`` itself as the last element.

    Args:
        dgl_graph: graph accepted by
            ``dgl.contrib.sampling.random_walk_with_restart``.
        subgraph_size: number of node ids in every returned subgraph.

    Returns:
        A list with one entry per node; entry ``i`` is a list of node ids
        whose last element is ``i``.
    """
    sample_walks = dgl.contrib.sampling.random_walk_with_restart
    seeds = list(range(dgl_graph.number_of_nodes()))
    reduced_size = subgraph_size - 1

    # One batched sampling pass over all seeds.
    traces = sample_walks(dgl_graph, seeds, restart_prob=1,
                          max_nodes_per_seed=subgraph_size*3)

    subv = []
    for i, trace in enumerate(traces):
        # Each per-seed entry is joined with torch.cat, so it is treated as a
        # sequence of tensors of node ids.
        nodes = torch.unique(torch.cat(trace), sorted=False).tolist()
        retry_time = 0
        # Too few distinct nodes: re-sample this seed on its own.
        # NOTE(review): padding below only triggers for walks with at most
        # two nodes (and does nothing for an empty walk). If subgraph_size > 4
        # and a seed can never reach reduced_size distinct nodes, this loop
        # may not terminate -- confirm against the expected graphs.
        while len(nodes) < reduced_size:
            cur_trace = sample_walks(dgl_graph, [i], restart_prob=0.9,
                                     max_nodes_per_seed=subgraph_size*5)
            nodes = torch.unique(torch.cat(cur_trace[0]), sorted=False).tolist()
            retry_time += 1
            if retry_time > 10 and len(nodes) <= 2:
                # Give up on finding more neighbours: repeat what was found
                # until the list is long enough.
                nodes = nodes * reduced_size
        nodes = nodes[:reduced_size]
        nodes.append(i)
        subv.append(nodes)
    return subv