From 7029641af484da9e2242866b38b610eee3f95e35 Mon Sep 17 00:00:00 2001 From: manneshiva Date: Mon, 15 May 2017 01:59:21 +0530 Subject: [PATCH 01/16] added any2sparse_clipped() function --- gensim/matutils.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/gensim/matutils.py b/gensim/matutils.py index fbfa383a34..d1c2d085f4 100644 --- a/gensim/matutils.py +++ b/gensim/matutils.py @@ -166,6 +166,19 @@ def any2sparse(vec, eps=1e-9): return [(int(fid), float(fw)) for fid, fw in vec if np.abs(fw) > eps] +def any2sparse_clipped(vec, topn, eps=1e-9): + """ + Like `any2sparse`, but only returns the `topn` elements of greatest magnitude (abs). + + """ + if topn <= 0: + return [] + if isinstance(vec, np.ndarray): + return full2sparse_clipped(vec, topn, eps) + vec_sparse = any2sparse(vec, eps) + return sorted(vec_sparse,key=lambda x: x[1], reverse=True) + + def scipy2sparse(vec, eps=1e-9): """Convert a scipy.sparse vector into gensim document format (=list of 2-tuples).""" vec = vec.tocsr() From 50cfef06857620666e8803be1867f196e3792f6c Mon Sep 17 00:00:00 2001 From: manneshiva Date: Mon, 15 May 2017 02:01:02 +0530 Subject: [PATCH 02/16] changed full2sparse_clipped to any2sparse_clipped in __getitem__ --- gensim/interfaces.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gensim/interfaces.py b/gensim/interfaces.py index 530fab398b..2398267037 100644 --- a/gensim/interfaces.py +++ b/gensim/interfaces.py @@ -225,10 +225,10 @@ def __getitem__(self, query): # if the input query was a corpus (=more documents), compute the top-n # most similar for each document in turn if matutils.ismatrix(result): - return [matutils.full2sparse_clipped(v, self.num_best) for v in result] + return [matutils.any2sparse_clipped(v, self.num_best) for v in result] else: # otherwise, return top-n of the single input document - return matutils.full2sparse_clipped(result, self.num_best) + return matutils.any2sparse_clipped(result, self.num_best) def __iter__(self): From 3a0924cf324f482ef411cd2e451bed4956e49733 Mon Sep 17 00:00:00 2001 From: manneshiva Date: Mon, 15 May 2017 03:24:06 +0530 Subject: [PATCH 03/16] added missing whitespace --- gensim/matutils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gensim/matutils.py b/gensim/matutils.py index d1c2d085f4..62a4f5c7be 100644 --- a/gensim/matutils.py +++ b/gensim/matutils.py @@ -176,7 +176,7 @@ def any2sparse_clipped(vec, topn, eps=1e-9): if isinstance(vec, np.ndarray): return full2sparse_clipped(vec, topn, eps) vec_sparse = any2sparse(vec, eps) - return sorted(vec_sparse,key=lambda x: x[1], reverse=True) + return sorted(vec_sparse, key=lambda x: x[1], reverse=True) def scipy2sparse(vec, eps=1e-9): From 5c10a699dc42d12fc31c8d8f19805632c096ccb1 Mon Sep 17 00:00:00 2001 From: manneshiva Date: Wed, 17 May 2017 02:24:46 +0530 Subject: [PATCH 04/16] return topn from any2sparse_clipped() --- gensim/matutils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gensim/matutils.py b/gensim/matutils.py index 62a4f5c7be..6ce5725cab 100644 --- a/gensim/matutils.py +++ b/gensim/matutils.py @@ -176,7 +176,7 @@ def any2sparse_clipped(vec, topn, eps=1e-9): if isinstance(vec, np.ndarray): return full2sparse_clipped(vec, topn, eps) vec_sparse = any2sparse(vec, eps) - return sorted(vec_sparse, key=lambda x: x[1], reverse=True) + return sorted(vec_sparse, key=lambda x: x[1], reverse=True)[:topn] def scipy2sparse(vec, eps=1e-9): From a60a5dbb88526c57f7813e3642aefb510c8983af Mon Sep 17 00:00:00 2001 From: manneshiva Date: Wed, 17 May 2017 05:01:14 +0530 Subject: [PATCH 05/16] efficient any2sparse_clipped implementation --- gensim/matutils.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/gensim/matutils.py b/gensim/matutils.py index 6ce5725cab..ba321d1847 100644 --- a/gensim/matutils.py +++ b/gensim/matutils.py @@ -175,8 +175,14 @@ def any2sparse_clipped(vec, topn, eps=1e-9): return [] if isinstance(vec, np.ndarray): return full2sparse_clipped(vec, topn, eps) - vec_sparse = any2sparse(vec, eps) - return sorted(vec_sparse, key=lambda x: x[1], reverse=True)[:topn] + if scipy.sparse.issparse(vec): + biggest = argsort(abs(vec).data, topn, reverse=True) + return list(zip(vec.indices.take(biggest), vec.data.take(biggest))) + else: + vec_csr = scipy.sparse.csr_matrix(vec) + biggest = argsort(abs(vec_csr).data, topn, reverse=True) + return list(zip(vec_csr.indices.take(biggest), vec_csr.data.take(biggest))) + def scipy2sparse(vec, eps=1e-9): From b33a364d6865e689e497c9522474801f41257d13 Mon Sep 17 00:00:00 2001 From: manneshiva Date: Wed, 17 May 2017 05:26:48 +0530 Subject: [PATCH 06/16] added unit test for any2sparse_clipped --- gensim/test/test_similarities.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/gensim/test/test_similarities.py b/gensim/test/test_similarities.py index 88596bb5b2..5fbb3162be 100644 --- a/gensim/test/test_similarities.py +++ b/gensim/test/test_similarities.py @@ -108,6 +108,13 @@ def test_full2sparse_clipped(self): expected = [(0, 0.80000000000000004), (1, 0.20000000000000001), (5, -0.14999999999999999)] self.assertTrue(matutils.full2sparse_clipped(vec, topn=3), expected) + def test_any2sparse_clipped(self): + vec = [0.8, 0.2, 0.0, 0.0, -0.1, -0.15] + expected = [(0, 0.80000000000000004), (1, 0.20000000000000001), (5, -0.14999999999999999)] + self.assertTrue(matutils.any2sparse_clipped(vec, topn=3), expected) + + vec_scipy = scipy.csr_matrix(vec) + self.assertTrue(matutils.any2sparse_clipped(vec_scipy, topn=3), expected) def testChunking(self): From 02421cbcc9c5fbd919137a338c836bc9645b0863 Mon Sep 17 00:00:00 2001 From: manneshiva Date: Wed, 17 May 2017 06:04:58 +0530 Subject: [PATCH 07/16] function call corrected --- gensim/test/test_similarities.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gensim/test/test_similarities.py b/gensim/test/test_similarities.py index 5fbb3162be..d50db35e60 100644 --- a/gensim/test/test_similarities.py +++ b/gensim/test/test_similarities.py @@ -113,7 +113,7 @@ def test_any2sparse_clipped(self): expected = [(0, 0.80000000000000004), (1, 0.20000000000000001), (5, -0.14999999999999999)] self.assertTrue(matutils.any2sparse_clipped(vec, topn=3), expected) - vec_scipy = scipy.csr_matrix(vec) + vec_scipy = scipy.sparse.csr_matrix(vec) self.assertTrue(matutils.any2sparse_clipped(vec_scipy, topn=3), expected) From b86881881a19789a6b8cfc74a3c8e940cda516d9 Mon Sep 17 00:00:00 2001 From: manneshiva Date: Fri, 19 May 2017 02:08:12 +0530 Subject: [PATCH 08/16] removed any2sparse_clipped and added scipy2scipy_clipped --- gensim/matutils.py | 39 ++++++++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/gensim/matutils.py b/gensim/matutils.py index ba321d1847..25546ca82f 100644 --- a/gensim/matutils.py +++ b/gensim/matutils.py @@ -166,23 +166,36 @@ def any2sparse(vec, eps=1e-9): return [(int(fid), float(fw)) for fid, fw in vec if np.abs(fw) > eps] -def any2sparse_clipped(vec, topn, eps=1e-9): +def scipy2scipy_clipped(matrix, topn, eps=1e-9): """ - Like `any2sparse`, but only returns the `topn` elements of greatest magnitude (abs). - + Return a scipy.sparse vector/matrix consisting of 'topn' elements of greatest magnitude(abs). """ + if not scipy.sparse.issparse(matrix): + raise ValueError("'%s' is not a scipy sparse vector." % matrix) if topn <= 0: - return [] - if isinstance(vec, np.ndarray): - return full2sparse_clipped(vec, topn, eps) - if scipy.sparse.issparse(vec): - biggest = argsort(abs(vec).data, topn, reverse=True) - return list(zip(vec.indices.take(biggest), vec.data.take(biggest))) + return scipy.sparse.csr_matrix([]) + # Return clipped sparse vector if input is a sparse vector. + if matrix.shape[0] == 1: + # use np.argpartition/argsort and only form tuples that are actually returned. + biggest = argsort(abs(matrix).data, topn, reverse=True) + indices, data = matrix.indices.take(biggest), matrix.data.take(biggest) + return scipy.sparse.csr_matrix((data, indices, [0, len(indices)])) + # Return clipped sparse matrix if input is a matrix, processing row by row. else: - vec_csr = scipy.sparse.csr_matrix(vec) - biggest = argsort(abs(vec_csr).data, topn, reverse=True) - return list(zip(vec_csr.indices.take(biggest), vec_csr.data.take(biggest))) - + matrix_indices = [] + matrix_data = [] + for v in matrix: + # Sort and clip each row vector first. + biggest = argsort(abs(v).data, topn, reverse=True) + indices, data = v.indices.take(biggest), v.data.take(biggest) + # Store the topn indices and values of each row vector. + matrix_data.append(data) + matrix_indices.append(indices) + matrix_indptr = np.array([i * topn for i in range(1 + len(matrix_indices))]) + matrix_indices = np.concatenate(matrix_indices).ravel() + matrix_data = np.concatenate(matrix_data).ravel() + # Instantiate and return a sparse csr_matrix which preserves the order of indices/data. + return scipy.sparse.csr.csr_matrix((matrix_data, matrix_indices, matrix_indptr), shape=(matrix.shape[0], np.max(matrix_indices) + 1)) def scipy2sparse(vec, eps=1e-9): From 99c96956b18e13fa63bee2a3a904aab994e85526 Mon Sep 17 00:00:00 2001 From: manneshiva Date: Fri, 19 May 2017 02:11:32 +0530 Subject: [PATCH 09/16] added new code path for maintain_sparsity --- gensim/interfaces.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/gensim/interfaces.py b/gensim/interfaces.py index 2398267037..c3a07aa784 100644 --- a/gensim/interfaces.py +++ b/gensim/interfaces.py @@ -19,6 +19,8 @@ from gensim import utils, matutils from six.moves import xrange +import numpy as np +import scipy.sparse logger = logging.getLogger('gensim.interfaces') @@ -222,13 +224,19 @@ def __getitem__(self, query): if self.num_best is None: return result + # if maintain_sparity is True, result is scipy sparse. Sort, clip the + # topn and return as a scipy sparse matrix. + if hasattr(self, 'maintain_sparsity'): + if self.maintain_sparsity: + return matutils.scipy2scipy_clipped(result, self.num_best) + # if the input query was a corpus (=more documents), compute the top-n # most similar for each document in turn if matutils.ismatrix(result): - return [matutils.any2sparse_clipped(v, self.num_best) for v in result] + return [matutils.full2sparse_clipped(v, self.num_best) for v in result] else: # otherwise, return top-n of the single input document - return matutils.any2sparse_clipped(result, self.num_best) + return matutils.full2sparse_clipped(result, self.num_best) def __iter__(self): From d0d9f622d65e1cf19dc7f53fc3f76fb50669b961 Mon Sep 17 00:00:00 2001 From: manneshiva Date: Fri, 19 May 2017 02:14:43 +0530 Subject: [PATCH 10/16] added unit tests for new function and issue --- gensim/test/test_similarities.py | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/gensim/test/test_similarities.py b/gensim/test/test_similarities.py index d50db35e60..3fa6690591 100644 --- a/gensim/test/test_similarities.py +++ b/gensim/test/test_similarities.py @@ -108,13 +108,22 @@ def test_full2sparse_clipped(self): expected = [(0, 0.80000000000000004), (1, 0.20000000000000001), (5, -0.14999999999999999)] self.assertTrue(matutils.full2sparse_clipped(vec, topn=3), expected) - def test_any2sparse_clipped(self): + def test_scipy2scipy_clipped(self): + # Test for scipy vector/row vec = [0.8, 0.2, 0.0, 0.0, -0.1, -0.15] expected = [(0, 0.80000000000000004), (1, 0.20000000000000001), (5, -0.14999999999999999)] - self.assertTrue(matutils.any2sparse_clipped(vec, topn=3), expected) - vec_scipy = scipy.sparse.csr_matrix(vec) - self.assertTrue(matutils.any2sparse_clipped(vec_scipy, topn=3), expected) + vec_scipy_clipped = matutils.scipy2scipy_clipped(vec_scipy, topn=3) + self.assertTrue(scipy.sparse.issparse(vec_scipy_clipped)) + self.assertTrue(matutils.scipy2sparse(vec_scipy_clipped), expected) + + # Test for scipy matrix + vec = [0.8, 0.2, 0.0, 0.0, -0.1, -0.15] + expected = [(0, 0.80000000000000004), (1, 0.20000000000000001), (5, -0.14999999999999999)] + matrix_scipy = scipy.sparse.csr_matrix([vec] * 3) + matrix_scipy_clipped = matutils.scipy2scipy_clipped(matrix_scipy, topn=3) + self.assertTrue(scipy.sparse.issparse(matrix_scipy_clipped)) + self.assertTrue([matutils.scipy2sparse(x) for x in matrix_scipy_clipped], [expected]*3) def testChunking(self): @@ -412,6 +421,21 @@ def testMaintainSparsity(self): self.assertTrue(scipy.sparse.issparse(sparse_sims)) numpy.testing.assert_array_equal(dense_sims, sparse_sims.todense()) + def testMaintainSparsityWithNumBest(self): + """Tests that sparsity is correctly maintained when maintain_sparsity=True and num_best is not None""" + num_features = len(dictionary) + + index = self.cls(corpus, num_features=num_features, num_best=3) + dense_topn_sims = index[corpus] + + index = self.cls(corpus, num_features=num_features, maintain_sparsity=True, num_best=3) + scipy_topn_sims = index[corpus] + + self.assertFalse(scipy.sparse.issparse(dense_topn_sims)) + self.assertTrue(scipy.sparse.issparse(scipy_topn_sims)) + self.assertEqual(dense_topn_sims, [matutils.scipy2sparse(v) for v in scipy_topn_sims]) + + class TestSimilarity(unittest.TestCase, _TestSimilarityABC): def setUp(self): From e0b9fac3644eb6971c6ed8ace159a653e1a86482 Mon Sep 17 00:00:00 2001 From: manneshiva Date: Fri, 19 May 2017 02:41:57 +0530 Subject: [PATCH 11/16] fixed flake8 errors --- gensim/interfaces.py | 2 -- gensim/test/test_similarities.py | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/gensim/interfaces.py b/gensim/interfaces.py index c3a07aa784..b570061ee3 100644 --- a/gensim/interfaces.py +++ b/gensim/interfaces.py @@ -19,8 +19,6 @@ from gensim import utils, matutils from six.moves import xrange -import numpy as np -import scipy.sparse logger = logging.getLogger('gensim.interfaces') diff --git a/gensim/test/test_similarities.py b/gensim/test/test_similarities.py index 3fa6690591..204553ddaa 100644 --- a/gensim/test/test_similarities.py +++ b/gensim/test/test_similarities.py @@ -123,7 +123,7 @@ def test_scipy2scipy_clipped(self): matrix_scipy = scipy.sparse.csr_matrix([vec] * 3) matrix_scipy_clipped = matutils.scipy2scipy_clipped(matrix_scipy, topn=3) self.assertTrue(scipy.sparse.issparse(matrix_scipy_clipped)) - self.assertTrue([matutils.scipy2sparse(x) for x in matrix_scipy_clipped], [expected]*3) + self.assertTrue([matutils.scipy2sparse(x) for x in matrix_scipy_clipped], [expected] * 3) def testChunking(self): From 3e9966c60ec556ae03c4bd79aaf11705da753f86 Mon Sep 17 00:00:00 2001 From: manneshiva Date: Fri, 19 May 2017 03:12:48 +0530 Subject: [PATCH 12/16] fixed matrix_indptr --- gensim/matutils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gensim/matutils.py b/gensim/matutils.py index 25546ca82f..548dc7a2f5 100644 --- a/gensim/matutils.py +++ b/gensim/matutils.py @@ -184,6 +184,7 @@ def scipy2scipy_clipped(matrix, topn, eps=1e-9): else: matrix_indices = [] matrix_data = [] + matrix_indptr = [0] for v in matrix: # Sort and clip each row vector first. biggest = argsort(abs(v).data, topn, reverse=True) @@ -191,7 +192,7 @@ def scipy2scipy_clipped(matrix, topn, eps=1e-9): # Store the topn indices and values of each row vector. matrix_data.append(data) matrix_indices.append(indices) - matrix_indptr = np.array([i * topn for i in range(1 + len(matrix_indices))]) + matrix_indptr.append(matrix_indptr[-1] + min(len(indices), topn)) matrix_indices = np.concatenate(matrix_indices).ravel() matrix_data = np.concatenate(matrix_data).ravel() # Instantiate and return a sparse csr_matrix which preserves the order of indices/data. From a3c1eb4246b5bfca20352c11889b37134e22ed4f Mon Sep 17 00:00:00 2001 From: manneshiva Date: Thu, 1 Jun 2017 00:04:25 +0530 Subject: [PATCH 13/16] added requested changes --- gensim/matutils.py | 6 +++--- gensim/test/test_similarities.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/gensim/matutils.py b/gensim/matutils.py index 548dc7a2f5..6d75342959 100644 --- a/gensim/matutils.py +++ b/gensim/matutils.py @@ -168,7 +168,7 @@ def any2sparse(vec, eps=1e-9): def scipy2scipy_clipped(matrix, topn, eps=1e-9): """ - Return a scipy.sparse vector/matrix consisting of 'topn' elements of greatest magnitude(abs). + Return a scipy.sparse vector/matrix consisting of 'topn' elements of the greatest magnitude (absolute value). """ if not scipy.sparse.issparse(matrix): raise ValueError("'%s' is not a scipy sparse vector." % matrix) @@ -177,7 +177,7 @@ def scipy2scipy_clipped(matrix, topn, eps=1e-9): # Return clipped sparse vector if input is a sparse vector. if matrix.shape[0] == 1: # use np.argpartition/argsort and only form tuples that are actually returned. - biggest = argsort(abs(matrix).data, topn, reverse=True) + biggest = argsort(abs(matrix.data), topn, reverse=True) indices, data = matrix.indices.take(biggest), matrix.data.take(biggest) return scipy.sparse.csr_matrix((data, indices, [0, len(indices)])) # Return clipped sparse matrix if input is a matrix, processing row by row. @@ -187,7 +187,7 @@ def scipy2scipy_clipped(matrix, topn, eps=1e-9): matrix_indptr = [0] for v in matrix: # Sort and clip each row vector first. - biggest = argsort(abs(v).data, topn, reverse=True) + biggest = argsort(abs(v.data), topn, reverse=True) indices, data = v.indices.take(biggest), v.data.take(biggest) # Store the topn indices and values of each row vector. matrix_data.append(data) diff --git a/gensim/test/test_similarities.py b/gensim/test/test_similarities.py index 204553ddaa..020b62bbdf 100644 --- a/gensim/test/test_similarities.py +++ b/gensim/test/test_similarities.py @@ -425,7 +425,7 @@ def testMaintainSparsityWithNumBest(self): """Tests that sparsity is correctly maintained when maintain_sparsity=True and num_best is not None""" num_features = len(dictionary) - index = self.cls(corpus, num_features=num_features, num_best=3) + index = self.cls(corpus, num_features=num_features, maintain_sparsity=False, num_best=3) dense_topn_sims = index[corpus] index = self.cls(corpus, num_features=num_features, maintain_sparsity=True, num_best=3) From f97011321c2f25693d3512ef60d43ed23b7d1d0c Mon Sep 17 00:00:00 2001 From: manneshiva Date: Thu, 1 Jun 2017 02:01:09 +0530 Subject: [PATCH 14/16] replaced hasattr with getattr --- gensim/interfaces.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/gensim/interfaces.py b/gensim/interfaces.py index b570061ee3..58e6f45b13 100644 --- a/gensim/interfaces.py +++ b/gensim/interfaces.py @@ -224,8 +224,7 @@ def __getitem__(self, query): # if maintain_sparity is True, result is scipy sparse. Sort, clip the # topn and return as a scipy sparse matrix. - if hasattr(self, 'maintain_sparsity'): - if self.maintain_sparsity: + if getattr(self, 'maintain_sparsity', False): return matutils.scipy2scipy_clipped(result, self.num_best) # if the input query was a corpus (=more documents), compute the top-n From c0033d6d1573b8d3029032e6d753a8b6dd93d7d9 Mon Sep 17 00:00:00 2001 From: manneshiva Date: Thu, 1 Jun 2017 03:54:43 +0530 Subject: [PATCH 15/16] call abs() once for entire matrix in scipy2scipy_clipped --- gensim/matutils.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/gensim/matutils.py b/gensim/matutils.py index 6d75342959..a0f24f0d84 100644 --- a/gensim/matutils.py +++ b/gensim/matutils.py @@ -185,9 +185,13 @@ def scipy2scipy_clipped(matrix, topn, eps=1e-9): matrix_indices = [] matrix_data = [] matrix_indptr = [0] - for v in matrix: + matrix.sort_indices() # ensure data is stored in row major, sorts inplace + # calling abs() on entire matrix once is faster than calling abs() iteratively for each row + matrix_abs = abs(matrix.data) + for i in range(matrix.shape[0]): + v = matrix.getrow(i) # Sort and clip each row vector first. - biggest = argsort(abs(v.data), topn, reverse=True) + biggest = argsort(matrix_abs[matrix.indptr[i]:matrix.indptr[i + 1]], topn, reverse=True) indices, data = v.indices.take(biggest), v.data.take(biggest) # Store the topn indices and values of each row vector. matrix_data.append(data) From 4d0e9e83a3c2c4de9dab44cac7443b2c513a28fb Mon Sep 17 00:00:00 2001 From: manneshiva Date: Wed, 14 Jun 2017 23:19:40 +0530 Subject: [PATCH 16/16] removed matrix.sort_indices and removed indptr while calling argsort --- gensim/matutils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gensim/matutils.py b/gensim/matutils.py index a0f24f0d84..7d6be268e1 100644 --- a/gensim/matutils.py +++ b/gensim/matutils.py @@ -185,13 +185,13 @@ def scipy2scipy_clipped(matrix, topn, eps=1e-9): matrix_indices = [] matrix_data = [] matrix_indptr = [0] - matrix.sort_indices() # ensure data is stored in row major, sorts inplace # calling abs() on entire matrix once is faster than calling abs() iteratively for each row - matrix_abs = abs(matrix.data) + matrix_abs = abs(matrix) for i in range(matrix.shape[0]): v = matrix.getrow(i) + v_abs = matrix_abs.getrow(i) # Sort and clip each row vector first. - biggest = argsort(matrix_abs[matrix.indptr[i]:matrix.indptr[i + 1]], topn, reverse=True) + biggest = argsort(v_abs.data, topn, reverse=True) indices, data = v.indices.take(biggest), v.data.take(biggest) # Store the topn indices and values of each row vector. matrix_data.append(data)