Skip to content

Commit

Permalink
Extract cell assignment logic to cell_full local function
Browse files Browse the repository at this point in the history
  • Loading branch information
Witiko committed Jul 23, 2020
1 parent dc26e56 commit 235de1d
Showing 1 changed file with 14 additions and 6 deletions.
20 changes: 14 additions & 6 deletions gensim/similarities/termsim.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,16 @@ def tfidf_sort_key(term_index):
else:
raise ValueError('Dtype %s is unsupported, use numpy.float16, float32, or float64.' % dtype)

def cell_full(t1_index, t2_index, similarity):
    """Report whether `similarity` must not be written to cell (t1_index, t2_index).

    The function reads `dominant`, `column_sum`, `column_nonzero`,
    `nonzero_limit`, `symmetric` and `assigned_cells`, none of which are
    defined here; it does not modify any of them.

    Returns
    -------
    bool
        True when at least one of these holds, False otherwise:

        * `dominant` is truthy and ``column_sum[t1_index] + abs(similarity)``
          would reach 1.0 or more;
        * ``column_nonzero[t1_index]`` already equals `nonzero_limit`;
        * `symmetric` is truthy and ``(t1_index, t2_index)`` is already in
          `assigned_cells`.

    """
    # After adding the similarity, the matrix would cease to be strongly
    # diagonally dominant.
    if dominant and column_sum[t1_index] + abs(similarity) >= 1.0:
        return True

    filled = column_nonzero[t1_index]
    # Sanity check: a column is never expected to exceed the limit.
    assert filled <= nonzero_limit
    # After adding the similarity, the column would contain more than
    # nonzero_limit elements.
    if filled == nonzero_limit:
        return True

    # In the symmetric case, a similarity may already have been assigned
    # to this cell.
    return bool(symmetric and (t1_index, t2_index) in assigned_cells)

def populate_buffers(t1_index, t2_index, similarity):
column_buffer.append(t1_index)
row_buffer.append(t2_index)
Expand Down Expand Up @@ -210,14 +220,12 @@ def progress_bar(iterable):
else:
rows = sorted(most_similar, key=tfidf_sort_key)

for row_number, (t2_index, similarity) in zip(range(num_rows), rows):
if dominant and column_sum[t1_index] + abs(similarity) >= 1.0:
break
for t2_index, similarity in rows:
if cell_full(t1_index, t2_index, similarity):
continue
if not symmetric:
populate_buffers(t1_index, t2_index, similarity)
elif column_nonzero[t2_index] < nonzero_limit \
and (not dominant or column_sum[t2_index] + abs(similarity) < 1.0) \
and (t1_index, t2_index) not in assigned_cells:
elif not cell_full(t2_index, t1_index, similarity):
populate_buffers(t1_index, t2_index, similarity)
populate_buffers(t2_index, t1_index, similarity)

Expand Down

0 comments on commit 235de1d

Please sign in to comment.