Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
sj29-innovate committed Jan 8, 2018
1 parent dc702de commit ce290dc
Showing 1 changed file with 3 additions and 3 deletions.
6 changes: 3 additions & 3 deletions gensim/summarization/bm25.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,14 +78,14 @@ def __init__(self, corpus):
self.f = []
self.df = {}
self.idf = {}
- self.doc_length = []
+ self.doc_len = []
self.initialize()

def initialize(self):
"""Calculates frequencies of terms in documents and in corpus. Also computes inverse document frequencies."""
for document in self.corpus:
frequencies = {}
- (self.doc_length).append(len(document))
+ (self.doc_len).append(len(document))
for word in document:
if word not in frequencies:
frequencies[word] = 0
Expand Down Expand Up @@ -124,7 +124,7 @@ def get_score(self, document, index, average_idf):
continue
idf = self.idf[word] if self.idf[word] >= 0 else EPSILON * average_idf
score += (idf * self.f[index][word] * (PARAM_K1 + 1)
- / (self.f[index][word] + PARAM_K1 * (1 - PARAM_B + PARAM_B * self.doc_length[index] / self.avgdl)))
+ / (self.f[index][word] + PARAM_K1 * (1 - PARAM_B + PARAM_B * self.doc_len[index] / self.avgdl)))
return score

def get_scores(self, document, average_idf):
Expand Down

0 comments on commit ce290dc

Please sign in to comment.