LdaMallet word_topics fixes after #767 (#771)
bhargavvader authored and tmylk committed Jul 5, 2016
1 parent 012877a commit 868716b
Showing 2 changed files with 11 additions and 7 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
@@ -2,6 +2,8 @@ Changes
=======
0.13.2

* wordtopics has been renamed to word_topics in ldamallet, fixing issue #764. (@bhargavvader, #771)
- wordtopics is assigned the value of word_topics to keep backward compatibility, for now
* topics and topn parameters changed to num_topics and num_words in show_topics() and print_topics() (@droudy, #747)
- In hdpmodel and dtmmodel
- NOT BACKWARDS COMPATIBLE!
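To illustrate the renamed API, here is a hedged usage sketch (not part of this commit): the documents, dictionary, and the '/path/to/mallet' binary path are placeholder assumptions. After train(), the assignment counts live in model.word_topics, while model.wordtopics is kept as a deprecated alias for now.

```python
# Illustrative sketch only -- the corpus and mallet path are assumptions, not from this commit.
from gensim.corpora import Dictionary
from gensim.models.wrappers import LdaMallet

docs = [["human", "computer", "interaction"], ["graph", "tree", "graph"]]
dictionary = Dictionary(docs)
corpus = [dictionary.doc2bow(doc) for doc in docs]

# '/path/to/mallet' is a placeholder for a locally installed MALLET binary.
model = LdaMallet('/path/to/mallet', corpus=corpus, num_topics=2, id2word=dictionary)

print(model.show_topics(num_topics=2, num_words=3))
print(model.show_topic(0, num_words=3))               # num_words replaces the old topn parameter
assert (model.wordtopics == model.word_topics).all()  # deprecated alias kept for backward compatibility
```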
16 changes: 9 additions & 7 deletions gensim/models/wrappers/ldamallet.py
@@ -165,6 +165,9 @@ def train(self, corpus):
logger.info("training MALLET LDA with %s", cmd)
check_output(cmd, shell=True)
self.word_topics = self.load_word_topics()
# NOTE - we are still keeping the wordtopics variable to not break backward compatibility.
# word_topics has replaced wordtopics throughout the code; wordtopics just stores the values of word_topics when train is called.
self.wordtopics = self.word_topics

def __getitem__(self, bow, iterations=100):
is_corpus, corpus = utils.is_corpus(bow)
@@ -200,7 +203,6 @@ def load_word_topics(self):
continue
tokenid = word2id[token]
word_topics[int(topic), tokenid] += 1.0
logger.info("loaded assigned topics for %i tokens", word_topics.sum())
self.print_topics(15)
return word_topics
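
As a side note on what load_word_topics builds: the loop above accumulates MALLET's per-token topic assignments into a (num_topics x num_terms) count matrix. A minimal standalone sketch of that accumulation (an assumption for illustration, not gensim's code) might look like this:

```python
import numpy

def accumulate_word_topics(assignments, word2id, num_topics):
    """assignments: iterable of (token, topic) pairs, e.g. parsed from MALLET's state file."""
    word_topics = numpy.zeros((num_topics, len(word2id)), dtype=numpy.float64)
    for token, topic in assignments:
        if token not in word2id:          # skip tokens missing from the dictionary
            continue
        word_topics[int(topic), word2id[token]] += 1.0
    return word_topics

# Example: three tokens assigned across two topics.
counts = accumulate_word_topics([("apple", 0), ("banana", 1), ("apple", 0)],
                                {"apple": 0, "banana": 1}, num_topics=2)
print(counts)  # [[2. 0.]
               #  [0. 1.]]
```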

@@ -233,25 +235,25 @@ def show_topics(self, num_topics=10, num_words=10, log=False, formatted=True):
shown = []
for i in chosen_topics:
if formatted:
topic = self.print_topic(i, topn=num_words)
topic = self.print_topic(i, num_words=num_words)
else:
topic = self.show_topic(i, topn=num_words)
topic = self.show_topic(i, num_words=num_words)
shown.append(topic)
if log:
logger.info("topic #%i (%.3f): %s", i, self.alpha[i], topic)
return shown

def show_topic(self, topicid, topn=10):
def show_topic(self, topicid, num_words=10):
if self.word_topics is None:
logger.warn("Run train or load_word_topics before showing topics.")
topic = self.word_topics[topicid]
topic = topic / topic.sum() # normalize to probability dist
bestn = matutils.argsort(topic, topn, reverse=True)
bestn = matutils.argsort(topic, num_words, reverse=True)
beststr = [(topic[id], self.id2word[id]) for id in bestn]
return beststr

def print_topic(self, topicid, topn=10):
return ' + '.join(['%.3f*%s' % v for v in self.show_topic(topicid, topn)])
def print_topic(self, topicid, num_words=10):
return ' + '.join(['%.3f*%s' % v for v in self.show_topic(topicid, num_words)])


def get_version(self, direc_path):
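For clarity, here is a standalone sketch (assumed, not gensim's code) of what show_topic and print_topic do with the renamed num_words parameter: normalize a topic's word counts to a probability distribution, take the num_words most probable words, and format them as 'probability*word' terms.

```python
import numpy

def show_topic(word_topics, id2word, topicid, num_words=10):
    topic = word_topics[topicid]
    topic = topic / topic.sum()                      # normalize counts to a probability distribution
    bestn = numpy.argsort(topic)[::-1][:num_words]   # indices of the most probable words
    return [(topic[i], id2word[i]) for i in bestn]

def print_topic(word_topics, id2word, topicid, num_words=10):
    return ' + '.join('%.3f*%s' % v for v in show_topic(word_topics, id2word, topicid, num_words))

id2word = {0: "apple", 1: "banana", 2: "cherry"}
word_topics = numpy.array([[6.0, 3.0, 1.0], [1.0, 1.0, 8.0]])
print(print_topic(word_topics, id2word, 0, num_words=2))  # 0.600*apple + 0.300*banana
```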
