LdaMallet word_topics fixes after #767 (#771)
bhargavvader authored and tmylk committed Jul 5, 2016
1 parent 012877a commit 868716b
Showing 2 changed files with 11 additions and 7 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
@@ -2,6 +2,8 @@ Changes
=======
0.13.2

* wordtopics has been renamed to word_topics in ldamallet, fixing issue #764. (@bhargavvader, #771)
- wordtopics is assigned the value of word_topics to keep backward compatibility, for now
* topics and topn parameters changed to num_topics and num_words in show_topics() and print_topics() (@droudy, #747)
- In hdpmodel and dtmmodel
- NOT BACKWARDS COMPATIBLE!
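To illustrate the renamed API, here is a hedged usage sketch (not part of this commit): the documents, dictionary, and the '/path/to/mallet' binary path are placeholder assumptions. After train(), the assignment counts live in model.word_topics, while model.wordtopics is kept as a deprecated alias for now.

```python
# Illustrative sketch only -- the corpus and mallet path are assumptions, not from this commit.
from gensim.corpora import Dictionary
from gensim.models.wrappers import LdaMallet

docs = [["human", "computer", "interaction"], ["graph", "tree", "graph"]]
dictionary = Dictionary(docs)
corpus = [dictionary.doc2bow(doc) for doc in docs]

# '/path/to/mallet' is a placeholder for a locally installed MALLET binary.
model = LdaMallet('/path/to/mallet', corpus=corpus, num_topics=2, id2word=dictionary)

print(model.show_topics(num_topics=2, num_words=3))
print(model.show_topic(0, num_words=3))               # num_words replaces the old topn parameter
assert (model.wordtopics == model.word_topics).all()  # deprecated alias kept for backward compatibility
```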
16 changes: 9 additions & 7 deletions gensim/models/wrappers/ldamallet.py
@@ -165,6 +165,9 @@ def train(self, corpus):
logger.info("training MALLET LDA with %s", cmd)
check_output(cmd, shell=True)
self.word_topics = self.load_word_topics()
# NOTE - we are still keeping the wordtopics variable to not break backward compatibility.
# word_topics has replaced wordtopics throughout the code; wordtopics just stores the values of word_topics when train is called.
self.wordtopics = self.word_topics

def __getitem__(self, bow, iterations=100):
is_corpus, corpus = utils.is_corpus(bow)
@@ -200,7 +203,6 @@ def load_word_topics(self):
continue
tokenid = word2id[token]
word_topics[int(topic), tokenid] += 1.0
logger.info("loaded assigned topics for %i tokens", word_topics.sum())
self.print_topics(15)
return word_topics
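
As a side note on what load_word_topics builds: the loop above accumulates MALLET's per-token topic assignments into a (num_topics x num_terms) count matrix. A minimal standalone sketch of that accumulation (an assumption for illustration, not gensim's code) might look like this:

```python
import numpy

def accumulate_word_topics(assignments, word2id, num_topics):
    """assignments: iterable of (token, topic) pairs, e.g. parsed from MALLET's state file."""
    word_topics = numpy.zeros((num_topics, len(word2id)), dtype=numpy.float64)
    for token, topic in assignments:
        if token not in word2id:          # skip tokens missing from the dictionary
            continue
        word_topics[int(topic), word2id[token]] += 1.0
    return word_topics

# Example: three tokens assigned across two topics.
counts = accumulate_word_topics([("apple", 0), ("banana", 1), ("apple", 0)],
                                {"apple": 0, "banana": 1}, num_topics=2)
print(counts)  # [[2. 0.]
               #  [0. 1.]]
```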

@@ -233,25 +235,25 @@ def show_topics(self, num_topics=10, num_words=10, log=False, formatted=True):
shown = []
for i in chosen_topics:
if formatted:
topic = self.print_topic(i, topn=num_words)
topic = self.print_topic(i, num_words=num_words)
else:
topic = self.show_topic(i, topn=num_words)
topic = self.show_topic(i, num_words=num_words)
shown.append(topic)
if log:
logger.info("topic #%i (%.3f): %s", i, self.alpha[i], topic)
return shown

def show_topic(self, topicid, topn=10):
def show_topic(self, topicid, num_words=10):
if self.word_topics is None:
logger.warn("Run train or load_word_topics before showing topics.")
topic = self.word_topics[topicid]
topic = topic / topic.sum() # normalize to probability dist
bestn = matutils.argsort(topic, topn, reverse=True)
bestn = matutils.argsort(topic, num_words, reverse=True)
beststr = [(topic[id], self.id2word[id]) for id in bestn]
return beststr

def print_topic(self, topicid, topn=10):
return ' + '.join(['%.3f*%s' % v for v in self.show_topic(topicid, topn)])
def print_topic(self, topicid, num_words=10):
return ' + '.join(['%.3f*%s' % v for v in self.show_topic(topicid, num_words)])


def get_version(self, direc_path):
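For clarity, here is a standalone sketch (assumed, not gensim's code) of what show_topic and print_topic do with the renamed num_words parameter: normalize a topic's word counts to a probability distribution, take the num_words most probable words, and format them as 'probability*word' terms.

```python
import numpy

def show_topic(word_topics, id2word, topicid, num_words=10):
    topic = word_topics[topicid]
    topic = topic / topic.sum()                      # normalize counts to a probability distribution
    bestn = numpy.argsort(topic)[::-1][:num_words]   # indices of the most probable words
    return [(topic[i], id2word[i]) for i in bestn]

def print_topic(word_topics, id2word, topicid, num_words=10):
    return ' + '.join('%.3f*%s' % v for v in show_topic(word_topics, id2word, topicid, num_words))

id2word = {0: "apple", 1: "banana", 2: "cherry"}
word_topics = numpy.array([[6.0, 3.0, 1.0], [1.0, 1.0, 8.0]])
print(print_topic(word_topics, id2word, 0, num_words=2))  # 0.600*apple + 0.300*banana
```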
