diff --git a/gensim/models/wrappers/ldamallet.py b/gensim/models/wrappers/ldamallet.py
index c9795ed7d0..387e42cc79 100644
--- a/gensim/models/wrappers/ldamallet.py
+++ b/gensim/models/wrappers/ldamallet.py
@@ -182,7 +182,7 @@ def __getitem__(self, bow, iterations=100):
 
     def load_word_topics(self):
         logger.info("loading assigned topics from %s", self.fstate())
-        wordtopics = numpy.zeros((self.num_topics, self.num_terms), dtype=numpy.float32)
+        word_topics = numpy.zeros((self.num_topics, self.num_terms), dtype=numpy.float32)
         if hasattr(self.id2word, 'token2id'):
             word2id = self.id2word.token2id
         else:
@@ -199,10 +199,10 @@ def load_word_topics(self):
                 if token not in word2id:
                     continue
                 tokenid = word2id[token]
-                wordtopics[int(topic), tokenid] += 1.0
-        logger.info("loaded assigned topics for %i tokens", wordtopics.sum())
-        self.wordtopics = wordtopics
+                word_topics[int(topic), tokenid] += 1.0
+        logger.info("loaded assigned topics for %i tokens", word_topics.sum())
         self.print_topics(15)
+        return word_topics
 
     def print_topics(self, num_topics=10, num_words=10):
         return self.show_topics(num_topics, num_words, log=True)
@@ -242,7 +242,9 @@ def show_topics(self, num_topics=10, num_words=10, log=False, formatted=True):
         return shown
 
     def show_topic(self, topicid, topn=10):
-        topic = self.wordtopics[topicid]
+        if self.word_topics is None:
+            logger.warn("Run train or load_word_topics before showing topics.")
+        topic = self.word_topics[topicid]
         topic = topic / topic.sum()  # normalize to probability dist
         bestn = matutils.argsort(topic, topn, reverse=True)
         beststr = [(topic[id], self.id2word[id]) for id in bestn]
diff --git a/gensim/test/test_ldamallet_wrapper.py b/gensim/test/test_ldamallet_wrapper.py
index 94737701f3..374641987e 100644
--- a/gensim/test/test_ldamallet_wrapper.py
+++ b/gensim/test/test_ldamallet_wrapper.py
@@ -104,7 +104,7 @@ def testPersistence(self):
         model.save(fname)
         model2 = ldamallet.LdaMallet.load(fname)
         self.assertEqual(model.num_topics, model2.num_topics)
-        self.assertTrue(numpy.allclose(model.wordtopics, model2.wordtopics))
+        self.assertTrue(numpy.allclose(model.word_topics, model2.word_topics))
         tstvec = []
         self.assertTrue(numpy.allclose(model[tstvec], model2[tstvec]))  # try projecting an empty vector
 
@@ -116,7 +116,7 @@ def testPersistenceCompressed(self):
         model.save(fname)
         model2 = ldamallet.LdaMallet.load(fname, mmap=None)
         self.assertEqual(model.num_topics, model2.num_topics)
-        self.assertTrue(numpy.allclose(model.wordtopics, model2.wordtopics))
+        self.assertTrue(numpy.allclose(model.word_topics, model2.word_topics))
         tstvec = []
         self.assertTrue(numpy.allclose(model[tstvec], model2[tstvec]))  # try projecting an empty vector
 
@@ -132,8 +132,8 @@ def testLargeMmap(self):
         # test loading the large model arrays with mmap
         model2 = ldamodel.LdaModel.load(testfile(), mmap='r')
         self.assertEqual(model.num_topics, model2.num_topics)
-        self.assertTrue(isinstance(model2.wordtopics, numpy.memmap))
-        self.assertTrue(numpy.allclose(model.wordtopics, model2.wordtopics))
+        self.assertTrue(isinstance(model2.word_topics, numpy.memmap))
+        self.assertTrue(numpy.allclose(model.word_topics, model2.word_topics))
         tstvec = []
         self.assertTrue(numpy.allclose(model[tstvec], model2[tstvec]))  # try projecting an empty vector
 
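
A minimal usage sketch of the renamed attribute and the new return value of load_word_topics(). The Mallet binary path and the toy corpus below are placeholders, and it assumes training (triggered by passing a corpus to the constructor, as in the existing wrapper) still populates the model's word_topics attribute:

```python
# Sketch only: "/path/to/mallet" is a placeholder for a local Mallet install,
# and the toy corpus exists purely for illustration.
from gensim import corpora
from gensim.models.wrappers import ldamallet

texts = [
    ["human", "interface", "computer"],
    ["survey", "user", "computer", "system", "response", "time"],
    ["graph", "minors", "trees"],
]
dictionary = corpora.Dictionary(texts)
corpus = [dictionary.doc2bow(text) for text in texts]

# Passing a corpus trains the model immediately (runs Mallet under the hood).
model = ldamallet.LdaMallet(
    "/path/to/mallet",  # placeholder path to the Mallet executable
    corpus=corpus, num_topics=2, id2word=dictionary, iterations=50,
)

# load_word_topics() now returns the topic-by-term count matrix instead of
# only stashing it on the model as a side effect ...
word_topics = model.load_word_topics()
print(word_topics.shape)  # (num_topics, num_terms)

# ... and the persisted attribute is now model.word_topics (was model.wordtopics),
# which show_topic() reads from.
print(model.show_topic(0, topn=5))
```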