diff --git a/gensim/models/atmodel.py b/gensim/models/atmodel.py index 4f550b9ffe..c3987ceef0 100755 --- a/gensim/models/atmodel.py +++ b/gensim/models/atmodel.py @@ -391,15 +391,17 @@ def inference(self, chunk, author2doc, doc2author, rhot, collect_sstats=False, c doc_no = d # Get the IDs and counts of all the words in the current document. # TODO: this is duplication of code in LdaModel. Refactor. + if doc and not isinstance(doc[0][0], six.integer_types): # make sure the term IDs are ints, otherwise np will get upset ids = [int(id) for id, _ in doc] else: ids = [id for id, _ in doc] - cts = np.array([cnt for _, cnt in doc]) + ids = np.array(ids, dtype=np.integer) + cts = np.array([cnt for _, cnt in doc], dtype=np.integer) # Get all authors in current document, and convert the author names to integer IDs. - authors_d = [self.author2id[a] for a in self.doc2author[doc_no]] + authors_d = np.array([self.author2id[a] for a in self.doc2author[doc_no]], dtype=np.integer) gammad = self.state.gamma[authors_d, :] # gamma of document d before update. tilde_gamma = gammad.copy() # gamma that will be updated. @@ -828,9 +830,9 @@ def bound(self, chunk, chunk_doc_idx=None, subsample_ratio=1.0, author2doc=None, else: doc_no = d # Get all authors in current document, and convert the author names to integer IDs. - authors_d = [self.author2id[a] for a in self.doc2author[doc_no]] - ids = np.array([id for id, _ in doc]) # Word IDs in doc. - cts = np.array([cnt for _, cnt in doc]) # Word counts. + authors_d = np.array([self.author2id[a] for a in self.doc2author[doc_no]], dtype=np.integer) + ids = np.array([id for id, _ in doc], dtype=np.integer) # Word IDs in doc. + cts = np.array([cnt for _, cnt in doc], dtype=np.integer) # Word counts. if d % self.chunksize == 0: logger.debug("bound: at document #%i in chunk", d)