
Commit

Fix flake8 warnings W605, W504 (#2256)
* Fix flake8 warnings W605

* fix W504

* pin flake8-rst (avoid issue from 0.5.0)
horpto authored and menshikh-iv committed Dec 11, 2018
1 parent 2ccc82b commit 30528a5
Showing 28 changed files with 99 additions and 100 deletions.
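For context: W605 is flake8's "invalid escape sequence" warning, raised when a backslash sequence such as \d appears in a non-raw string literal, and W504 is its "line break after binary operator" warning (the mirror of W503, which flags a break *before* the operator; only one of the two styles can be enforced at a time). flake8-rst, pinned in the third commit message item, runs flake8 over code examples embedded in reStructuredText/docstrings. The snippet below is a hypothetical illustration of the two fix patterns repeated throughout this diff, not code taken from it:

import re

# W605 fix: make regex-bearing literals raw strings, so backslash sequences like
# \d, \s and \{ reach the regex engine instead of being treated as string escapes.
flagged = re.compile('\d+\s*')    # flake8 reports W605 on this literal
fixed = re.compile(r'\d+\s*')     # raw string literal, no warning

# W504 fix: when wrapping a long boolean expression, put the operator at the
# start of the continuation line instead of leaving it dangling at the end.
a, b = 1, 2
wrapped = (
    a == 1
    and b == 2
)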
6 changes: 3 additions & 3 deletions gensim/corpora/wikicorpus.py
@@ -81,7 +81,7 @@
"""Capture interlinks text and article linked"""
RE_P17 = re.compile(
r'(\n.{0,4}((bgcolor)|(\d{0,1}[ ]?colspan)|(rowspan)|(style=)|(class=)|(align=)|(scope=))(.*))|'
- '(^.{0,2}((bgcolor)|(\d{0,1}[ ]?colspan)|(rowspan)|(style=)|(class=)|(align=))(.*))',
+ r'(^.{0,2}((bgcolor)|(\d{0,1}[ ]?colspan)|(rowspan)|(style=)|(class=)|(align=))(.*))',
re.UNICODE
)
"""Table markup"""
@@ -143,8 +143,8 @@ def filter_example(elem, text, *args, **kwargs):
# regex is in the function call so that we do not pollute the wikicorpus
# namespace do not do this in production as this function is called for
# every element in the wiki dump
- _regex_de_excellent = re.compile('.*\{\{(Exzellent.*?)\}\}[\s]*', flags=re.DOTALL)
- _regex_de_featured = re.compile('.*\{\{(Lesenswert.*?)\}\}[\s]*', flags=re.DOTALL)
+ _regex_de_excellent = re.compile(r'.*\{\{(Exzellent.*?)\}\}[\s]*', flags=re.DOTALL)
+ _regex_de_featured = re.compile(r'.*\{\{(Lesenswert.*?)\}\}[\s]*', flags=re.DOTALL)

if text is None:
return False
6 changes: 3 additions & 3 deletions gensim/models/atmodel.py
@@ -376,14 +376,14 @@ def extend_corpus(self, corpus):
self.corpus.extend(corpus)

def compute_phinorm(self, expElogthetad, expElogbetad):
"""Efficiently computes the normalizing factor in phi.
r"""Efficiently computes the normalizing factor in phi.
Parameters
----------
expElogthetad: numpy.ndarray
Value of variational distribution :math:`q(\theta|\gamma)`.
expElogbetad: numpy.ndarray
- Value of variational distribution :math:`q(\\beta|\lambda)`.
+ Value of variational distribution :math:`q(\beta|\lambda)`.
Returns
-------
@@ -888,7 +888,7 @@ def rho():
del other

def bound(self, chunk, chunk_doc_idx=None, subsample_ratio=1.0, author2doc=None, doc2author=None):
"""Estimate the variational bound of documents from `corpus`.
r"""Estimate the variational bound of documents from `corpus`.
:math:`\mathbb{E_{q}}[\log p(corpus)] - \mathbb{E_{q}}[\log q(corpus)]`
4 changes: 2 additions & 2 deletions gensim/models/base_any2vec.py
@@ -5,7 +5,7 @@
# Copyright (C) 2018 RaRe Technologies s.r.o.
# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html

"""This module contains base classes required for implementing \*2vec algorithms.
r"""This module contains base classes required for implementing \*2vec algorithms.
The class hierarchy is designed to facilitate adding more concrete implementations for creating embeddings.
In the most general case, the purpose of this class is to transform an arbitrary representation to a numerical vector
@@ -56,7 +56,7 @@


class BaseAny2VecModel(utils.SaveLoad):
"""Base class for training, using and evaluating \*2vec model.
r"""Base class for training, using and evaluating \*2vec model.
Contains implementation for multi-threaded training. The purpose of this class is to provide a
reference interface for concrete embedding implementations, whether the input space is a corpus
6 changes: 3 additions & 3 deletions gensim/models/coherencemodel.py
@@ -460,9 +460,9 @@ def _relevant_ids_will_differ(self, new_topics):
return not self._accumulator.relevant_ids.issuperset(new_set)

def _topics_differ(self, new_topics):
- return (new_topics is not None and
- self._topics is not None and
- not np.array_equal(new_topics, self._topics))
+ return (new_topics is not None
+ and self._topics is not None
+ and not np.array_equal(new_topics, self._topics))

def _get_topics(self):
"""Internal helper function to return topics from a trained topic model."""
8 changes: 4 additions & 4 deletions gensim/models/deprecated/doc2vec.py
@@ -242,8 +242,8 @@ def train_document_dm(model, doc_words, doctag_indexes, alpha, work=None, neu1=N
if doctag_locks is None:
doctag_locks = model.docvecs.doctag_syn0_lockf

- word_vocabs = [model.wv.vocab[w] for w in doc_words if w in model.wv.vocab and
- model.wv.vocab[w].sample_int > model.random.rand() * 2**32]
+ word_vocabs = [model.wv.vocab[w] for w in doc_words if w in model.wv.vocab
+ and model.wv.vocab[w].sample_int > model.random.rand() * 2**32]

for pos, word in enumerate(word_vocabs):
reduced_window = model.random.randint(model.window) # `b` in the original doc2vec code
@@ -298,8 +298,8 @@ def train_document_dm_concat(model, doc_words, doctag_indexes, alpha, work=None,
if doctag_locks is None:
doctag_locks = model.docvecs.doctag_syn0_lockf

- word_vocabs = [model.wv.vocab[w] for w in doc_words if w in model.wv.vocab and
- model.wv.vocab[w].sample_int > model.random.rand() * 2**32]
+ word_vocabs = [model.wv.vocab[w] for w in doc_words if w in model.wv.vocab
+ and model.wv.vocab[w].sample_int > model.random.rand() * 2**32]
doctag_len = len(doctag_indexes)
if doctag_len != model.dm_tag_count:
return 0 # skip doc without expected number of doctag(s) (TODO: warn/pad?)
8 changes: 4 additions & 4 deletions gensim/models/deprecated/fasttext.py
@@ -148,8 +148,8 @@ def train_batch_cbow(model, sentences, alpha, work=None, neu1=None):
"""
result = 0
for sentence in sentences:
- word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab and
- model.wv.vocab[w].sample_int > model.random.rand() * 2**32]
+ word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab
+ and model.wv.vocab[w].sample_int > model.random.rand() * 2**32]
for pos, word in enumerate(word_vocabs):
reduced_window = model.random.randint(model.window)
start = max(0, pos - model.window + reduced_window)
@@ -211,8 +211,8 @@ def train_batch_sg(model, sentences, alpha, work=None, neu1=None):
"""
result = 0
for sentence in sentences:
- word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab and
- model.wv.vocab[w].sample_int > model.random.rand() * 2**32]
+ word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab
+ and model.wv.vocab[w].sample_int > model.random.rand() * 2**32]
for pos, word in enumerate(word_vocabs):
reduced_window = model.random.randint(model.window) # `b` in the original word2vec code
# now go over all words from the (reduced) window, predicting each one in turn
4 changes: 2 additions & 2 deletions gensim/models/deprecated/old_saveload.py
@@ -108,8 +108,8 @@ def _load_specials(self, fname, mmap, compress, subname):
"""
def mmap_error(obj, filename):
return IOError(
- 'Cannot mmap compressed object %s in file %s. ' % (obj, filename) +
- 'Use `load(fname, mmap=None)` or uncompress files manually.'
+ 'Cannot mmap compressed object %s in file %s. ' % (obj, filename)
+ + 'Use `load(fname, mmap=None)` or uncompress files manually.'
)

for attrib in getattr(self, '__recursive_saveloads', []):
8 changes: 4 additions & 4 deletions gensim/models/deprecated/word2vec.py
@@ -232,8 +232,8 @@ def train_batch_sg(model, sentences, alpha, work=None, compute_loss=False):
"""
result = 0
for sentence in sentences:
- word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab and
- model.wv.vocab[w].sample_int > model.random.rand() * 2**32]
+ word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab
+ and model.wv.vocab[w].sample_int > model.random.rand() * 2**32]
for pos, word in enumerate(word_vocabs):
reduced_window = model.random.randint(model.window) # `b` in the original word2vec code

@@ -263,8 +263,8 @@ def train_batch_cbow(model, sentences, alpha, work=None, neu1=None, compute_loss
"""
result = 0
for sentence in sentences:
- word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab and
- model.wv.vocab[w].sample_int > model.random.rand() * 2**32]
+ word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab
+ and model.wv.vocab[w].sample_int > model.random.rand() * 2**32]
for pos, word in enumerate(word_vocabs):
reduced_window = model.random.randint(model.window) # `b` in the original word2vec code
start = max(0, pos - model.window + reduced_window)
8 changes: 4 additions & 4 deletions gensim/models/doc2vec.py
@@ -227,8 +227,8 @@ def train_document_dm(model, doc_words, doctag_indexes, alpha, work=None, neu1=N
if doctag_locks is None:
doctag_locks = model.docvecs.doctag_syn0_lockf

- word_vocabs = [model.wv.vocab[w] for w in doc_words if w in model.wv.vocab and
- model.wv.vocab[w].sample_int > model.random.rand() * 2 ** 32]
+ word_vocabs = [model.wv.vocab[w] for w in doc_words if w in model.wv.vocab
+ and model.wv.vocab[w].sample_int > model.random.rand() * 2 ** 32]

for pos, word in enumerate(word_vocabs):
reduced_window = model.random.randint(model.window) # `b` in the original doc2vec code
@@ -314,8 +314,8 @@ def train_document_dm_concat(model, doc_words, doctag_indexes, alpha, work=None,
if doctag_locks is None:
doctag_locks = model.docvecs.doctag_syn0_lockf

- word_vocabs = [model.wv.vocab[w] for w in doc_words if w in model.wv.vocab and
- model.wv.vocab[w].sample_int > model.random.rand() * 2 ** 32]
+ word_vocabs = [model.wv.vocab[w] for w in doc_words if w in model.wv.vocab
+ and model.wv.vocab[w].sample_int > model.random.rand() * 2 ** 32]
doctag_len = len(doctag_indexes)
if doctag_len != model.dm_tag_count:
return 0 # skip doc without expected number of doctag(s) (TODO: warn/pad?)
8 changes: 4 additions & 4 deletions gensim/models/fasttext.py
@@ -140,8 +140,8 @@ def train_batch_cbow(model, sentences, alpha, work=None, neu1=None):
"""
result = 0
for sentence in sentences:
- word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab and
- model.wv.vocab[w].sample_int > model.random.rand() * 2 ** 32]
+ word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab
+ and model.wv.vocab[w].sample_int > model.random.rand() * 2 ** 32]
for pos, word in enumerate(word_vocabs):
reduced_window = model.random.randint(model.window)
start = max(0, pos - model.window + reduced_window)
@@ -199,8 +199,8 @@ def train_batch_sg(model, sentences, alpha, work=None, neu1=None):
"""
result = 0
for sentence in sentences:
- word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab and
- model.wv.vocab[w].sample_int > model.random.rand() * 2 ** 32]
+ word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab
+ and model.wv.vocab[w].sample_int > model.random.rand() * 2 ** 32]
for pos, word in enumerate(word_vocabs):
reduced_window = model.random.randint(model.window) # `b` in the original word2vec code
# now go over all words from the (reduced) window, predicting each one in turn
40 changes: 20 additions & 20 deletions gensim/models/hdpmodel.py
@@ -72,7 +72,7 @@


def expect_log_sticks(sticks):
"""For stick-breaking hdp, get the :math:`\mathbb{E}[log(sticks)]`.
r"""For stick-breaking hdp, get the :math:`\mathbb{E}[log(sticks)]`.
Parameters
----------
@@ -97,7 +97,7 @@ def expect_log_sticks(sticks):


def lda_e_step(doc_word_ids, doc_word_counts, alpha, beta, max_iter=100):
"""Performs EM-iteration on a single document for calculation of likelihood for a maximum iteration of `max_iter`.
r"""Performs EM-iteration on a single document for calculation of likelihood for a maximum iteration of `max_iter`.
Parameters
----------
@@ -115,7 +115,7 @@ def lda_e_step(doc_word_ids, doc_word_counts, alpha, beta, max_iter=100):
Returns
-------
(numpy.ndarray, numpy.ndarray)
- Computed (:math:`likelihood`, :math:`\\gamma`).
+ Computed (:math:`likelihood`, :math:`\gamma`).
"""
gamma = np.ones(len(alpha))
@@ -172,7 +172,7 @@ def set_zero(self):


class HdpModel(interfaces.TransformationABC, basemodel.BaseTopicModel):
"""`Hierarchical Dirichlet Process model <http://jmlr.csail.mit.edu/proceedings/papers/v15/wang11a/wang11a.pdf>`_
r"""`Hierarchical Dirichlet Process model <http://jmlr.csail.mit.edu/proceedings/papers/v15/wang11a/wang11a.pdf>`_
Topic models promise to help summarize and organize large archives of texts that cannot be easily analyzed by hand.
Hierarchical Dirichlet process (HDP) is a powerful mixed-membership model for the unsupervised analysis of grouped
@@ -194,7 +194,7 @@ class HdpModel(interfaces.TransformationABC, basemodel.BaseTopicModel):
For this assume that there is a restaurant franchise (`corpus`) which has a large number of restaurants
(`documents`, `j`) under it. They have a global menu of dishes (`topics`, :math:`\Phi_{k}`) which they serve.
Also, a single dish (`topic`, :math:`\Phi_{k}`) is only served at a single table `t` for all the customers
- (`words`, :math:`\\theta_{j,i}`) who sit at that table.
+ (`words`, :math:`\theta_{j,i}`) who sit at that table.
So, when a customer enters the restaurant he/she has the choice to make where he/she wants to sit.
He/she can choose to sit at a table where some customers are already sitting , or he/she can choose to sit
at a new table. Here the probability of choosing each option is not same.
@@ -213,31 +213,31 @@ class HdpModel(interfaces.TransformationABC, basemodel.BaseTopicModel):
share the same set of atoms, :math:`\Phi_{k}`, and only the atom weights :math:`\pi _{jt}` differs.
There will be multiple document-level atoms :math:`\psi_{jt}` which map to the same corpus-level atom
- :math:`\Phi_{k}`. Here, the :math:`\\beta` signify the weights given to each of the topics globally. Also, each
- factor :math:`\\theta_{j,i}` is distributed according to :math:`G_{j}`, i.e., it takes on the value of
+ :math:`\Phi_{k}`. Here, the :math:`\beta` signify the weights given to each of the topics globally. Also, each
+ factor :math:`\theta_{j,i}` is distributed according to :math:`G_{j}`, i.e., it takes on the value of
:math:`\Phi_{k}` with probability :math:`\pi _{jt}`. :math:`C_{j,t}` is an indicator variable whose value `k`
signifies the index of :math:`\Phi`. This helps to map :math:`\psi_{jt}` to :math:`\Phi_{k}`.
- The top level (`corpus` level) stick proportions correspond the values of :math:`\\beta`,
+ The top level (`corpus` level) stick proportions correspond the values of :math:`\beta`,
bottom level (`document` level) stick proportions correspond to the values of :math:`\pi`.
- The truncation level for the corpus (`K`) and document (`T`) corresponds to the number of :math:`\\beta`
+ The truncation level for the corpus (`K`) and document (`T`) corresponds to the number of :math:`\beta`
and :math:`\pi` which are in existence.
Now, whenever coordinate ascent updates are to be performed, they happen at two level. The document level as well
as corpus level.
At document level, we update the following:
- #. The parameters to the document level sticks, i.e, a and b parameters of :math:`\\beta` distribution of the
+ #. The parameters to the document level sticks, i.e, a and b parameters of :math:`\beta` distribution of the
variable :math:`\pi _{jt}`.
#. The parameters to per word topic indicators, :math:`Z_{j,n}`. Here :math:`Z_{j,n}` selects topic parameter
:math:`\psi_{jt}`.
#. The parameters to per document topic indices :math:`\Phi_{jtk}`.
At corpus level, we update the following:
- #. The parameters to the top level sticks, i.e., the parameters of the :math:`\\beta` distribution for the
- corpus level :math:`\\beta`, which signify the topic distribution at corpus level.
+ #. The parameters to the top level sticks, i.e., the parameters of the :math:`\beta` distribution for the
+ corpus level :math:`\beta`, which signify the topic distribution at corpus level.
#. The parameters to the topics :math:`\Phi_{k}`.
Now coming on to the steps involved, procedure for online variational inference for the Hdp model is as follows:
@@ -261,14 +261,14 @@ class HdpModel(interfaces.TransformationABC, basemodel.BaseTopicModel):
Attributes
----------
lda_alpha : numpy.ndarray
- Same as :math:`\\alpha` from :class:`gensim.models.ldamodel.LdaModel`.
+ Same as :math:`\alpha` from :class:`gensim.models.ldamodel.LdaModel`.
lda_beta : numpy.ndarray
- Same as :math:`\\beta` from from :class:`gensim.models.ldamodel.LdaModel`.
+ Same as :math:`\beta` from from :class:`gensim.models.ldamodel.LdaModel`.
m_D : int
Number of documents in the corpus.
m_Elogbeta : numpy.ndarray:
- Stores value of dirichlet expectation, i.e., compute :math:`E[log \\theta]` for a vector
- :math:`\\theta \sim Dir(\\alpha)`.
+ Stores value of dirichlet expectation, i.e., compute :math:`E[log \theta]` for a vector
+ :math:`\theta \sim Dir(\alpha)`.
m_lambda : {numpy.ndarray, float}
Drawn samples from the parameterized gamma distribution.
m_lambda_sum : {numpy.ndarray, float}
@@ -280,7 +280,7 @@ class HdpModel(interfaces.TransformationABC, basemodel.BaseTopicModel):
m_rhot : float
Assigns weight to the information obtained from the mini-chunk and its value it between 0 and 1.
m_status_up_to_date : bool
- Flag to indicate whether `lambda `and :math:`E[log \\theta]` have been updated if True, otherwise - not.
+ Flag to indicate whether `lambda `and :math:`E[log \theta]` have been updated if True, otherwise - not.
m_timestamp : numpy.ndarray
Helps to keep track and perform lazy updates on lambda.
m_updatect : int
@@ -510,13 +510,13 @@ def update_finished(self, start_time, chunks_processed, docs_processed):
"""
return (
# chunk limit reached
- (self.max_chunks and chunks_processed == self.max_chunks) or
+ (self.max_chunks and chunks_processed == self.max_chunks)

# time limit reached
- (self.max_time and time.clock() - start_time > self.max_time) or
+ or (self.max_time and time.clock() - start_time > self.max_time)

# no limits and whole corpus has been processed once
- (not self.max_chunks and not self.max_time and docs_processed >= self.m_D))
+ or (not self.max_chunks and not self.max_time and docs_processed >= self.m_D))

def update_chunk(self, chunk, update=True, opt_o=True):
"""Performs lazy update on necessary columns of lambda and variational inference for documents in the chunk.
6 changes: 3 additions & 3 deletions gensim/models/ldamulticore.py
@@ -276,9 +276,9 @@ def process_result_queue(force=False):
if (force and merged_new and queue_size[0] == 0) or (not self.batch and (other.numdocs >= updateafter)):
self.do_mstep(rho(), other, pass_ > 0)
other.reset()
- if self.eval_every is not None and \
- ((force and queue_size[0] == 0) or
- (self.eval_every != 0 and (self.num_updates / updateafter) % self.eval_every == 0)):
+ if self.eval_every is not None \
+ and ((force and queue_size[0] == 0)
+ or (self.eval_every != 0 and (self.num_updates / updateafter) % self.eval_every == 0)):
self.log_perplexity(chunk, total_docs=lencorpus)

chunk_stream = utils.grouper(corpus, self.chunksize, as_numpy=chunks_as_numpy)
(Diffs for the remaining changed files are not shown here.)
