From 30528a5c01de9c2eb0b2f4d5cf7351adace369d1 Mon Sep 17 00:00:00 2001 From: horpto <__Singleton__@hackerdom.ru> Date: Tue, 11 Dec 2018 21:25:54 +0500 Subject: [PATCH] Fix flake8 warnings W605, W504 (#2256) * Fix flake8 warnings W605 * fix W504 * pin flake8-rst (avoid issue from 0.5.0) --- gensim/corpora/wikicorpus.py | 6 +-- gensim/models/atmodel.py | 6 +-- gensim/models/base_any2vec.py | 4 +- gensim/models/coherencemodel.py | 6 +-- gensim/models/deprecated/doc2vec.py | 8 ++-- gensim/models/deprecated/fasttext.py | 8 ++-- gensim/models/deprecated/old_saveload.py | 4 +- gensim/models/deprecated/word2vec.py | 8 ++-- gensim/models/doc2vec.py | 8 ++-- gensim/models/fasttext.py | 8 ++-- gensim/models/hdpmodel.py | 40 +++++++++---------- gensim/models/ldamulticore.py | 6 +-- gensim/models/ldaseqmodel.py | 2 +- gensim/models/logentropy_model.py | 6 +-- gensim/models/normmodel.py | 6 +-- gensim/models/phrases.py | 10 ++--- gensim/models/tfidfmodel.py | 8 ++-- gensim/models/word2vec.py | 8 ++-- gensim/models/wrappers/ldamallet.py | 4 +- gensim/summarization/mz_entropy.py | 6 +-- gensim/test/test_corpora.py | 2 +- gensim/test/test_doc2vec.py | 4 +- gensim/test/test_keyedvectors.py | 3 +- gensim/test/test_matutils.py | 4 +- .../direct_confirmation_measure.py | 10 ++--- gensim/topic_coherence/segmentation.py | 8 ++-- gensim/utils.py | 4 +- tox.ini | 2 +- 28 files changed, 99 insertions(+), 100 deletions(-) diff --git a/gensim/corpora/wikicorpus.py b/gensim/corpora/wikicorpus.py index 8cc5ea58a7..a7302fe5c0 100644 --- a/gensim/corpora/wikicorpus.py +++ b/gensim/corpora/wikicorpus.py @@ -81,7 +81,7 @@ """Capture interlinks text and article linked""" RE_P17 = re.compile( r'(\n.{0,4}((bgcolor)|(\d{0,1}[ ]?colspan)|(rowspan)|(style=)|(class=)|(align=)|(scope=))(.*))|' - '(^.{0,2}((bgcolor)|(\d{0,1}[ ]?colspan)|(rowspan)|(style=)|(class=)|(align=))(.*))', + r'(^.{0,2}((bgcolor)|(\d{0,1}[ ]?colspan)|(rowspan)|(style=)|(class=)|(align=))(.*))', re.UNICODE ) """Table markup""" @@ -143,8 +143,8 @@ def filter_example(elem, text, *args, **kwargs): # regex is in the function call so that we do not pollute the wikicorpus # namespace do not do this in production as this function is called for # every element in the wiki dump - _regex_de_excellent = re.compile('.*\{\{(Exzellent.*?)\}\}[\s]*', flags=re.DOTALL) - _regex_de_featured = re.compile('.*\{\{(Lesenswert.*?)\}\}[\s]*', flags=re.DOTALL) + _regex_de_excellent = re.compile(r'.*\{\{(Exzellent.*?)\}\}[\s]*', flags=re.DOTALL) + _regex_de_featured = re.compile(r'.*\{\{(Lesenswert.*?)\}\}[\s]*', flags=re.DOTALL) if text is None: return False diff --git a/gensim/models/atmodel.py b/gensim/models/atmodel.py index a60e657788..e3ca00f7d7 100755 --- a/gensim/models/atmodel.py +++ b/gensim/models/atmodel.py @@ -376,14 +376,14 @@ def extend_corpus(self, corpus): self.corpus.extend(corpus) def compute_phinorm(self, expElogthetad, expElogbetad): - """Efficiently computes the normalizing factor in phi. + r"""Efficiently computes the normalizing factor in phi. Parameters ---------- expElogthetad: numpy.ndarray Value of variational distribution :math:`q(\theta|\gamma)`. expElogbetad: numpy.ndarray - Value of variational distribution :math:`q(\\beta|\lambda)`. + Value of variational distribution :math:`q(\beta|\lambda)`. Returns ------- @@ -888,7 +888,7 @@ def rho(): del other def bound(self, chunk, chunk_doc_idx=None, subsample_ratio=1.0, author2doc=None, doc2author=None): - """Estimate the variational bound of documents from `corpus`. 
+ r"""Estimate the variational bound of documents from `corpus`. :math:`\mathbb{E_{q}}[\log p(corpus)] - \mathbb{E_{q}}[\log q(corpus)]` diff --git a/gensim/models/base_any2vec.py b/gensim/models/base_any2vec.py index bd74a5a6bd..d72301dccd 100644 --- a/gensim/models/base_any2vec.py +++ b/gensim/models/base_any2vec.py @@ -5,7 +5,7 @@ # Copyright (C) 2018 RaRe Technologies s.r.o. # Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html -"""This module contains base classes required for implementing \*2vec algorithms. +r"""This module contains base classes required for implementing \*2vec algorithms. The class hierarchy is designed to facilitate adding more concrete implementations for creating embeddings. In the most general case, the purpose of this class is to transform an arbitrary representation to a numerical vector @@ -56,7 +56,7 @@ class BaseAny2VecModel(utils.SaveLoad): - """Base class for training, using and evaluating \*2vec model. + r"""Base class for training, using and evaluating \*2vec model. Contains implementation for multi-threaded training. The purpose of this class is to provide a reference interface for concrete embedding implementations, whether the input space is a corpus diff --git a/gensim/models/coherencemodel.py b/gensim/models/coherencemodel.py index fd42f53359..a46414a1a5 100644 --- a/gensim/models/coherencemodel.py +++ b/gensim/models/coherencemodel.py @@ -460,9 +460,9 @@ def _relevant_ids_will_differ(self, new_topics): return not self._accumulator.relevant_ids.issuperset(new_set) def _topics_differ(self, new_topics): - return (new_topics is not None and - self._topics is not None and - not np.array_equal(new_topics, self._topics)) + return (new_topics is not None + and self._topics is not None + and not np.array_equal(new_topics, self._topics)) def _get_topics(self): """Internal helper function to return topics from a trained topic model.""" diff --git a/gensim/models/deprecated/doc2vec.py b/gensim/models/deprecated/doc2vec.py index 8d8875affe..b841866b93 100644 --- a/gensim/models/deprecated/doc2vec.py +++ b/gensim/models/deprecated/doc2vec.py @@ -242,8 +242,8 @@ def train_document_dm(model, doc_words, doctag_indexes, alpha, work=None, neu1=N if doctag_locks is None: doctag_locks = model.docvecs.doctag_syn0_lockf - word_vocabs = [model.wv.vocab[w] for w in doc_words if w in model.wv.vocab and - model.wv.vocab[w].sample_int > model.random.rand() * 2**32] + word_vocabs = [model.wv.vocab[w] for w in doc_words if w in model.wv.vocab + and model.wv.vocab[w].sample_int > model.random.rand() * 2**32] for pos, word in enumerate(word_vocabs): reduced_window = model.random.randint(model.window) # `b` in the original doc2vec code @@ -298,8 +298,8 @@ def train_document_dm_concat(model, doc_words, doctag_indexes, alpha, work=None, if doctag_locks is None: doctag_locks = model.docvecs.doctag_syn0_lockf - word_vocabs = [model.wv.vocab[w] for w in doc_words if w in model.wv.vocab and - model.wv.vocab[w].sample_int > model.random.rand() * 2**32] + word_vocabs = [model.wv.vocab[w] for w in doc_words if w in model.wv.vocab + and model.wv.vocab[w].sample_int > model.random.rand() * 2**32] doctag_len = len(doctag_indexes) if doctag_len != model.dm_tag_count: return 0 # skip doc without expected number of doctag(s) (TODO: warn/pad?) 
diff --git a/gensim/models/deprecated/fasttext.py b/gensim/models/deprecated/fasttext.py index 47e7f1a6a8..836c66d4ca 100644 --- a/gensim/models/deprecated/fasttext.py +++ b/gensim/models/deprecated/fasttext.py @@ -148,8 +148,8 @@ def train_batch_cbow(model, sentences, alpha, work=None, neu1=None): """ result = 0 for sentence in sentences: - word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab and - model.wv.vocab[w].sample_int > model.random.rand() * 2**32] + word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab + and model.wv.vocab[w].sample_int > model.random.rand() * 2**32] for pos, word in enumerate(word_vocabs): reduced_window = model.random.randint(model.window) start = max(0, pos - model.window + reduced_window) @@ -211,8 +211,8 @@ def train_batch_sg(model, sentences, alpha, work=None, neu1=None): """ result = 0 for sentence in sentences: - word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab and - model.wv.vocab[w].sample_int > model.random.rand() * 2**32] + word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab + and model.wv.vocab[w].sample_int > model.random.rand() * 2**32] for pos, word in enumerate(word_vocabs): reduced_window = model.random.randint(model.window) # `b` in the original word2vec code # now go over all words from the (reduced) window, predicting each one in turn diff --git a/gensim/models/deprecated/old_saveload.py b/gensim/models/deprecated/old_saveload.py index 44f4a5619d..c609dd5532 100644 --- a/gensim/models/deprecated/old_saveload.py +++ b/gensim/models/deprecated/old_saveload.py @@ -108,8 +108,8 @@ def _load_specials(self, fname, mmap, compress, subname): """ def mmap_error(obj, filename): return IOError( - 'Cannot mmap compressed object %s in file %s. ' % (obj, filename) + - 'Use `load(fname, mmap=None)` or uncompress files manually.' + 'Cannot mmap compressed object %s in file %s. ' % (obj, filename) + + 'Use `load(fname, mmap=None)` or uncompress files manually.' 
) for attrib in getattr(self, '__recursive_saveloads', []): diff --git a/gensim/models/deprecated/word2vec.py b/gensim/models/deprecated/word2vec.py index 885d77ba66..d647bfb8f1 100644 --- a/gensim/models/deprecated/word2vec.py +++ b/gensim/models/deprecated/word2vec.py @@ -232,8 +232,8 @@ def train_batch_sg(model, sentences, alpha, work=None, compute_loss=False): """ result = 0 for sentence in sentences: - word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab and - model.wv.vocab[w].sample_int > model.random.rand() * 2**32] + word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab + and model.wv.vocab[w].sample_int > model.random.rand() * 2**32] for pos, word in enumerate(word_vocabs): reduced_window = model.random.randint(model.window) # `b` in the original word2vec code @@ -263,8 +263,8 @@ def train_batch_cbow(model, sentences, alpha, work=None, neu1=None, compute_loss """ result = 0 for sentence in sentences: - word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab and - model.wv.vocab[w].sample_int > model.random.rand() * 2**32] + word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab + and model.wv.vocab[w].sample_int > model.random.rand() * 2**32] for pos, word in enumerate(word_vocabs): reduced_window = model.random.randint(model.window) # `b` in the original word2vec code start = max(0, pos - model.window + reduced_window) diff --git a/gensim/models/doc2vec.py b/gensim/models/doc2vec.py index 6a6b3d3ae9..d9b905cb3b 100644 --- a/gensim/models/doc2vec.py +++ b/gensim/models/doc2vec.py @@ -227,8 +227,8 @@ def train_document_dm(model, doc_words, doctag_indexes, alpha, work=None, neu1=N if doctag_locks is None: doctag_locks = model.docvecs.doctag_syn0_lockf - word_vocabs = [model.wv.vocab[w] for w in doc_words if w in model.wv.vocab and - model.wv.vocab[w].sample_int > model.random.rand() * 2 ** 32] + word_vocabs = [model.wv.vocab[w] for w in doc_words if w in model.wv.vocab + and model.wv.vocab[w].sample_int > model.random.rand() * 2 ** 32] for pos, word in enumerate(word_vocabs): reduced_window = model.random.randint(model.window) # `b` in the original doc2vec code @@ -314,8 +314,8 @@ def train_document_dm_concat(model, doc_words, doctag_indexes, alpha, work=None, if doctag_locks is None: doctag_locks = model.docvecs.doctag_syn0_lockf - word_vocabs = [model.wv.vocab[w] for w in doc_words if w in model.wv.vocab and - model.wv.vocab[w].sample_int > model.random.rand() * 2 ** 32] + word_vocabs = [model.wv.vocab[w] for w in doc_words if w in model.wv.vocab + and model.wv.vocab[w].sample_int > model.random.rand() * 2 ** 32] doctag_len = len(doctag_indexes) if doctag_len != model.dm_tag_count: return 0 # skip doc without expected number of doctag(s) (TODO: warn/pad?) 
diff --git a/gensim/models/fasttext.py b/gensim/models/fasttext.py index f7e9d65556..17b314fec9 100644 --- a/gensim/models/fasttext.py +++ b/gensim/models/fasttext.py @@ -140,8 +140,8 @@ def train_batch_cbow(model, sentences, alpha, work=None, neu1=None): """ result = 0 for sentence in sentences: - word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab and - model.wv.vocab[w].sample_int > model.random.rand() * 2 ** 32] + word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab + and model.wv.vocab[w].sample_int > model.random.rand() * 2 ** 32] for pos, word in enumerate(word_vocabs): reduced_window = model.random.randint(model.window) start = max(0, pos - model.window + reduced_window) @@ -199,8 +199,8 @@ def train_batch_sg(model, sentences, alpha, work=None, neu1=None): """ result = 0 for sentence in sentences: - word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab and - model.wv.vocab[w].sample_int > model.random.rand() * 2 ** 32] + word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab + and model.wv.vocab[w].sample_int > model.random.rand() * 2 ** 32] for pos, word in enumerate(word_vocabs): reduced_window = model.random.randint(model.window) # `b` in the original word2vec code # now go over all words from the (reduced) window, predicting each one in turn diff --git a/gensim/models/hdpmodel.py b/gensim/models/hdpmodel.py index 3ff2a508d2..3c24fb4561 100755 --- a/gensim/models/hdpmodel.py +++ b/gensim/models/hdpmodel.py @@ -72,7 +72,7 @@ def expect_log_sticks(sticks): - """For stick-breaking hdp, get the :math:`\mathbb{E}[log(sticks)]`. + r"""For stick-breaking hdp, get the :math:`\mathbb{E}[log(sticks)]`. Parameters ---------- @@ -97,7 +97,7 @@ def expect_log_sticks(sticks): def lda_e_step(doc_word_ids, doc_word_counts, alpha, beta, max_iter=100): - """Performs EM-iteration on a single document for calculation of likelihood for a maximum iteration of `max_iter`. + r"""Performs EM-iteration on a single document for calculation of likelihood for a maximum iteration of `max_iter`. Parameters ---------- @@ -115,7 +115,7 @@ def lda_e_step(doc_word_ids, doc_word_counts, alpha, beta, max_iter=100): Returns ------- (numpy.ndarray, numpy.ndarray) - Computed (:math:`likelihood`, :math:`\\gamma`). + Computed (:math:`likelihood`, :math:`\gamma`). """ gamma = np.ones(len(alpha)) @@ -172,7 +172,7 @@ def set_zero(self): class HdpModel(interfaces.TransformationABC, basemodel.BaseTopicModel): - """`Hierarchical Dirichlet Process model `_ + r"""`Hierarchical Dirichlet Process model `_ Topic models promise to help summarize and organize large archives of texts that cannot be easily analyzed by hand. Hierarchical Dirichlet process (HDP) is a powerful mixed-membership model for the unsupervised analysis of grouped @@ -194,7 +194,7 @@ class HdpModel(interfaces.TransformationABC, basemodel.BaseTopicModel): For this assume that there is a restaurant franchise (`corpus`) which has a large number of restaurants (`documents`, `j`) under it. They have a global menu of dishes (`topics`, :math:`\Phi_{k}`) which they serve. Also, a single dish (`topic`, :math:`\Phi_{k}`) is only served at a single table `t` for all the customers - (`words`, :math:`\\theta_{j,i}`) who sit at that table. + (`words`, :math:`\theta_{j,i}`) who sit at that table. So, when a customer enters the restaurant he/she has the choice to make where he/she wants to sit. 
He/she can choose to sit at a table where some customers are already sitting , or he/she can choose to sit at a new table. Here the probability of choosing each option is not same. @@ -213,14 +213,14 @@ class HdpModel(interfaces.TransformationABC, basemodel.BaseTopicModel): share the same set of atoms, :math:`\Phi_{k}`, and only the atom weights :math:`\pi _{jt}` differs. There will be multiple document-level atoms :math:`\psi_{jt}` which map to the same corpus-level atom - :math:`\Phi_{k}`. Here, the :math:`\\beta` signify the weights given to each of the topics globally. Also, each - factor :math:`\\theta_{j,i}` is distributed according to :math:`G_{j}`, i.e., it takes on the value of + :math:`\Phi_{k}`. Here, the :math:`\beta` signify the weights given to each of the topics globally. Also, each + factor :math:`\theta_{j,i}` is distributed according to :math:`G_{j}`, i.e., it takes on the value of :math:`\Phi_{k}` with probability :math:`\pi _{jt}`. :math:`C_{j,t}` is an indicator variable whose value `k` signifies the index of :math:`\Phi`. This helps to map :math:`\psi_{jt}` to :math:`\Phi_{k}`. - The top level (`corpus` level) stick proportions correspond the values of :math:`\\beta`, + The top level (`corpus` level) stick proportions correspond the values of :math:`\beta`, bottom level (`document` level) stick proportions correspond to the values of :math:`\pi`. - The truncation level for the corpus (`K`) and document (`T`) corresponds to the number of :math:`\\beta` + The truncation level for the corpus (`K`) and document (`T`) corresponds to the number of :math:`\beta` and :math:`\pi` which are in existence. Now, whenever coordinate ascent updates are to be performed, they happen at two level. The document level as well @@ -228,7 +228,7 @@ class HdpModel(interfaces.TransformationABC, basemodel.BaseTopicModel): At document level, we update the following: - #. The parameters to the document level sticks, i.e, a and b parameters of :math:`\\beta` distribution of the + #. The parameters to the document level sticks, i.e, a and b parameters of :math:`\beta` distribution of the variable :math:`\pi _{jt}`. #. The parameters to per word topic indicators, :math:`Z_{j,n}`. Here :math:`Z_{j,n}` selects topic parameter :math:`\psi_{jt}`. @@ -236,8 +236,8 @@ class HdpModel(interfaces.TransformationABC, basemodel.BaseTopicModel): At corpus level, we update the following: - #. The parameters to the top level sticks, i.e., the parameters of the :math:`\\beta` distribution for the - corpus level :math:`\\beta`, which signify the topic distribution at corpus level. + #. The parameters to the top level sticks, i.e., the parameters of the :math:`\beta` distribution for the + corpus level :math:`\beta`, which signify the topic distribution at corpus level. #. The parameters to the topics :math:`\Phi_{k}`. Now coming on to the steps involved, procedure for online variational inference for the Hdp model is as follows: @@ -261,14 +261,14 @@ class HdpModel(interfaces.TransformationABC, basemodel.BaseTopicModel): Attributes ---------- lda_alpha : numpy.ndarray - Same as :math:`\\alpha` from :class:`gensim.models.ldamodel.LdaModel`. + Same as :math:`\alpha` from :class:`gensim.models.ldamodel.LdaModel`. lda_beta : numpy.ndarray - Same as :math:`\\beta` from from :class:`gensim.models.ldamodel.LdaModel`. + Same as :math:`\beta` from from :class:`gensim.models.ldamodel.LdaModel`. m_D : int Number of documents in the corpus. 
m_Elogbeta : numpy.ndarray: - Stores value of dirichlet expectation, i.e., compute :math:`E[log \\theta]` for a vector - :math:`\\theta \sim Dir(\\alpha)`. + Stores value of dirichlet expectation, i.e., compute :math:`E[log \theta]` for a vector + :math:`\theta \sim Dir(\alpha)`. m_lambda : {numpy.ndarray, float} Drawn samples from the parameterized gamma distribution. m_lambda_sum : {numpy.ndarray, float} @@ -280,7 +280,7 @@ class HdpModel(interfaces.TransformationABC, basemodel.BaseTopicModel): m_rhot : float Assigns weight to the information obtained from the mini-chunk and its value it between 0 and 1. m_status_up_to_date : bool - Flag to indicate whether `lambda `and :math:`E[log \\theta]` have been updated if True, otherwise - not. + Flag to indicate whether `lambda `and :math:`E[log \theta]` have been updated if True, otherwise - not. m_timestamp : numpy.ndarray Helps to keep track and perform lazy updates on lambda. m_updatect : int @@ -510,13 +510,13 @@ def update_finished(self, start_time, chunks_processed, docs_processed): """ return ( # chunk limit reached - (self.max_chunks and chunks_processed == self.max_chunks) or + (self.max_chunks and chunks_processed == self.max_chunks) # time limit reached - (self.max_time and time.clock() - start_time > self.max_time) or + or (self.max_time and time.clock() - start_time > self.max_time) # no limits and whole corpus has been processed once - (not self.max_chunks and not self.max_time and docs_processed >= self.m_D)) + or (not self.max_chunks and not self.max_time and docs_processed >= self.m_D)) def update_chunk(self, chunk, update=True, opt_o=True): """Performs lazy update on necessary columns of lambda and variational inference for documents in the chunk. diff --git a/gensim/models/ldamulticore.py b/gensim/models/ldamulticore.py index 248cc83abc..d154e367df 100644 --- a/gensim/models/ldamulticore.py +++ b/gensim/models/ldamulticore.py @@ -276,9 +276,9 @@ def process_result_queue(force=False): if (force and merged_new and queue_size[0] == 0) or (not self.batch and (other.numdocs >= updateafter)): self.do_mstep(rho(), other, pass_ > 0) other.reset() - if self.eval_every is not None and \ - ((force and queue_size[0] == 0) or - (self.eval_every != 0 and (self.num_updates / updateafter) % self.eval_every == 0)): + if self.eval_every is not None \ + and ((force and queue_size[0] == 0) + or (self.eval_every != 0 and (self.num_updates / updateafter) % self.eval_every == 0)): self.log_perplexity(chunk, total_docs=lencorpus) chunk_stream = utils.grouper(corpus, self.chunksize, as_numpy=chunks_as_numpy) diff --git a/gensim/models/ldaseqmodel.py b/gensim/models/ldaseqmodel.py index 35c1b64a15..1c9e8a55d9 100644 --- a/gensim/models/ldaseqmodel.py +++ b/gensim/models/ldaseqmodel.py @@ -741,7 +741,7 @@ def update_zeta(self): return self.zeta def compute_post_variance(self, word, chain_variance): - """Get the variance, based on the `Variational Kalman Filtering approach for Approximate Inference (section 3.1) + r"""Get the variance, based on the `Variational Kalman Filtering approach for Approximate Inference (section 3.1) `_. 
This function accepts the word to compute variance for, along with the associated sslm class object, diff --git a/gensim/models/logentropy_model.py b/gensim/models/logentropy_model.py index 6429cbf9e0..bdc726d5fd 100644 --- a/gensim/models/logentropy_model.py +++ b/gensim/models/logentropy_model.py @@ -25,7 +25,7 @@ class LogEntropyModel(interfaces.TransformationABC): - """Objects of this class realize the transformation between word-document co-occurrence matrix (int) + r"""Objects of this class realize the transformation between word-document co-occurrence matrix (int) into a locally/globally weighted matrix (positive floats). This is done by a log entropy normalization, optionally normalizing the resulting documents to unit length. @@ -35,9 +35,9 @@ class LogEntropyModel(interfaces.TransformationABC): local\_weight_{i,j} = log(frequency_{i,j} + 1) - P_{i,j} = \\frac{frequency_{i,j}}{\sum_j frequency_{i,j}} + P_{i,j} = \frac{frequency_{i,j}}{\sum_j frequency_{i,j}} - global\_weight_i = 1 + \\frac{\sum_j P_{i,j} * log(P_{i,j})}{log(number\_of\_documents + 1)} + global\_weight_i = 1 + \frac{\sum_j P_{i,j} * log(P_{i,j})}{log(number\_of\_documents + 1)} final\_weight_{i,j} = local\_weight_{i,j} * global\_weight_i diff --git a/gensim/models/normmodel.py b/gensim/models/normmodel.py index 23853cdafd..3292f6514e 100644 --- a/gensim/models/normmodel.py +++ b/gensim/models/normmodel.py @@ -15,15 +15,15 @@ class NormModel(interfaces.TransformationABC): """Objects of this class realize the explicit normalization of vectors (l1 and l2).""" def __init__(self, corpus=None, norm='l2'): - """Compute the l1 or l2 normalization by normalizing separately for each document in a corpus. + r"""Compute the l1 or l2 normalization by normalizing separately for each document in a corpus. If :math:`v_{i,j}` is the 'i'th component of the vector representing document 'j', the l1 normalization is - .. math:: l1_{i, j} = \\frac{v_{i,j}}{\sum_k |v_{k,j}|} + .. math:: l1_{i, j} = \frac{v_{i,j}}{\sum_k |v_{k,j}|} the l2 normalization is - .. math:: l2_{i, j} = \\frac{v_{i,j}}{\sqrt{\sum_k v_{k,j}^2}} + .. math:: l2_{i, j} = \frac{v_{i,j}}{\sqrt{\sum_k v_{k,j}^2}} Parameters diff --git a/gensim/models/phrases.py b/gensim/models/phrases.py index 9d8a5f5da6..ca8c1cfa3b 100644 --- a/gensim/models/phrases.py +++ b/gensim/models/phrases.py @@ -658,7 +658,7 @@ def __getitem__(self, sentence): def original_scorer(worda_count, wordb_count, bigram_count, len_vocab, min_count, corpus_word_count): - """Bigram scoring function, based on the original `Mikolov, et. al: "Distributed Representations + r"""Bigram scoring function, based on the original `Mikolov, et. al: "Distributed Representations of Words and Phrases and their Compositionality" `_. Parameters @@ -678,14 +678,14 @@ def original_scorer(worda_count, wordb_count, bigram_count, len_vocab, min_count Notes ----- - Formula: :math:`\\frac{(bigram\_count - min\_count) * len\_vocab }{ (worda\_count * wordb\_count)}`. + Formula: :math:`\frac{(bigram\_count - min\_count) * len\_vocab }{ (worda\_count * wordb\_count)}`. """ return (bigram_count - min_count) / worda_count / wordb_count * len_vocab def npmi_scorer(worda_count, wordb_count, bigram_count, len_vocab, min_count, corpus_word_count): - """Calculation NPMI score based on `"Normalized (Pointwise) Mutual Information in Colocation Extraction" + r"""Calculation NPMI score based on `"Normalized (Pointwise) Mutual Information in Colocation Extraction" by Gerlof Bouma `_. 
Parameters @@ -705,8 +705,8 @@ def npmi_scorer(worda_count, wordb_count, bigram_count, len_vocab, min_count, co Notes ----- - Formula: :math:`\\frac{ln(prop(word_a, word_b) / (prop(word_a)*prop(word_b)))}{ -ln(prop(word_a, word_b)}`, - where :math:`prob(word) = \\frac{word\_count}{corpus\_word\_count}` + Formula: :math:`\frac{ln(prop(word_a, word_b) / (prop(word_a)*prop(word_b)))}{ -ln(prop(word_a, word_b)}`, + where :math:`prob(word) = \frac{word\_count}{corpus\_word\_count}` """ if bigram_count >= min_count: diff --git a/gensim/models/tfidfmodel.py b/gensim/models/tfidfmodel.py index 8f163b66c4..a4cbedcd22 100644 --- a/gensim/models/tfidfmodel.py +++ b/gensim/models/tfidfmodel.py @@ -83,8 +83,8 @@ def resolve_weights(smartirs): def df2idf(docfreq, totaldocs, log_base=2.0, add=0.0): - """Compute inverse-document-frequency for a term with the given document frequency `docfreq`: - :math:`idf = add + log_{log\_base} \\frac{totaldocs}{docfreq}` + r"""Compute inverse-document-frequency for a term with the given document frequency `docfreq`: + :math:`idf = add + log_{log\_base} \frac{totaldocs}{docfreq}` Parameters ---------- @@ -239,11 +239,11 @@ class TfidfModel(interfaces.TransformationABC): """ def __init__(self, corpus=None, id2word=None, dictionary=None, wlocal=utils.identity, wglobal=df2idf, normalize=True, smartirs=None, pivot=None, slope=0.65): - """Compute TF-IDF by multiplying a local component (term frequency) with a global component + r"""Compute TF-IDF by multiplying a local component (term frequency) with a global component (inverse document frequency), and normalizing the resulting documents to unit length. Formula for non-normalized weight of term :math:`i` in document :math:`j` in a corpus of :math:`D` documents - .. math:: weight_{i,j} = frequency_{i,j} * log_2 \\frac{D}{document\_freq_{i}} + .. 
math:: weight_{i,j} = frequency_{i,j} * log_2 \frac{D}{document\_freq_{i}} or, more generally diff --git a/gensim/models/word2vec.py b/gensim/models/word2vec.py index 098905420b..a961d6f004 100755 --- a/gensim/models/word2vec.py +++ b/gensim/models/word2vec.py @@ -193,8 +193,8 @@ def train_batch_sg(model, sentences, alpha, work=None, compute_loss=False): """ result = 0 for sentence in sentences: - word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab and - model.wv.vocab[w].sample_int > model.random.rand() * 2 ** 32] + word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab + and model.wv.vocab[w].sample_int > model.random.rand() * 2 ** 32] for pos, word in enumerate(word_vocabs): reduced_window = model.random.randint(model.window) # `b` in the original word2vec code @@ -245,8 +245,8 @@ def train_batch_cbow(model, sentences, alpha, work=None, neu1=None, compute_loss result = 0 for sentence in sentences: word_vocabs = [ - model.wv.vocab[w] for w in sentence if w in model.wv.vocab and - model.wv.vocab[w].sample_int > model.random.rand() * 2 ** 32 + model.wv.vocab[w] for w in sentence if w in model.wv.vocab + and model.wv.vocab[w].sample_int > model.random.rand() * 2 ** 32 ] for pos, word in enumerate(word_vocabs): reduced_window = model.random.randint(model.window) # `b` in the original word2vec code diff --git a/gensim/models/wrappers/ldamallet.py b/gensim/models/wrappers/ldamallet.py index 6639be5d8e..1c321d1269 100644 --- a/gensim/models/wrappers/ldamallet.py +++ b/gensim/models/wrappers/ldamallet.py @@ -5,7 +5,7 @@ # Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html -"""Python wrapper for `Latent Dirichlet Allocation (LDA) `_ +r"""Python wrapper for `Latent Dirichlet Allocation (LDA) `_ from `MALLET, the Java topic modelling toolkit `_ This module allows both LDA model estimation from a training corpus and inference of topic distribution on new, @@ -250,7 +250,7 @@ def convert_input(self, corpus, infer=False, serialize_corpus=True): cmd = \ self.mallet_path + \ " import-file --preserve-case --keep-sequence " \ - "--remove-stopwords --token-regex \"\S+\" --input %s --output %s" + "--remove-stopwords --token-regex \"\\S+\" --input %s --output %s" if infer: cmd += ' --use-pipe-from ' + self.fcorpusmallet() cmd = cmd % (self.fcorpustxt(), self.fcorpusmallet() + '.infer') diff --git a/gensim/summarization/mz_entropy.py b/gensim/summarization/mz_entropy.py index 11437f5c86..492ae7e79a 100644 --- a/gensim/summarization/mz_entropy.py +++ b/gensim/summarization/mz_entropy.py @@ -98,9 +98,9 @@ def marginal_prob(n, m): occurring m times in a given block""" return numpy.exp( - __log_combinations(n, m) + - __log_combinations(n_words - n, blocksize - m) - - __log_combinations(n_words, blocksize) + __log_combinations(n, m) + + __log_combinations(n_words - n, blocksize - m) + - __log_combinations(n_words, blocksize) ) return numpy.frompyfunc(marginal_prob, 2, 1) diff --git a/gensim/test/test_corpora.py b/gensim/test/test_corpora.py index 8eb10faa0e..a9568e4508 100644 --- a/gensim/test/test_corpora.py +++ b/gensim/test/test_corpora.py @@ -786,7 +786,7 @@ def test_two_level_directory(self): def test_filename_filtering(self): dirpath = self.write_one_level('test1.log', 'test1.txt', 'test2.log', 'other1.log') - corpus = textcorpus.TextDirectoryCorpus(dirpath, pattern="test.*\.log") + corpus = textcorpus.TextDirectoryCorpus(dirpath, pattern=r"test.*\.log") filenames = list(corpus.iter_filepaths()) expected = [os.path.join(dirpath, name) for name in 
('test1.log', 'test2.log')] self.assertEqual(sorted(expected), sorted(filenames)) diff --git a/gensim/test/test_doc2vec.py b/gensim/test/test_doc2vec.py index d35b907800..d61adef085 100644 --- a/gensim/test/test_doc2vec.py +++ b/gensim/test/test_doc2vec.py @@ -340,8 +340,8 @@ def test_similarity_unseen_docs(self): model = doc2vec.Doc2Vec(min_count=1) model.build_vocab(corpus) self.assertTrue( - model.docvecs.similarity_unseen_docs(model, rome_str, rome_str) > - model.docvecs.similarity_unseen_docs(model, rome_str, car_str) + model.docvecs.similarity_unseen_docs(model, rome_str, rome_str) + > model.docvecs.similarity_unseen_docs(model, rome_str, car_str) ) def model_sanity(self, model, keep_training=True): diff --git a/gensim/test/test_keyedvectors.py b/gensim/test/test_keyedvectors.py index 927b896c64..0259fea7af 100644 --- a/gensim/test/test_keyedvectors.py +++ b/gensim/test/test_keyedvectors.py @@ -38,8 +38,7 @@ def test_similarity_matrix(self): similarity_matrix = self.vectors.similarity_matrix(dictionary).todense() self.assertTrue((similarity_matrix.T == similarity_matrix).all()) self.assertTrue( - (np.diag(similarity_matrix) == - np.ones(similarity_matrix.shape[0])).all()) + (np.diag(similarity_matrix) == np.ones(similarity_matrix.shape[0])).all()) # checking that thresholding works as expected similarity_matrix = self.vectors.similarity_matrix(dictionary, threshold=0.45).todense() diff --git a/gensim/test/test_matutils.py b/gensim/test/test_matutils.py index 49988af296..cde90b50d9 100644 --- a/gensim/test/test_matutils.py +++ b/gensim/test/test_matutils.py @@ -60,7 +60,7 @@ def mean_absolute_difference(a, b): def dirichlet_expectation(alpha): - """For a vector :math:`\\theta \sim Dir(\\alpha)`, compute :math:`E[log \\theta]`. + r"""For a vector :math:`\theta \sim Dir(\alpha)`, compute :math:`E[log \theta]`. Parameters ---------- @@ -70,7 +70,7 @@ def dirichlet_expectation(alpha): Returns ------- numpy.ndarray: - :math:`E[log \\theta]` + :math:`E[log \theta]` """ if len(alpha.shape) == 1: diff --git a/gensim/topic_coherence/direct_confirmation_measure.py b/gensim/topic_coherence/direct_confirmation_measure.py index d7a7e5f464..710718f3af 100644 --- a/gensim/topic_coherence/direct_confirmation_measure.py +++ b/gensim/topic_coherence/direct_confirmation_measure.py @@ -17,8 +17,8 @@ def log_conditional_probability(segmented_topics, accumulator, with_std=False, with_support=False): - """Calculate the log-conditional-probability measure which is used by coherence measures such as `U_mass`. - This is defined as :math:`m_{lc}(S_i) = log \\frac{P(W', W^{*}) + \epsilon}{P(W^{*})}`. + r"""Calculate the log-conditional-probability measure which is used by coherence measures such as `U_mass`. + This is defined as :math:`m_{lc}(S_i) = log \frac{P(W', W^{*}) + \epsilon}{P(W^{*})}`. Parameters ---------- @@ -124,7 +124,7 @@ def aggregate_segment_sims(segment_sims, with_std, with_support): def log_ratio_measure(segmented_topics, accumulator, normalize=False, with_std=False, with_support=False): - """Compute log ratio measure for `segment_topics`. + r"""Compute log ratio measure for `segment_topics`. Parameters ---------- @@ -146,12 +146,12 @@ def log_ratio_measure(segmented_topics, accumulator, normalize=False, with_std=F ----- If `normalize=False`: Calculate the log-ratio-measure, popularly known as **PMI** which is used by coherence measures such as `c_v`. 
- This is defined as :math:`m_{lr}(S_i) = log \\frac{P(W', W^{*}) + \epsilon}{P(W') * P(W^{*})}` + This is defined as :math:`m_{lr}(S_i) = log \frac{P(W', W^{*}) + \epsilon}{P(W') * P(W^{*})}` If `normalize=True`: Calculate the normalized-log-ratio-measure, popularly knowns as **NPMI** which is used by coherence measures such as `c_v`. - This is defined as :math:`m_{nlr}(S_i) = \\frac{m_{lr}(S_i)}{-log(P(W', W^{*}) + \epsilon)}` + This is defined as :math:`m_{nlr}(S_i) = \frac{m_{lr}(S_i)}{-log(P(W', W^{*}) + \epsilon)}` Returns ------- diff --git a/gensim/topic_coherence/segmentation.py b/gensim/topic_coherence/segmentation.py index d02f700547..94924c8a60 100644 --- a/gensim/topic_coherence/segmentation.py +++ b/gensim/topic_coherence/segmentation.py @@ -12,7 +12,7 @@ def s_one_pre(topics): - """Performs segmentation on a list of topics. + r"""Performs segmentation on a list of topics. Notes ----- @@ -54,9 +54,9 @@ def s_one_pre(topics): def s_one_one(topics): - """Perform segmentation on a list of topics. + r"""Perform segmentation on a list of topics. Segmentation is defined as - :math:`s_{one} = {(W', W^{*}) | W' = {w_i}; W^{*} = {w_j}; w_{i}, w_{j} \in W; i \\neq j}`. + :math:`s_{one} = {(W', W^{*}) | W' = {w_i}; W^{*} = {w_j}; w_{i}, w_{j} \in W; i \neq j}`. Parameters ---------- @@ -96,7 +96,7 @@ def s_one_one(topics): def s_one_set(topics): - """Perform s_one_set segmentation on a list of topics. + r"""Perform s_one_set segmentation on a list of topics. Segmentation is defined as :math:`s_{set} = {(W', W^{*}) | W' = {w_i}; w_{i} \in W; W^{*} = W}` diff --git a/gensim/utils.py b/gensim/utils.py index 0359125db5..a5c7c94ab7 100644 --- a/gensim/utils.py +++ b/gensim/utils.py @@ -447,8 +447,8 @@ def _load_specials(self, fname, mmap, compress, subname): """ def mmap_error(obj, filename): return IOError( - 'Cannot mmap compressed object %s in file %s. ' % (obj, filename) + - 'Use `load(fname, mmap=None)` or uncompress files manually.' + 'Cannot mmap compressed object %s in file %s. ' % (obj, filename) + + 'Use `load(fname, mmap=None)` or uncompress files manually.' ) for attrib in getattr(self, '__recursive_saveloads', []): diff --git a/tox.ini b/tox.ini index f380171659..c5446a8097 100644 --- a/tox.ini +++ b/tox.ini @@ -64,7 +64,7 @@ commands = flake8 gensim/ {posargs} [testenv:flake8-docs] recreate = True -deps = flake8-rst >= 0.4.1 +deps = flake8-rst == 0.4.3 commands = flake8-rst gensim/ docs/ {posargs}
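For reference, the two patterns applied throughout this patch are sketched below. This is an illustrative snippet only, not part of the diff; `RE_COLSPAN` and `is_relevant` are hypothetical names used for the example.

    import re

    # W605: backslashes such as \d and \s are invalid escape sequences in a
    # normal string literal; marking the pattern as a raw string silences the
    # warning without changing the compiled regex.
    RE_COLSPAN = re.compile(r'\d{0,1}[ ]?colspan')

    # W504: when wrapping a long boolean expression, break the line *before*
    # the binary operator instead of after it.
    def is_relevant(new_topics, old_topics):
        return (new_topics is not None
                and old_topics is not None
                and new_topics != old_topics)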