
Fix Sphinx warnings #1442

Merged · 12 commits · Jul 5, 2017
18 changes: 10 additions & 8 deletions gensim/models/coherencemodel.py
@@ -14,8 +8,8 @@
Implementation of this pipeline allows for the user to in essence "make" a
coherence measure of his/her choice by choosing a method in each of the pipelines.

.. [1] Michael Roeder, Andreas Both and Alexander Hinneburg. Exploring the space of topic
coherence measures. http://svn.aksw.org/papers/2015/WSDM_Topic_Evaluation/public.pdf.
.. [1] Michael Roeder, Andreas Both and Alexander Hinneburg. Exploring the space of topic coherence measures.
`http://svn.aksw.org/papers/2015/WSDM_Topic_Evaluation/public.pdf.`
"""

import logging
@@ -113,16 +113,18 @@ class CoherenceModel(interfaces.TransformationABC):
def __init__(self, model=None, topics=None, texts=None, corpus=None, dictionary=None,
window_size=None, coherence='c_v', topn=10, processes=-1):
"""
Args:
@piskvorky (Owner) commented on Jun 23, 2017:
Why this change? We're trying to use the Google Code Style consistently, let's stick to that :) CC @menshikh-iv

Contributor (PR author) replied:
It gave 'unexpected section title' and 'expected indentation' warnings during make clean html. It also looked like the reST Sphinx format was being followed, in which a keyword followed by a colon and some text is a definition list, an underlined keyword is a section title, and text in a definition list must be indented.

Another contributor replied:

@prerna135 Please look at the example and check that the plugin works correctly (the napoleon extension for Sphinx).
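
For context, a minimal sketch of the Google-style argument section that the napoleon extension parses without these warnings; the function and parameters below are illustrative only, not part of this PR:

    def coherence_example(model=None, topics=None):
        """Compute an example coherence score.

        Args:
            model: Pre-trained topic model. Should be provided if `topics` is not.
            topics: List of tokenized topics; a dictionary is needed if this is
                used instead of `model`.

        Returns:
            float: The coherence value.
        """
        return 0.0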

----
Args
====
model : Pre-trained topic model. Should be provided if topics is not provided.
Currently supports LdaModel, LdaMallet wrapper and LdaVowpalWabbit wrapper. Use 'topics'
parameter to plug in an as yet unsupported model.
topics : List of tokenized topics. If this is preferred over model, dictionary should be provided. eg::
topics = [['human', 'machine', 'computer', 'interface'],
topics : List of tokenized topics. If this is preferred over model, dictionary should be provided.
eg :
topics = [['human', 'machine', 'computer', 'interface'],
['graph', 'trees', 'binary', 'widths']]
texts : Tokenized texts. Needed for coherence models that use sliding window based probability estimator, eg::
texts = [['system', 'human', 'system', 'eps'],
texts : Tokenized texts. Needed for coherence models that use sliding window based probability estimator,
eg :
texts = [['system', 'human', 'system', 'eps'],
['user', 'response', 'time'],
['trees'],
['graph', 'trees'],
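
For reference, a hedged usage sketch of the constructor documented above, built on toy texts (the data below is illustrative, not from this PR):

    from gensim.corpora import Dictionary
    from gensim.models.coherencemodel import CoherenceModel

    texts = [['human', 'machine', 'computer', 'interface'],
             ['graph', 'trees', 'binary', 'widths'],
             ['user', 'response', 'time']]
    dictionary = Dictionary(texts)

    # Plug in explicit topics instead of a trained model, as the docstring allows.
    topics = [['human', 'machine', 'computer', 'interface'],
              ['graph', 'trees', 'binary', 'widths']]
    cm = CoherenceModel(topics=topics, texts=texts, dictionary=dictionary, coherence='c_v')
    print(cm.get_coherence())
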
4 changes: 2 additions & 2 deletions gensim/models/keyedvectors.py
@@ -730,7 +730,7 @@ def evaluate_word_pairs(self, pairs, delimiter='\t', restrict_vocab=300000, case
dummy4unknown=False):
"""
Compute correlation of the model with human similarity judgments. `pairs` is a filename of a dataset where
lines are 3-tuples, each consisting of a word pair and a similarity value, separated by `delimiter'.
lines are 3-tuples, each consisting of a word pair and a similarity value, separated by `delimiter`.
An example dataset is included in Gensim (test/test_data/wordsim353.tsv). More datasets can be found at
http://technion.ac.il/~ira.leviant/MultilingualVSMdata.html or https://www.cl.cam.ac.uk/~fh295/simlex.html.

@@ -748,7 +748,7 @@ def evaluate_word_pairs(self, pairs, delimiter='\t', restrict_vocab=300000, case
and words pairs in the dataset. If there are multiple case variants of a single word, the vector for the first
occurrence (also the most frequent if vocabulary is sorted) is taken.

Use `dummy4unknown=True' to produce zero-valued similarities for pairs with out-of-vocabulary words.
Use `dummy4unknown=True` to produce zero-valued similarities for pairs with out-of-vocabulary words.
Otherwise (default False), these pairs are skipped entirely.
"""
ok_vocab = [(w, self.vocab[w]) for w in self.index2word[:restrict_vocab]]
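
A short, hedged sketch of calling evaluate_word_pairs as described; 'vectors.bin' is a placeholder for any word2vec-format file, and the wordsim353.tsv path mirrors the one mentioned in the docstring:

    from gensim.models.keyedvectors import KeyedVectors

    wv = KeyedVectors.load_word2vec_format('vectors.bin', binary=True)  # placeholder path
    pearson, spearman, oov_ratio = wv.evaluate_word_pairs(
        'test/test_data/wordsim353.tsv', delimiter='\t', dummy4unknown=True)
    print(pearson, spearman, oov_ratio)  # pearson/spearman are (statistic, p-value) pairs
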
8 changes: 4 additions & 4 deletions gensim/models/wrappers/ldamallet.py
@@ -358,14 +358,14 @@ def malletmodel2ldamodel(mallet_model, gamma_threshold=0.001, iterations=50):
training model weights (alpha, beta...) from a trained mallet model into the
gensim model.

Args:
----
Args
====
mallet_model : Trained mallet model
gamma_threshold : To be used for inference in the new LdaModel.
iterations : number of iterations to be used for inference in the new LdaModel.

Returns:
-------
Returns
=======
model_gensim : LdaModel instance; copied gensim LdaModel
"""
model_gensim = LdaModel(
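
For context, a hedged sketch of the conversion documented above; the Mallet binary path and the toy corpus are assumptions, not part of this PR:

    from gensim.corpora import Dictionary
    from gensim.models.wrappers import LdaMallet
    from gensim.models.wrappers.ldamallet import malletmodel2ldamodel

    texts = [['human', 'computer', 'interface'], ['graph', 'trees', 'widths']]
    dictionary = Dictionary(texts)
    corpus = [dictionary.doc2bow(text) for text in texts]

    # '/path/to/mallet' stands in for a local Mallet installation.
    mallet_model = LdaMallet('/path/to/mallet', corpus=corpus, num_topics=2, id2word=dictionary)
    gensim_model = malletmodel2ldamodel(mallet_model, gamma_threshold=0.001, iterations=50)
    print(gensim_model.show_topics())
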
8 changes: 4 additions & 4 deletions gensim/models/wrappers/ldavowpalwabbit.py
@@ -567,13 +567,13 @@ def vwmodel2ldamodel(vw_model, iterations=50):
simply copying the training model weights (alpha, beta...) from a trained
vwmodel into the gensim model.

Args:
----
Args
====
vw_model : Trained vowpal wabbit model.
iterations : Number of iterations to be used for inference of the new LdaModel.

Returns:
-------
Returns
=======
model_gensim : LdaModel instance; copied gensim LdaModel.
"""
model_gensim = LdaModel(
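
Similarly, a hedged sketch for the Vowpal Wabbit wrapper; the vw binary path and the toy corpus are placeholders:

    from gensim.corpora import Dictionary
    from gensim.models.wrappers import LdaVowpalWabbit
    from gensim.models.wrappers.ldavowpalwabbit import vwmodel2ldamodel

    texts = [['human', 'computer', 'interface'], ['graph', 'trees', 'widths']]
    dictionary = Dictionary(texts)
    corpus = [dictionary.doc2bow(text) for text in texts]

    # '/usr/local/bin/vw' stands in for a local Vowpal Wabbit binary.
    vw_model = LdaVowpalWabbit('/usr/local/bin/vw', corpus=corpus, num_topics=2, id2word=dictionary)
    gensim_model = vwmodel2ldamodel(vw_model, iterations=50)
    print(gensim_model.show_topics())
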
4 changes: 2 additions & 2 deletions gensim/summarization/summarizer.py
@@ -184,10 +184,10 @@ def summarize(text, ratio=0.2, word_count=None, split=False):

The length of the output can be specified using the ratio and
word_count parameters:
ratio should be a number between 0 and 1 that determines the
percentage of the number of sentences of the original text to be
chosen for the summary (defaults at 0.2).
word_count determines how many words will the output contain.
If both parameters are provided, the ratio will be ignored.
"""
# Gets a list of processed sentences.
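
A hedged usage sketch of the two length controls described above; the text is toy input, and very short inputs may only trigger a warning from the summarizer:

    from gensim.summarization import summarize

    text = (
        "Automatic summarization shortens a document while keeping its main points. "
        "Extractive methods select a subset of the existing sentences. "
        "Abstractive methods generate new sentences from an internal representation. "
        "Gensim's summarizer is extractive and ranks sentences with a graph-based algorithm. "
        "The highest-ranked sentences are then returned in their original order."
    )

    print(summarize(text, ratio=0.4))        # keep about 40% of the sentences
    print(summarize(text, word_count=30))    # cap at ~30 words; ratio is ignored when both are given
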
8 changes: 4 additions & 4 deletions gensim/topic_coherence/aggregation.py
@@ -19,12 +19,12 @@ def arithmetic_mean(confirmed_measures):
This function performs the arithmetic mean aggregation on the output obtained from
the confirmation measure module.

Args:
----
Args
====
confirmed_measures : list of calculated confirmation measure on each set in the segmented topics.

Returns:
-------
Returns
=======
mean : Arithmetic mean of all the values contained in confirmation measures.
"""
return np.mean(confirmed_measures)
16 changes: 8 additions & 8 deletions gensim/topic_coherence/direct_confirmation_measure.py
@@ -23,14 +23,14 @@ def log_conditional_probability(segmented_topics, accumulator):
which is used by coherence measures such as U_mass.
This is defined as: m_lc(S_i) = log[(P(W', W*) + e) / P(W*)]

Args:
----
Args
====
segmented_topics : Output from the segmentation module of the segmented topics.
Is a list of list of tuples.
accumulator: word occurrence accumulator from probability_estimation.

Returns:
-------
Returns
=======
m_lc : List of log conditional probability measure for each topic.
"""
m_lc = []
@@ -64,14 +64,14 @@ def log_ratio_measure(segmented_topics, accumulator, normalize=False):
NPMI which is used by coherence measures such as c_v.
This is defined as: m_nlr(S_i) = m_lr(S_i) / -log[P(W', W*) + e]

Args:
----
Args
====
segmented topics : Output from the segmentation module of the segmented topics.
Is a list of list of tuples.
accumulator: word occurrence accumulator from probability_estimation.

Returns:
-------
Returns
=======
m_lr : List of log ratio measures for each topic.
"""
m_lr = []
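
For intuition, a hedged numeric sketch of the quantities above using made-up document counts instead of the accumulator API; EPSILON stands in for the small constant e in the formulas:

    import numpy as np

    EPSILON = 1e-12
    num_docs = 1000.0
    p_w_star = 120.0 / num_docs    # P(W*): fraction of documents containing W*
    p_w_prime = 80.0 / num_docs    # P(W'): fraction of documents containing W'
    p_joint = 45.0 / num_docs      # P(W', W*): fraction containing both

    # m_lc(S_i) = log[(P(W', W*) + e) / P(W*)]
    m_lc = np.log((p_joint + EPSILON) / p_w_star)

    # Log ratio measure (PMI), and its normalized form:
    # m_nlr(S_i) = m_lr(S_i) / -log[P(W', W*) + e]
    m_lr = np.log((p_joint + EPSILON) / (p_w_prime * p_w_star))
    m_nlr = m_lr / -np.log(p_joint + EPSILON)

    print(m_lc, m_lr, m_nlr)
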
19 changes: 8 additions & 11 deletions gensim/topic_coherence/indirect_confirmation_measure.py
@@ -41,25 +41,22 @@ def cosine_similarity(segmented_topics, accumulator, topics, measure='nlr', gamm
_ _ _ _
u = V(W') and w = V(W*) for the word sets of a pair S_i = (W', W*) indirect
_ _
cosine measure is computed as the cosine similarity between u and w. The formula used is:
cosine measure is computed as the cosine similarity between u and w. The formula used is::

m_{sim}_{(m, \gamma)}(W', W*) = s_{sim}(\vec{V}^{\,}_{m,\gamma}(W'), \vec{V}^{\,}_{m,\gamma}(W*))

where each vector \vec{V}^{\,}_{m,\gamma}(W') = \Bigg \{{\sum_{w_{i} \in W'}^{ } m(w_{i}, w_{j})^{\gamma}}\Bigg \}_{j = 1,...,|W|}

Args:
----
segmented_topics : Output from the segmentation module of the segmented topics.
Is a list of list of tuples.
accumulator : Output from the probability_estimation module.
Is an accumulator of word occurrences (see text_analysis module).
Args
====
segmented_topics : Output from the segmentation module of the segmented topics. Is a list of list of tuples.
accumulator : Output from the probability_estimation module. Is an accumulator of word occurrences (see text_analysis module).
topics : Topics obtained from the trained topic model.
measure : String. Direct confirmation measure to be used.
Supported values are "nlr" (normalized log ratio).
measure : String. Direct confirmation measure to be used. Supported values are "nlr" (normalized log ratio).
gamma : Gamma value for computing W', W* vectors; default is 1.

Returns:
-------
Returns
=======
s_cos_sim : list of indirect cosine similarity measure for each topic.
"""
context_vectors = ContextVectorComputer(measure, topics, accumulator, gamma)
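
A hedged sketch of the final cosine step on two hand-made context vectors; in the library these vectors come from ContextVectorComputer, so the numbers below are purely illustrative:

    import numpy as np

    # One entry per word w_j in the topic's word set W, i.e. sums of m(w_i, w_j)^gamma.
    v_w_prime = np.array([0.8, 0.1, 0.3])   # context vector for W'
    v_w_star = np.array([0.7, 0.2, 0.4])    # context vector for W*

    s_cos_sim = v_w_prime.dot(v_w_star) / (np.linalg.norm(v_w_prime) * np.linalg.norm(v_w_star))
    print(s_cos_sim)
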
23 changes: 12 additions & 11 deletions gensim/topic_coherence/probability_estimation.py
@@ -22,13 +22,13 @@ def p_boolean_document(corpus, segmented_topics):
Boolean document estimates the probability of a single word as the number
of documents in which the word occurs divided by the total number of documents.

Args:
----
Args
====
corpus : The corpus of documents.
segmented_topics : Output from the segmentation of topics. Could be simply topics too.

Returns:
-------
Returns
=======
accumulator : word occurrence accumulator instance that can be used to lookup token
frequencies and co-occurrence frequencies.
"""
@@ -43,15 +43,15 @@ def p_boolean_sliding_window(texts, segmented_topics, dictionary, window_size, p
document by copying the window content. Boolean document is applied to these virtual
documents to compute word probabilities.

Args:
----
Args
====
texts : List of string sentences.
segmented_topics : Output from the segmentation of topics. Could be simply topics too.
dictionary : Gensim dictionary mapping of the tokens and ids.
window_size : Size of the sliding window. 110 found out to be the ideal size for large corpora.

Returns:
-------
Returns
=======
accumulator : word occurrence accumulator instance that can be used to lookup token
frequencies and co-occurrence frequencies.
"""
@@ -67,11 +67,12 @@ def p_boolean_sliding_window(texts, segmented_topics, dictionary, window_size, p
def unique_ids_from_segments(segmented_topics):
"""Return the set of all unique ids in a list of segmented topics.

Args:
----
Args
====
segmented_topics: list of tuples of (word_id_set1, word_id_set2). Each word_id_set
is either a single integer, or a `numpy.ndarray` of integers.
Returns:
Returns
=======
unique_ids : set of unique ids across all topic segments.
"""
unique_ids = set() # is a set of all the unique ids contained in topics.
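
For context, a hedged end-to-end sketch feeding a segmented toy topic into the boolean-document estimator; num_docs is assumed to be exposed by the returned accumulator as in the text_analysis module:

    import numpy as np
    from gensim.corpora import Dictionary
    from gensim.topic_coherence import probability_estimation, segmentation

    texts = [['human', 'machine', 'computer', 'interface'],
             ['graph', 'trees', 'binary', 'widths']]
    dictionary = Dictionary(texts)
    corpus = [dictionary.doc2bow(text) for text in texts]

    # One toy topic expressed as token ids, then segmented with s_one_pre.
    topic_ids = [np.array([dictionary.token2id[w] for w in ('human', 'computer', 'interface')])]
    segmented = segmentation.s_one_pre(topic_ids)

    accumulator = probability_estimation.p_boolean_document(corpus, segmented)
    print(accumulator.num_docs)   # assumed accumulator attribute; see text_analysis
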
33 changes: 15 additions & 18 deletions gensim/topic_coherence/segmentation.py
@@ -16,20 +16,19 @@
def s_one_pre(topics):
"""
This function performs s_one_pre segmentation on a list of topics.
s_one_pre segmentation is defined as: s_one_pre = {(W', W*) | W' = {w_i};
W* = {w_j}; w_i, w_j belongs to W; i > j}
s_one_pre segmentation is defined as: s_one_pre = {(W', W*) | W' = {w_i}; W* = {w_j}; w_i, w_j belongs to W; i > j}
Example:

>>> topics = [np.array([1, 2, 3]), np.array([4, 5, 6])]
>>> s_one_pre(topics)
[[(2, 1), (3, 1), (3, 2)], [(5, 4), (6, 4), (6, 5)]]

Args:
----
Args
====
topics : list of topics obtained from an algorithm such as LDA. Is a list such as [array([ 9, 10, 11]), array([ 9, 10, 7]), ...]

Returns:
-------
Returns
=======
s_one_pre : list of list of (W', W*) tuples for all unique topic ids
"""
s_one_pre = []
@@ -46,20 +45,19 @@ def s_one_one(topics):
def s_one_one(topics):
"""
This function performs s_one_one segmentation on a list of topics.
s_one_one segmentation is defined as: s_one_one = {(W', W*) | W' = {w_i};
W* = {w_j}; w_i, w_j belongs to W; i != j}
s_one_one segmentation is defined as: s_one_one = {(W', W*) | W' = {w_i}; W* = {w_j}; w_i, w_j belongs to W; i != j}
Example:

>>> topics = [np.array([1, 2, 3]), np.array([4, 5, 6])]
>>> s_one_one(topics)
[[(1, 2), (1, 3), (2, 1), (2, 3), (3, 1), (3, 2)], [(4, 5), (4, 6), (5, 4), (5, 6), (6, 4), (6, 5)]]

Args:
----
Args
====
topics : list of topics obtained from an algorithm such as LDA. Is a list such as [array([ 9, 10, 11]), array([ 9, 10, 7]), ...]

Returns:
-------
Returns
=======
s_one_one : list of list of (W', W*) tuples for all unique topic ids
"""
s_one_one = []
@@ -79,21 +77,20 @@ def s_one_one(topics):
def s_one_set(topics):
"""
This function performs s_one_set segmentation on a list of topics.
s_one_set segmentation is defined as: s_one_set = {(W', W*) | W' = {w_i}; w_i belongs to W;
W* = W}
s_one_set segmentation is defined as: s_one_set = {(W', W*) | W' = {w_i}; w_i belongs to W; W* = W}
Example:
>>> topics = [np.array([9, 10, 7])]
>>> s_one_set(topics)
[[(9, array([ 9, 10, 7])),
(10, array([ 9, 10, 7])),
(7, array([ 9, 10, 7]))]]

Args:
----
Args
====
topics : list of topics obtained from an algorithm such as LDA. Is a list such as [array([ 9, 10, 11]), array([ 9, 10, 7]), ...]

Returns:
-------
Returns
=======
s_one_set : list of list of (W', W*) tuples for all unique topic ids.
"""
s_one_set = []
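
For clarity, a hedged standalone sketch of the s_one_pre definition quoted above; it mirrors the doctest output but is not the library implementation:

    def s_one_pre_sketch(topics):
        # Pair every word w_i with every earlier word w_j in the same topic (i > j).
        return [[(topic[i], topic[j]) for i in range(1, len(topic)) for j in range(i)]
                for topic in topics]

    print(s_one_pre_sketch([[1, 2, 3], [4, 5, 6]]))
    # [[(2, 1), (3, 1), (3, 2)], [(5, 4), (6, 4), (6, 5)]]
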
20 changes: 11 additions & 9 deletions gensim/utils.py
@@ -1208,14 +1208,16 @@ def strided_windows(ndarray, window_size):
[4, 5, 6, 7, 8],
[5, 6, 7, 8, 9]])

Args:
----
Args
====
ndarray: either a numpy.ndarray or something that can be converted into one.
window_size: sliding window size.
:param window_size:
:return: numpy.ndarray of the subsequences produced by sliding a window of the given size over
the `ndarray`. Since this uses striding, the individual arrays are views rather than
copies of `ndarray`. Changes to one view modifies the others and the original.

Returns
=======
numpy.ndarray of the subsequences produced by sliding a window of the given size over
the `ndarray`. Since this uses striding, the individual arrays are views rather than
copies of `ndarray`. Changes to one view modifies the others and the original.
"""
ndarray = np.asarray(ndarray)
if window_size == ndarray.shape[0]:
@@ -1234,13 +1236,13 @@ def iter_windows(texts, window_size, copy=False, ignore_below_size=True, include
The windows produced are views of some subsequence of a text. To use deep copies
instead, pass `copy=True`.

Args:
----
Args
====
texts: List of string sentences.
window_size: Size of sliding window.
copy: False to use views of the texts (default) or True to produce deep copies.
ignore_below_size: ignore documents that are not at least `window_size` in length (default behavior).
If False, the documents below `window_size` will be yielded as the full document.

"""
for doc_num, document in enumerate(texts):
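
Finally, a hedged usage sketch of the two helpers documented above, showing that strided windows are overlapping views of one buffer:

    import numpy as np
    from gensim.utils import strided_windows, iter_windows

    windows = strided_windows(np.arange(8), window_size=4)
    windows[0, 1] = 99
    print(windows[1, 0])   # 99 -- the rows share the underlying buffer

    texts = [['human', 'machine', 'computer', 'interface'], ['graph', 'trees']]
    for window in iter_windows(texts, window_size=3, ignore_below_size=False):
        print(window)      # the 2-token document is yielded whole because ignore_below_size=False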