From 1a1fc442bd9b60d9ebe649070f7876bf454053d3 Mon Sep 17 00:00:00 2001 From: horpto Date: Wed, 18 Oct 2017 12:05:40 +0500 Subject: [PATCH] Fix duplication and wrong markup in docs (#1633) * Fixed build of docs: - duplication of the citations from word2vec and doc2vec, - wrong markup of lists in the scripts, - some typos. * Add missing 'tensor' word --- docs/src/scripts/word2vec2tensor.rst | 6 +++--- gensim/models/doc2vec.py | 4 ++-- gensim/models/word2vec.py | 6 +++--- gensim/scripts/glove2word2vec.py | 6 ++++-- gensim/scripts/word2vec2tensor.py | 14 ++++++++------ 5 files changed, 20 insertions(+), 16 deletions(-) diff --git a/docs/src/scripts/word2vec2tensor.rst b/docs/src/scripts/word2vec2tensor.rst index 62b1c109d9..6fb4e33482 100644 --- a/docs/src/scripts/word2vec2tensor.rst +++ b/docs/src/scripts/word2vec2tensor.rst @@ -1,8 +1,8 @@ -:mod:`scripts.word2vec2tensor` -- -================================== +:mod:`scripts.word2vec2tensor` -- Convert the word2vec format to Tensorflow 2D tensor +===================================================================================== .. automodule:: gensim.scripts.word2vec2tensor - :synopsis: + :synopsis: Convert the word2vec format to Tensorflow 2D tensor :members: :inherited-members: :undoc-members: diff --git a/gensim/models/doc2vec.py b/gensim/models/doc2vec.py index f7cb7a0448..7cff025a50 100644 --- a/gensim/models/doc2vec.py +++ b/gensim/models/doc2vec.py @@ -7,7 +7,7 @@ """ Deep learning via the distributed memory and distributed bag of words models from -[1]_, using either hierarchical softmax or negative sampling [2]_ [3]_. See [tutorial]_ +[1]_, using either hierarchical softmax or negative sampling [2]_ [3]_. See [#tutorial]_ **Make sure you have a C compiler before installing gensim, to use optimized (compiled) doc2vec training** (70x speedup [blog]_). @@ -35,7 +35,7 @@ In Proceedings of NIPS, 2013. .. 
[blog] Optimizing word2vec in gensim, http://radimrehurek.com/2013/09/word2vec-in-python-part-two-optimizing/ -.. [tutorial] Doc2vec in gensim tutorial, https://github.com/RaRe-Technologies/gensim/blob/develop/docs/notebooks/doc2vec-lee.ipynb +.. [#tutorial] Doc2vec in gensim tutorial, https://github.com/RaRe-Technologies/gensim/blob/develop/docs/notebooks/doc2vec-lee.ipynb diff --git a/gensim/models/word2vec.py b/gensim/models/word2vec.py index ce7de6330c..20d58a7977 100644 --- a/gensim/models/word2vec.py +++ b/gensim/models/word2vec.py @@ -1075,10 +1075,10 @@ def score(self, sentences, total_sentences=int(1e6), chunksize=100, queue_factor Note that you should specify total_sentences; we'll run into problems if you ask to score more than this number of sentences but it is inefficient to set the value too high. - See the article by [taddy]_ and the gensim demo at [deepir]_ for examples of how to use such scores in document classification. + See the article by [#taddy]_ and the gensim demo at [#deepir]_ for examples of how to use such scores in document classification. - .. [taddy] Taddy, Matt. Document Classification by Inversion of Distributed Language Representations, in Proceedings of the 2015 Conference of the Association of Computational Linguistics. - .. [deepir] https://github.com/piskvorky/gensim/blob/develop/docs/notebooks/deepir.ipynb + .. [#taddy] Taddy, Matt. Document Classification by Inversion of Distributed Language Representations, in Proceedings of the 2015 Conference of the Association of Computational Linguistics. + .. 
[#deepir] https://github.com/piskvorky/gensim/blob/develop/docs/notebooks/deepir.ipynb """ if FAST_VERSION < 0: diff --git a/gensim/scripts/glove2word2vec.py b/gensim/scripts/glove2word2vec.py index 0667440f80..88574acee0 100644 --- a/gensim/scripts/glove2word2vec.py +++ b/gensim/scripts/glove2word2vec.py @@ -8,9 +8,11 @@ """ USAGE: $ python -m gensim.scripts.glove2word2vec --input --output + Where: - : Input GloVe .txt file - : Desired name of output Word2vec .txt file + +* : Input GloVe .txt file. +* : Desired name of output Word2vec .txt file. This script is used to convert GloVe vectors in text format into the word2vec text format. The only difference between the two formats is an extra header line in word2vec, diff --git a/gensim/scripts/word2vec2tensor.py b/gensim/scripts/word2vec2tensor.py index ef1b017137..f8878aed79 100644 --- a/gensim/scripts/word2vec2tensor.py +++ b/gensim/scripts/word2vec2tensor.py @@ -9,20 +9,22 @@ USAGE: $ python -m gensim.scripts.word2vec2tensor --input --output [--binary] Where: - : Input Word2Vec model. - : 2D tensor TSV output file name prefix. - : Set True if Word2Vec model is binary. Defaults to False. + +* : Input Word2Vec model. +* : 2D tensor TSV output file name prefix. +* : Set True if Word2Vec model is binary. Defaults to False. Output: The script will create two TSV files. A 2d tensor format file, and a Word Embedding metadata file. Both files will - us the --output file name as prefix + use the --output file name as prefix. This script is used to convert the word2vec format to Tensorflow 2D tensor and metadata formats for Embedding Visualization To use the generated TSV 2D tensor and metadata file in the Projector Visualizer, please + 1) Open http://projector.tensorflow.org/. 2) Choose "Load Data" from the left menu. -3) Select "Choose file" in "Load a TSV file of vectors." and choose you local "_tensor.tsv" file -4) Select "Choose file" in "Load a TSV file of metadata." 
and choose you local "_metadata.tsv" file +3) Select "Choose file" in "Load a TSV file of vectors." and choose your local "_tensor.tsv" file. +4) Select "Choose file" in "Load a TSV file of metadata." and choose your local "_metadata.tsv" file. For more information about TensorBoard TSV format please visit: https://www.tensorflow.org/versions/master/how_tos/embedding_viz/