diff --git a/gensim/matutils.py b/gensim/matutils.py index 7879431cec..b36f2fcb5a 100644 --- a/gensim/matutils.py +++ b/gensim/matutils.py @@ -778,7 +778,7 @@ def cossim(vec1, vec2): @deprecated( "Function will be removed in 4.0.0, use " + - "gensim.models.term_similarity.SparseTermSimilarityMatrix.inner_product instead") + "gensim.similarities.termsim.SparseTermSimilarityMatrix.inner_product instead") def softcossim(vec1, vec2, similarity_matrix): """Get Soft Cosine Measure between two vectors given a term similarity matrix. @@ -812,7 +812,7 @@ def softcossim(vec1, vec2, similarity_matrix): -------- :meth:`gensim.models.keyedvectors.WordEmbeddingsKeyedVectors.similarity_matrix` A term similarity matrix produced from term embeddings. - :func:`gensim.models.levenshtein.similarity_matrix` + :func:`gensim.similarities.levenshtein.similarity_matrix` A term similarity matrix produced from Levenshtein distances. :class:`gensim.similarities.docsim.SoftCosineSimilarity` A class for performing corpus-based similarity queries with Soft Cosine Measure. diff --git a/gensim/models/__init__.py b/gensim/models/__init__.py index fcfb507ad7..b1ef3ff367 100644 --- a/gensim/models/__init__.py +++ b/gensim/models/__init__.py @@ -21,10 +21,7 @@ from .ldaseqmodel import LdaSeqModel # noqa:F401 from .fasttext import FastText # noqa:F401 from .translation_matrix import TranslationMatrix, BackMappingTranslationMatrix # noqa:F401 -from .term_similarity import TermSimilarityIndex, UniformTermSimilarityIndex, SparseTermSimilarityMatrix # noqa:F401 -from .levenshtein import LevenshteinSimilarityIndex # noqa:F401 -from . import levenshtein # noqa:F401 from . import wrappers # noqa:F401 from . import deprecated # noqa:F401 diff --git a/gensim/models/keyedvectors.py b/gensim/models/keyedvectors.py index cd5b8c6200..2895e3b187 100644 --- a/gensim/models/keyedvectors.py +++ b/gensim/models/keyedvectors.py @@ -84,7 +84,7 @@ from scipy import stats from gensim.utils import deprecated from gensim.models.utils_any2vec import _save_word2vec_format, _load_word2vec_format, _compute_ngrams, _ft_hash -from gensim.models.term_similarity import TermSimilarityIndex, SparseTermSimilarityMatrix +from gensim.similarities.termsim import TermSimilarityIndex, SparseTermSimilarityMatrix logger = logging.getLogger(__name__) @@ -538,7 +538,7 @@ def similarity_matrix(self, dictionary, tfidf=None, threshold=0.0, exponent=2.0, The Soft Cosine Measure. :class:`gensim.similarities.docsim.SoftCosineSimilarity` A class for performing corpus-based similarity queries with Soft Cosine Measure. - :func:`gensim.models.levenshtein.similarity_matrix` + :func:`gensim.similarities.levenshtein.similarity_matrix` A term similarity matrix produced from Levenshtein distances. @@ -1079,7 +1079,7 @@ class WordEmbeddingSimilarityIndex(TermSimilarityIndex): See Also -------- - :class:`~gensim.models.term_similarity.SparseTermSimilarityMatrix` + :class:`~gensim.similarities.termsim.SparseTermSimilarityMatrix` Build a term similarity matrix and compute the Soft Cosine Measure. """ diff --git a/gensim/similarities/__init__.py b/gensim/similarities/__init__.py index 52cbad43e7..da2257da32 100644 --- a/gensim/similarities/__init__.py +++ b/gensim/similarities/__init__.py @@ -4,3 +4,7 @@ # bring classes directly into package namespace, to save some typing from .docsim import Similarity, MatrixSimilarity, SparseMatrixSimilarity, SoftCosineSimilarity, WmdSimilarity # noqa:F401 +from .termsim import TermSimilarityIndex, UniformTermSimilarityIndex, SparseTermSimilarityMatrix # noqa:F401 +from .levenshtein import LevenshteinSimilarityIndex # noqa:F401 + +from . import levenshtein # noqa:F401 diff --git a/gensim/similarities/docsim.py b/gensim/similarities/docsim.py index 7b9d056b2b..26ed1e1a7d 100755 --- a/gensim/similarities/docsim.py +++ b/gensim/similarities/docsim.py @@ -875,7 +875,7 @@ class SoftCosineSimilarity(interfaces.SimilarityABC): """ @deprecated( "Method will be removed in 4.0.0, use " + - "gensim.models.term_similarity.SparseTermSimilarityMatrix.inner_product instead") + "gensim.similarities.termsim.SparseTermSimilarityMatrix.inner_product instead") def __init__(self, corpus, similarity_matrix, num_best=None, chunksize=256): """ @@ -895,7 +895,7 @@ def __init__(self, corpus, similarity_matrix, num_best=None, chunksize=256): -------- :meth:`gensim.models.keyedvectors.WordEmbeddingsKeyedVectors.similarity_matrix` A term similarity matrix produced from term embeddings. - :func:`gensim.models.levenshtein.similarity_matrix` + :func:`gensim.similarities.levenshtein.similarity_matrix` A term similarity matrix produced from Levenshtein distances. :func:`gensim.matutils.softcossim` The Soft Cosine Measure. @@ -916,13 +916,13 @@ def __init__(self, corpus, similarity_matrix, num_best=None, chunksize=256): @deprecated( "Method will be removed in 4.0.0, use " + - "gensim.models.term_similarity.SparseTermSimilarityMatrix.inner_product instead") + "gensim.similarities.termsim.SparseTermSimilarityMatrix.inner_product instead") def __len__(self): return len(self.corpus) @deprecated( "Method will be removed in 4.0.0, use " + - "gensim.models.term_similarity.SparseTermSimilarityMatrix.inner_product instead") + "gensim.similarities.termsim.SparseTermSimilarityMatrix.inner_product instead") def get_similarities(self, query): """Get similarity between `query` and current index instance. @@ -969,7 +969,7 @@ def get_similarities(self, query): @deprecated( "Method will be removed in 4.0.0, use " + - "gensim.models.term_similarity.SparseTermSimilarityMatrix.inner_product instead") + "gensim.similarities.termsim.SparseTermSimilarityMatrix.inner_product instead") def __str__(self): return "%s<%i docs, %i features>" % (self.__class__.__name__, len(self), self.similarity_matrix.shape[0]) diff --git a/gensim/models/levenshtein.py b/gensim/similarities/levenshtein.py similarity index 95% rename from gensim/models/levenshtein.py rename to gensim/similarities/levenshtein.py index fd09740e0b..07bab287ae 100644 --- a/gensim/models/levenshtein.py +++ b/gensim/similarities/levenshtein.py @@ -15,7 +15,7 @@ from Levenshtein import distance from numpy import float32 as REAL -from gensim.models.term_similarity import TermSimilarityIndex, SparseTermSimilarityMatrix +from gensim.similarities.termsim import TermSimilarityIndex, SparseTermSimilarityMatrix from gensim.utils import deprecated logger = logging.getLogger(__name__) @@ -80,9 +80,9 @@ class LevenshteinSimilarityIndex(TermSimilarityIndex): See Also -------- - :func:`gensim.models.levenshtein.levsim` + :func:`gensim.similarities.levenshtein.levsim` The Levenshtein similarity. - :class:`~gensim.models.term_similarity.SparseTermSimilarityMatrix` + :class:`~gensim.similarities.termsim.SparseTermSimilarityMatrix` Build a term similarity matrix and compute the Soft Cosine Measure. """ @@ -120,7 +120,7 @@ def most_similar(self, t1, topn=10): @deprecated( "Function will be deprecated in 4.0.0, use " + - "gensim.models.levenshtein.LevenshteinSimilarityIndex instead") + "gensim.similarities.levenshtein.LevenshteinSimilarityIndex instead") def similarity_matrix(dictionary, tfidf=None, threshold=0.0, alpha=1.8, beta=5.0, nonzero_limit=100, workers=1, dtype=REAL): """Constructs a term similarity matrix for computing Soft Cosine Measure. diff --git a/gensim/models/term_similarity.py b/gensim/similarities/termsim.py similarity index 97% rename from gensim/models/term_similarity.py rename to gensim/similarities/termsim.py index 527ca69379..66588fe5dd 100644 --- a/gensim/models/term_similarity.py +++ b/gensim/similarities/termsim.py @@ -27,7 +27,7 @@ class TermSimilarityIndex(SaveLoad): See Also -------- - :class:`~gensim.models.term_similarity.SparseTermSimilarityMatrix` + :class:`~gensim.similarities.termsim.SparseTermSimilarityMatrix` Build a term similarity matrix and compute the Soft Cosine Measure. """ @@ -67,7 +67,7 @@ class UniformTermSimilarityIndex(TermSimilarityIndex): See Also -------- - :class:`~gensim.models.term_similarity.SparseTermSimilarityMatrix` + :class:`~gensim.similarities.termsim.SparseTermSimilarityMatrix` Build a term similarity matrix and compute the Soft Cosine Measure. Notes @@ -93,7 +93,7 @@ class SparseTermSimilarityMatrix(SaveLoad): Parameters ---------- - source : :class:`~gensim.models.term_similarity.TermSimilarityIndex` or :class:`scipy.sparse.spmatrix` + source : :class:`~gensim.similarities.termsim.TermSimilarityIndex` or :class:`scipy.sparse.spmatrix` The source of the term similarity. Either a term similarity index that will be used for building the term similarity matrix, or an existing sparse term similarity matrix that will be encapsulated and stored in the matrix attribute. diff --git a/gensim/test/test_levenshtein.py b/gensim/test/test_levenshtein.py index f7ab4a6ca7..c8f04ad60a 100644 --- a/gensim/test/test_levenshtein.py +++ b/gensim/test/test_levenshtein.py @@ -6,7 +6,7 @@ # Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html """ -Automated tests for checking the gensim.models.levenshtein module. +Automated tests for checking the gensim.similarities.levenshtein module. """ import logging diff --git a/gensim/test/test_term_similarity.py b/gensim/test/test_term_similarity.py index 6415c1a27d..9d880a2265 100644 --- a/gensim/test/test_term_similarity.py +++ b/gensim/test/test_term_similarity.py @@ -6,7 +6,7 @@ # Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html """ -Automated tests for checking the gensim.models.term_similarity module. +Automated tests for checking the gensim.similarities.termsim module. """ import logging @@ -14,7 +14,8 @@ import unittest from gensim.corpora import Dictionary -from gensim.models import UniformTermSimilarityIndex, SparseTermSimilarityMatrix, TfidfModel +from gensim.models import TfidfModel +from gensim.similarities import UniformTermSimilarityIndex, SparseTermSimilarityMatrix import numpy as np from scipy.sparse import csc_matrix, csr_matrix