Skip to content

Commit

Permalink
Use gensim.test.utils datapath() to construct paths to the test data
Browse files Browse the repository at this point in the history
Makes the code more readable and consistent with other tests.
  • Loading branch information
pabs3 committed Apr 2, 2022
1 parent 4c941b4 commit ead2afa
Showing 1 changed file with 9 additions and 9 deletions.
18 changes: 9 additions & 9 deletions gensim/test/test_lee.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
from gensim import corpora, models, utils, matutils
from gensim.parsing.preprocessing import preprocess_documents, preprocess_string, DEFAULT_FILTERS

+ from gensim.test.utils import datapath

bg_corpus = None
corpus = None
Expand All @@ -45,24 +46,23 @@ def setUp(self):
"""setup lee test corpora"""
global bg_corpus, corpus, human_sim_vector, bg_corpus2, corpus2

- pre_path = os.path.join(os.path.dirname(__file__), 'test_data')
- bg_corpus_file = 'lee_background.cor'
- corpus_file = 'lee.cor'
- sim_file = 'similarities0-1.txt'
+ bg_corpus_file = datapath('lee_background.cor')
+ corpus_file = datapath('lee.cor')
+ sim_file = datapath('similarities0-1.txt')

# read in the corpora
latin1 = partial(utils.to_unicode, encoding='latin1')
- with utils.open(os.path.join(pre_path, bg_corpus_file), 'rb') as f:
+ with utils.open(bg_corpus_file, 'rb') as f:
bg_corpus = preprocess_documents(latin1(line) for line in f)
- with utils.open(os.path.join(pre_path, corpus_file), 'rb') as f:
+ with utils.open(corpus_file, 'rb') as f:
corpus = preprocess_documents(latin1(line) for line in f)
- with utils.open(os.path.join(pre_path, bg_corpus_file), 'rb') as f:
+ with utils.open(bg_corpus_file, 'rb') as f:
bg_corpus2 = [preprocess_string(latin1(s), filters=DEFAULT_FILTERS[:-1]) for s in f]
- with utils.open(os.path.join(pre_path, corpus_file), 'rb') as f:
+ with utils.open(corpus_file, 'rb') as f:
corpus2 = [preprocess_string(latin1(s), filters=DEFAULT_FILTERS[:-1]) for s in f]

# read the human similarity data
- sim_matrix = np.loadtxt(os.path.join(pre_path, sim_file))
+ sim_matrix = np.loadtxt(sim_file)
sim_m_size = np.shape(sim_matrix)[0]
human_sim_vector = sim_matrix[np.triu_indices(sim_m_size, 1)]

Expand Down

0 comments on commit ead2afa

Please sign in to comment.