Skip to content

Commit

Permalink
Use gensim.test.utils datapath() to construct paths to the test data
Browse files: browse the repository at this point in the history
Makes the code more readable and consistent with other tests.
  • Loading branch information
pabs3 committed Mar 20, 2022
1 parent fe79fbf commit 63e31a7
Showing 1 changed file with 9 additions and 9 deletions.
18 changes: 9 additions & 9 deletions gensim/test/test_lee.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
from gensim import corpora, models, utils, matutils
from gensim.parsing.preprocessing import preprocess_documents, preprocess_string, DEFAULT_FILTERS

+ from gensim.test.utils import datapath

bg_corpus = None
corpus = None
Expand All @@ -45,24 +46,23 @@ def setUp(self):
"""setup lee test corpora"""
global bg_corpus, corpus, human_sim_vector, bg_corpus2, corpus2

- pre_path = os.path.join(os.path.dirname(__file__), 'test_data')
- bg_corpus_file = 'lee_background.cor'
- corpus_file = 'lee.cor'
- sim_file = 'similarities0-1.txt'
+ bg_corpus_file = datapath('lee_background.cor')
+ corpus_file = datapath('lee.cor')
+ sim_file = datapath('similarities0-1.txt')

# read in the corpora
latin1 = partial(utils.to_unicode, encoding='latin1')
- with utils.open(os.path.join(pre_path, bg_corpus_file), 'rb') as f:
+ with utils.open(bg_corpus_file, 'rb') as f:
bg_corpus = preprocess_documents(latin1(line) for line in f)
- with utils.open(os.path.join(pre_path, corpus_file), 'rb') as f:
+ with utils.open(corpus_file, 'rb') as f:
corpus = preprocess_documents(latin1(line) for line in f)
- with utils.open(os.path.join(pre_path, bg_corpus_file), 'rb') as f:
+ with utils.open(bg_corpus_file, 'rb') as f:
bg_corpus2 = [preprocess_string(latin1(s), filters=DEFAULT_FILTERS[:-1]) for s in f]
- with utils.open(os.path.join(pre_path, corpus_file), 'rb') as f:
+ with utils.open(corpus_file, 'rb') as f:
corpus2 = [preprocess_string(latin1(s), filters=DEFAULT_FILTERS[:-1]) for s in f]

# read the human similarity data
- sim_matrix = np.loadtxt(os.path.join(pre_path, sim_file))
+ sim_matrix = np.loadtxt(sim_file)
sim_m_size = np.shape(sim_matrix)[0]
human_sim_vector = sim_matrix[np.triu_indices(sim_m_size, 1)]

Expand Down

0 comments on commit 63e31a7

Please sign in to comment.