Skip to content

Commit

Permalink
rm unused stuff from miislita tests.
Browse files Browse the repository at this point in the history
  • Loading branch information
sotte committed Jan 16, 2011
1 parent 5017514 commit cfc85ec
Showing 1 changed file with 5 additions and 26 deletions.
31 changes: 5 additions & 26 deletions src/gensim/test/test_miislita.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,31 +28,15 @@ def test_miislita_high_level(self):
with open(corpusName) as corpusfile:
texts = corpusfile.readlines()

#corpus_txt_filename = (corpusName)
#try:
# f = open(corpus_txt_filename, "r")
# try:
# texts = f.readlines()
# finally:
# f.close()
#except IOError:
# print 'File not found.'
# sys.exit(-1)

# get a dictionary and a corpus (LoL) objects. Save them
# filter texts with a stopwordlist
stoplist = set('for a of the and to in on'.split())
texts = [[word for word in doc.lower().split() if word not in stoplist]
for doc in texts]

# store the dictionary, for future reference; not really needed.
dictionary = corpora.Dictionary.fromDocuments(texts)
# store the dictionary, for future reference
dictionary.save(corpusName + '.dict')

# problem: not in the same order as the matrix in the miislita example
# TODO: do we need this?
print dictionary
print dictionary.token2id

corpusMiislita = [dictionary.doc2bow(text) for text in texts]

# create a corpus object (not LoL, but a scipy matrix). For this we
Expand All @@ -74,23 +58,18 @@ def test_miislita_high_level(self):
# compare to query
query = 'latent semantic indexing'
vec_bow = dictionary.doc2bow(query.lower().split())
# convert the query to LSI space
# TODO: unused
vec_tfidf = tfidf[vec_bow]

# similarities, ordered
# perform a similarity query against the corpus
sims_tfidf = index_tfidf[vec_bow]

# NOTE: it does not matter if we use the raw counts (vec_bow) or the
# tfidf counts for the query here (vec_tfidf). The resulting cosines
# are the same.
#sims_tfidf = sorted(list(enumerate(sims_tfidf)), key=lambda item:
# sims_tfidf = sorted(list(enumerate(sims_tfidf)), key=lambda item:
# -item[1])

print sims_tfidf # success

# TODO: what exactly do we expect here?
#self.assertTrue(False)
# for the expected results see the article
expected = [0, 0.2560, 0.7022, 0.1524, 0.3334]
for i, value in enumerate(expected):
self.assertAlmostEqual(sims_tfidf[i], value, 2)
Expand Down

0 comments on commit cfc85ec

Please sign in to comment.