Skip to content

Commit

Permalink
changing tests accordingly
Browse files (browse the repository at this point in the history)
  • Loading branch information
markroxor committed Oct 20, 2016
1 parent 6fcd75e commit c144bf3
Showing 1 changed file with 12 additions and 9 deletions.
21 changes: 12 additions & 9 deletions gensim/test/test_doc2vec.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,10 +84,11 @@ def test_int_doctags(self):
"""Test doc2vec doctag alternatives"""
corpus = DocsLeeCorpus()

- model = doc2vec.Doc2Vec(min_count=1)
+ size = 300
+ model = doc2vec.Doc2Vec(min_count=1, size=size)
  model.build_vocab(corpus)
- self.assertEqual(len(model.docvecs.doctag_syn0), 300)
- self.assertEqual(model.docvecs[0].shape, (300,))
+ self.assertEqual(len(model.docvecs.doctag_syn0), size)
+ self.assertEqual(model.docvecs[0].shape, (size,))
self.assertRaises(KeyError, model.__getitem__, '_*0')

def test_missing_string_doctag(self):
Expand All @@ -106,11 +107,13 @@ def test_string_doctags(self):
# force duplicated tags
corpus = corpus[0:10] + corpus

- model = doc2vec.Doc2Vec(min_count=1)
+ size = 300
+ model = doc2vec.Doc2Vec(size=size, min_count=1)
  model.build_vocab(corpus)
- self.assertEqual(len(model.docvecs.doctag_syn0), 300)
- self.assertEqual(model.docvecs[0].shape, (300,))
- self.assertEqual(model.docvecs['_*0'].shape, (300,))
+
+ self.assertEqual(len(model.docvecs.doctag_syn0), size)
+ self.assertEqual(model.docvecs[0].shape, (size,))
+ self.assertEqual(model.docvecs['_*0'].shape, (size,))
self.assertTrue(all(model.docvecs['_*0'] == model.docvecs[0]))
self.assertTrue(max(d.offset for d in model.docvecs.doctags.values()) < len(model.docvecs.doctags))
self.assertTrue(max(model.docvecs._int_index(str_key) for str_key in model.docvecs.doctags.keys()) < len(model.docvecs.doctag_syn0))
Expand Down Expand Up @@ -168,15 +171,15 @@ def model_sanity(self, model):
def test_training(self):
"""Test doc2vec training."""
corpus = DocsLeeCorpus()
- model = doc2vec.Doc2Vec(size=100, min_count=2, iter=20)
+ model = doc2vec.Doc2Vec(size=100, min_count=2, iter=20, window=8, sample=0.01, workers=1)
model.build_vocab(corpus)
self.assertEqual(model.docvecs.doctag_syn0.shape, (300, 100))
model.train(corpus)

self.model_sanity(model)

# build vocab and train in one step; must be the same as above
- model2 = doc2vec.Doc2Vec(corpus, size=100, min_count=2, iter=20)
+ model2 = doc2vec.Doc2Vec(corpus, size=100, min_count=2, iter=20, window=8, sample=0.01, workers=1)
self.models_equal(model, model2)

def test_dbow_hs(self):
Expand Down

0 comments on commit c144bf3

Please sign in to comment.