Skip to content

Commit

Permalink
[Issue-2670] Bug fix: Initialize doc_no2 because it is not set when 'corpus' is empty (#2672)
Browse files Browse the repository at this point in the history

* [Issue-2670] Bug fix: Initialize doc_no2 because it is not set when 'corpus' is empty

* [Issue-2670] Add: unittests should fail on invalid input (generator and empty corpus)

* [Issue-2670] Add: Fix unittest for generator

* [Issue-2670] Fix unittest tox:flake8 errors

* [Issue-2670] Fix: empty corpus def in unittest

* [Issue-2670] Fix: empty corpus and generator unittests

* [Issue-2670] Fix: empty corpus and generator unittests
  • Loading branch information
paulrigor authored and mpenkov committed Nov 23, 2019
1 parent e7c9f0e commit e391f0c
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 0 deletions.
3 changes: 3 additions & 0 deletions gensim/models/logentropy_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,9 @@ def initialize(self, corpus):
self.n_docs, len(glob_freq), self.n_words
)
logger.debug('iterating over corpus')

# Pre-bind doc_no2 so it is still defined after the loop when the corpus is empty and the loop body never runs
doc_no2 = 0
for doc_no2, bow in enumerate(corpus):
for key, freq in bow:
p = (float(freq) / glob_freq[key]) * math.log(float(freq) / glob_freq[key])
Expand Down
14 changes: 14 additions & 0 deletions gensim/test/test_logentropy_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,23 @@


class TestLogEntropyModel(unittest.TestCase):
# Small in-memory corpus of four documents, two of them empty; each non-empty
# document is a list of 2-tuples (presumably (term id, weight) — confirm).
TEST_CORPUS = [[(1, 1.0)], [], [(0, 0.5), (2, 1.0)], []]

def setUp(self):
    """Load the Matrix Market fixtures and define an empty corpus for the tests."""
    small_path = datapath('test_corpus_small.mm')
    self.corpus_small = MmCorpus(small_path)

    ok_path = datapath('test_corpus_ok.mm')
    self.corpus_ok = MmCorpus(ok_path)

    # A corpus with no documents at all, used by the empty-input test.
    self.corpus_empty = []

def test_generator_fail(self):
    """Check that building a model from a generator raises ValueError."""
    # A generator can be consumed only once.
    # NOTE(review): presumably the model needs more than one pass over the
    # corpus, which is why this input is rejected — confirm against
    # LogEntropyModel.initialize.
    one_shot_corpus = (doc for doc in TestLogEntropyModel.TEST_CORPUS)
    with self.assertRaises(ValueError):
        logentropy_model.LogEntropyModel(corpus=one_shot_corpus)

def test_empty_fail(self):
    """Check that building a model from an empty corpus raises ValueError."""
    empty = self.corpus_empty
    with self.assertRaises(ValueError):
        logentropy_model.LogEntropyModel(corpus=empty)

def testTransform(self):
# create the transformation model
Expand Down

0 comments on commit e391f0c

Please sign in to comment.