diff --git a/gensim/models/logentropy_model.py b/gensim/models/logentropy_model.py
index bdc726d5fd..a79c685660 100644
--- a/gensim/models/logentropy_model.py
+++ b/gensim/models/logentropy_model.py
@@ -108,6 +108,9 @@ def initialize(self, corpus):
             self.n_docs, len(glob_freq), self.n_words
         )
         logger.debug('iterating over corpus')
+
+        # initialize doc_no2 index in case corpus is empty
+        doc_no2 = 0
         for doc_no2, bow in enumerate(corpus):
             for key, freq in bow:
                 p = (float(freq) / glob_freq[key]) * math.log(float(freq) / glob_freq[key])
diff --git a/gensim/test/test_logentropy_model.py b/gensim/test/test_logentropy_model.py
index bc64f1b2d1..5613b1d896 100644
--- a/gensim/test/test_logentropy_model.py
+++ b/gensim/test/test_logentropy_model.py
@@ -19,9 +19,23 @@
 
 
 class TestLogEntropyModel(unittest.TestCase):
+    TEST_CORPUS = [[(1, 1.0)], [], [(0, 0.5), (2, 1.0)], []]
+
     def setUp(self):
         self.corpus_small = MmCorpus(datapath('test_corpus_small.mm'))
         self.corpus_ok = MmCorpus(datapath('test_corpus_ok.mm'))
+        self.corpus_empty = []
+
+    def test_generator_fail(self):
+        """Test creating a model using a generator as input; should fail."""
+        def get_generator(test_corpus=TestLogEntropyModel.TEST_CORPUS):
+            for test_doc in test_corpus:
+                yield test_doc
+        self.assertRaises(ValueError, logentropy_model.LogEntropyModel, corpus=get_generator())
+
+    def test_empty_fail(self):
+        """Test creating a model using an empty input; should fail."""
+        self.assertRaises(ValueError, logentropy_model.LogEntropyModel, corpus=self.corpus_empty)
 
     def testTransform(self):
         # create the transformation model