diff --git a/gensim/summarization/keywords.py b/gensim/summarization/keywords.py
index db7c8a0dc7..2c85cf0bfe 100644
--- a/gensim/summarization/keywords.py
+++ b/gensim/summarization/keywords.py
@@ -302,7 +302,8 @@ def _extract_tokens(lemmas, scores, ratio, words):
 
     """
     lemmas.sort(key=lambda s: scores[s], reverse=True)
-    length = len(lemmas) * ratio if words is None else words
+    # Clamp to len(lemmas): both words > len(lemmas) and ratio > 1 would otherwise index past the end.
+    length = min(len(lemmas) * ratio if words is None else words, len(lemmas))
     return [(scores[lemmas[i]], lemmas[i],) for i in range(int(length))]
 
 
diff --git a/gensim/test/test_keywords.py b/gensim/test/test_keywords.py
index 6011c83df4..ffe2f32a8f 100644
--- a/gensim/test/test_keywords.py
+++ b/gensim/test/test_keywords.py
@@ -101,6 +101,18 @@ def test_text_keywords_without_graph_edges(self):
         kwds = keywords(text, deacc=False, scores=True)
         self.assertFalse(len(kwds))
 
+    def test_keywords_with_words_greater_than_lemmas(self):
+        # words parameter is greater than number of words in text variable
+        text = 'Test string small length'
+        kwds = keywords(text, words=5, split=True)
+        self.assertIsNotNone(kwds)
+
+    def test_keywords_with_ratio_greater_than_one(self):
+        # ratio above 1 requests more keywords than there are lemmas in text variable
+        text = 'Test string small length'
+        kwds = keywords(text, ratio=2, split=True)
+        self.assertIsNotNone(kwds)
+
 
 if __name__ == '__main__':
     logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.DEBUG)