Skip to content

Commit

Permalink
Fix out of range issue in gensim.summarization.keywords (#2738)
Browse files Browse the repository at this point in the history
* Fixed out of range error in keywords.py

* Now using min() function to improve readability

* Added a test to make sure that keywords() does not
fail when the words param is greater than the number
of words in the string

* Fixed Travis CI build error caused by not having 2 blank lines after class definition

* Fixed whitespace issue for flake8

Co-authored-by: Carter Olsen <[email protected]>
  • Loading branch information
carterols and olsencar authored Mar 21, 2020
1 parent f767e1e commit 1b3ad81
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 1 deletion.
2 changes: 1 addition & 1 deletion gensim/summarization/keywords.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,7 @@ def _extract_tokens(lemmas, scores, ratio, words):
"""
lemmas.sort(key=lambda s: scores[s], reverse=True)
length = len(lemmas) * ratio if words is None else words
length = len(lemmas) * ratio if words is None else min(words, len(lemmas))
return [(scores[lemmas[i]], lemmas[i],) for i in range(int(length))]


Expand Down
6 changes: 6 additions & 0 deletions gensim/test/test_keywords.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,12 @@ def test_text_keywords_without_graph_edges(self):
kwds = keywords(text, deacc=False, scores=True)
self.assertFalse(len(kwds))

def test_keywords_with_words_greater_than_lemmas(self):
# words parameter is greater than number of words in text variable
text = 'Test string small length'
kwds = keywords(text, words=5, split=True)
self.assertIsNotNone(kwds)


if __name__ == '__main__':
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.DEBUG)
Expand Down

0 comments on commit 1b3ad81

Please sign in to comment.