Skip to content

Commit

Permalink
Fix a possible integer overflow of column_nonzero
Browse files (browse the repository at this point in the history)
  • Loading branch information
Witiko committed May 18, 2019
1 parent b588bd7 commit 7060ee8
Showing 1 changed file with 3 additions and 3 deletions.
6 changes: 3 additions & 3 deletions gensim/similarities/termsim.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -209,7 +209,7 @@ def __init__(self, source, dictionary=None, tfidf=None, symmetric=True, positive
tfidf.idfs.items(),
key=lambda x: (lambda term_index, term_idf: (term_idf, -term_index))(*x), reverse=True)]

- column_nonzero = np.array([1] * matrix_order, dtype=_shortest_uint_dtype(nonzero_limit))
+ column_nonzero = np.array([0] * matrix_order, dtype=_shortest_uint_dtype(nonzero_limit))
column_sum = np.zeros(matrix_order, dtype=dtype)
matrix = sparse.identity(matrix_order, dtype=dtype, format="dok")

Expand All @@ -227,7 +227,7 @@ def __init__(self, source, dictionary=None, tfidf=None, symmetric=True, positive
0.0, 1.0))

t1 = dictionary[t1_index]
- num_nonzero = column_nonzero[t1_index] - 1
+ num_nonzero = column_nonzero[t1_index]
num_rows = nonzero_limit - num_nonzero
most_similar = [
(dictionary.token2id[term], similarity)
Expand All @@ -246,7 +246,7 @@ def __init__(self, source, dictionary=None, tfidf=None, symmetric=True, positive
if positive_definite and column_sum[t1_index] + abs(similarity) >= 1.0:
break
if symmetric:
- if column_nonzero[t2_index] <= nonzero_limit \
+ if column_nonzero[t2_index] < nonzero_limit \
and (not positive_definite or column_sum[t2_index] + abs(similarity) < 1.0) \
and not (t1_index, t2_index) in matrix:
matrix[t1_index, t2_index] = similarity
Expand Down

0 comments on commit 7060ee8

Please sign in to comment.