From cfda7c2881f1bf08c076f21c17e84d589da747a7 Mon Sep 17 00:00:00 2001 From: Manish Kumar Date: Tue, 22 Oct 2019 03:53:38 +0530 Subject: [PATCH] Optimizing load_word2vec_format for binary equal to true --- gensim/models/utils_any2vec.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gensim/models/utils_any2vec.py b/gensim/models/utils_any2vec.py index 6bd7b28690..45543401db 100644 --- a/gensim/models/utils_any2vec.py +++ b/gensim/models/utils_any2vec.py @@ -382,7 +382,8 @@ def add_word(word, weights): word = utils.to_unicode(b''.join(word), encoding=encoding, errors=unicode_errors) with utils.ignore_deprecation_warning(): # TODO use frombuffer or something similar - weights = fromstring(fin.read(binary_len), dtype=REAL).astype(datatype) + raw_data=fin.read(binary_len).split() + weights = np.array(raw_data, dtype=REAL).astype(datatype) add_word(word, weights) else: for line_no in range(vocab_size):