From f87a9c48db503d9429b57fce5009c5e84a31b110 Mon Sep 17 00:00:00 2001 From: manneshiva Date: Mon, 4 Dec 2017 14:35:53 +0530 Subject: [PATCH 1/3] adds an assert in the `online_sanity` test case to check that `syn0` and `syn0_vocab` are different --- gensim/test/test_fasttext.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gensim/test/test_fasttext.py b/gensim/test/test_fasttext.py index 69aa9d074a..608df8206b 100644 --- a/gensim/test/test_fasttext.py +++ b/gensim/test/test_fasttext.py @@ -419,6 +419,8 @@ def online_sanity(self, model): self.assertTrue(all(['terrorism' not in l for l in others])) model.build_vocab(others) model.train(others, total_examples=model.corpus_count, epochs=model.iter) + # checks that `syn0` is different from `syn0_vocab` + self.assertFalse(np.all(np.equal(model.wv.syn0, model.wv.syn0_vocab))) self.assertFalse('terrorism' in model.wv.vocab) self.assertFalse('orism>' in model.wv.ngrams) model.build_vocab(terro, update=True) # update vocab From a8b85cc1edf0dc388b99ab5453728a2167ec701f Mon Sep 17 00:00:00 2001 From: manneshiva Date: Mon, 4 Dec 2017 14:43:08 +0530 Subject: [PATCH 2/3] creates a copy of the `syn0_vocab` vector before adding the ngram vectors to it and assigning the result to `syn0` --- gensim/models/fasttext.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gensim/models/fasttext.py b/gensim/models/fasttext.py index 6174754314..7d842deda9 100644 --- a/gensim/models/fasttext.py +++ b/gensim/models/fasttext.py @@ -228,7 +228,7 @@ def __getitem__(self, word): def get_vocab_word_vecs(self): for w, v in self.wv.vocab.items(): - word_vec = self.wv.syn0_vocab[v.index] + word_vec = np.copy(self.wv.syn0_vocab[v.index]) ngrams = self.wv.ngrams_word[w] ngram_weights = self.wv.syn0_ngrams for ngram in ngrams: From 19fbe4c8bc2a80b2ec21057afd3ed2e1b91a2cbe Mon Sep 17 00:00:00 2001 From: manneshiva Date: Tue, 5 Dec 2017 00:20:50 +0530 Subject: [PATCH 3/3] adds a unit test for `get_vocab_word_vecs` --- gensim/test/test_fasttext.py | 7 +++++++ 1 file changed, 
7 insertions(+) diff --git a/gensim/test/test_fasttext.py b/gensim/test/test_fasttext.py index 608df8206b..e652facab7 100644 --- a/gensim/test/test_fasttext.py +++ b/gensim/test/test_fasttext.py @@ -457,6 +457,13 @@ def test_cbow_neg_online(self): ) self.online_sanity(model) + def test_get_vocab_word_vecs(self): + model = FT_gensim(size=10, min_count=1, seed=42) + model.build_vocab(sentences) + original_syn0_vocab = np.copy(model.wv.syn0_vocab) + model.get_vocab_word_vecs() + self.assertTrue(np.all(np.equal(model.wv.syn0_vocab, original_syn0_vocab))) + if __name__ == '__main__': logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.DEBUG)