From 1b94bc05bdd82fc5c48a729028e7890564b84c9d Mon Sep 17 00:00:00 2001 From: ivan Date: Thu, 6 Jul 2017 13:14:05 +0500 Subject: [PATCH 1/3] fix encoding problem in saveAsText method --- gensim/test/test_corpora_dictionary.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/gensim/test/test_corpora_dictionary.py b/gensim/test/test_corpora_dictionary.py index 210ff94548..fea5047db7 100644 --- a/gensim/test/test_corpora_dictionary.py +++ b/gensim/test/test_corpora_dictionary.py @@ -12,6 +12,7 @@ import logging import tempfile import unittest +import codecs import os import os.path @@ -189,23 +190,23 @@ def test_saveAsText(self): d = Dictionary(small_text) d.save_as_text(tmpf) - with open(tmpf) as file: + with codecs.open(tmpf, 'r', encoding='utf-8') as file: serialized_lines = file.readlines() - self.assertEqual(serialized_lines[0], "3\n") + self.assertEqual(serialized_lines[0], u"3\n") self.assertEqual(len(serialized_lines), 4) # We do not know, which word will have which index - self.assertEqual(serialized_lines[1][1:], "\tdruhé\t2\n") - self.assertEqual(serialized_lines[2][1:], "\tprvé\t1\n") - self.assertEqual(serialized_lines[3][1:], "\tslovo\t3\n") + self.assertEqual(serialized_lines[1][1:], u"\tdruhé\t2\n") + self.assertEqual(serialized_lines[2][1:], u"\tprvé\t1\n") + self.assertEqual(serialized_lines[3][1:], u"\tslovo\t3\n") d.save_as_text(tmpf, sort_by_word=False) - with open(tmpf) as file: + with codecs.open(tmpf, 'r', encoding='utf-8') as file: serialized_lines = file.readlines() - self.assertEqual(serialized_lines[0], "3\n") + self.assertEqual(serialized_lines[0], u"3\n") self.assertEqual(len(serialized_lines), 4) - self.assertEqual(serialized_lines[1][1:], "\tslovo\t3\n") - self.assertEqual(serialized_lines[2][1:], "\tdruhé\t2\n") - self.assertEqual(serialized_lines[3][1:], "\tprvé\t1\n") + self.assertEqual(serialized_lines[1][1:], u"\tslovo\t3\n") + self.assertEqual(serialized_lines[2][1:], u"\tdruhé\t2\n") + 
self.assertEqual(serialized_lines[3][1:], u"\tprvé\t1\n") def test_loadFromText_legacy(self): """ From 5bc7696a09bec73eb21f07df4b83b53c9c11b0a1 Mon Sep 17 00:00:00 2001 From: ivan Date: Thu, 6 Jul 2017 14:05:49 +0500 Subject: [PATCH 2/3] fix encoding problem in loadFromText/loadFromText_legacy tests --- gensim/test/test_corpora_dictionary.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/gensim/test/test_corpora_dictionary.py b/gensim/test/test_corpora_dictionary.py index fea5047db7..e5a5786613 100644 --- a/gensim/test/test_corpora_dictionary.py +++ b/gensim/test/test_corpora_dictionary.py @@ -19,6 +19,7 @@ import scipy import gensim from gensim.corpora import Dictionary +from gensim.utils import to_utf8 from six import PY3 from six.moves import zip @@ -214,8 +215,8 @@ def test_loadFromText_legacy(self): Legacy format does not have num_docs on the first line. """ tmpf = get_tmpfile('load_dict_test_legacy.txt') - no_num_docs_serialization = "1\tprvé\t1\n2\tslovo\t2\n" - with open(tmpf, "w") as file: + no_num_docs_serialization = to_utf8("1\tprvé\t1\n2\tslovo\t2\n") + with open(tmpf, "wb") as file: file.write(no_num_docs_serialization) d = Dictionary.load_from_text(tmpf) @@ -228,8 +229,8 @@ def test_loadFromText_legacy(self): def test_loadFromText(self): """`Dictionary` can be loaded from textfile.""" tmpf = get_tmpfile('load_dict_test.txt') - no_num_docs_serialization = "2\n1\tprvé\t1\n2\tslovo\t2\n" - with open(tmpf, "w") as file: + no_num_docs_serialization = to_utf8("2\n1\tprvé\t1\n2\tslovo\t2\n") + with open(tmpf, "wb") as file: file.write(no_num_docs_serialization) d = Dictionary.load_from_text(tmpf) From d523f7960dc52afc4a93bb3fa9af5bbd9453497e Mon Sep 17 00:00:00 2001 From: ivan Date: Thu, 6 Jul 2017 14:40:13 +0500 Subject: [PATCH 3/3] replace run -> main in test entrypoint --- appveyor.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index 5c99f829f8..a68c3036e6 100644 --- 
a/appveyor.yml +++ b/appveyor.yml @@ -68,8 +68,7 @@ test_script: - "cd empty_folder" - "pip install pyemd testfixtures unittest2 sklearn Morfessor==2.0.2a4" - # Use run instead of main to avoid nnz code when fail (TODO: remove this) - - "python -c \"import nose; nose.run()\" -s -v gensim" + - "python -c \"import nose; nose.main()\" -s -v gensim" # Move back to the project folder - "cd .."