From aa3ca7d758c2d18d506bff78de17973bc6a52043 Mon Sep 17 00:00:00 2001
From: hobs
Date: Thu, 17 Oct 2019 13:28:33 -0700
Subject: [PATCH] standardize the import of KeyedVectors

---
 setup.cfg                                    |  2 +-
 src/nlpia/book/examples/ch06_nessvectors.py  | 14 ++++++++++----
 src/nlpia/book/examples/ch07.ipynb           |  4 ++--
 src/nlpia/book/examples/ch08.ipynb           |  2 +-
 src/nlpia/book/examples/ch09.ipynb           |  2 +-
 5 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/setup.cfg b/setup.cfg
index 96a83d7..f616d69 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -38,7 +38,7 @@ include_package_data = True
 package_dir =
     =src
 # Project dependencies (unversioned, semicolon-separated)
-install_requires = future; jupyter; h5py; matplotlib; nltk; pandas; pypandoc; plotly; python-Levenshtein; scikit-learn; scipy; seaborn; tqdm; gensim; pandas-datareader; pugnlp; tensorflow; keras; regex; spacy; lxml; html2text; # annoy; cufflinks;
+install_requires = future; jupyter; h5py; matplotlib; nltk; pandas; pypandoc; plotly; python-Levenshtein; scikit-learn; scipy; seaborn; tqdm; gensim; pandas-datareader; pugnlp; tensorflow; keras; regex; spacy; lxml; html2text;
 # Add here test requirements (semicolon-separated)
 tests_require = pytest; pytest-cov; pyscaffold; pyttsx3; MechanicalSoup; plyfile; sphinx
diff --git a/src/nlpia/book/examples/ch06_nessvectors.py b/src/nlpia/book/examples/ch06_nessvectors.py
index dce9d47..d3a46ac 100644
--- a/src/nlpia/book/examples/ch06_nessvectors.py
+++ b/src/nlpia/book/examples/ch06_nessvectors.py
@@ -74,16 +74,22 @@ TODO:
 automate the search for synonyms with higher than 60% similarity, walking a shallow graph
 """
+import os
+
 from collections import OrderedDict

 import pandas as pd
-from nlpia.data.loaders import get_data
-# from gensim.models import KeyedVectors
+from nlpia.data.loaders import get_data, BIGDATA_PATH
+from gensim.models import KeyedVectors
+
+
+word_vectors = get_data('word2vec') # not in book
+wordvector_path = os.path.join(BIGDATA_PATH, 'GoogleNews-vectors-negative300.bin.gz') # not in book, reader required to compose this path

-if 'word_vectors' not in globals():
+if 'word_vectors' not in globals(): # not in book
     WV = word_vectors = get_data('word2vec')
-    # word_vectors = KeyedVectors.load_word2vec_format(wordvector_path, binary=True)
+    word_vectors = KeyedVectors.load_word2vec_format(wordvector_path, binary=True)


 ###################################################
diff --git a/src/nlpia/book/examples/ch07.ipynb b/src/nlpia/book/examples/ch07.ipynb
index a24e015..d9148e6 100644
--- a/src/nlpia/book/examples/ch07.ipynb
+++ b/src/nlpia/book/examples/ch07.ipynb
@@ -177,7 +177,7 @@
    "outputs": [],
    "source": [
     "from nltk.tokenize import TreebankWordTokenizer\n",
-    "from gensim.models.keyedvectors import KeyedVectors\n",
+    "from gensim.models import KeyedVectors\n",
     "word_vectors = KeyedVectors.load_word2vec_format('GoogleNews-vectors-negative300.bin.gz', binary=True, limit=200000)\n",
     "\n",
     "def tokenize_and_vectorize(dataset):\n",
@@ -430,4 +430,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 0
-}
\ No newline at end of file
+}
diff --git a/src/nlpia/book/examples/ch08.ipynb b/src/nlpia/book/examples/ch08.ipynb
index 054684d..efa20f2 100644
--- a/src/nlpia/book/examples/ch08.ipynb
+++ b/src/nlpia/book/examples/ch08.ipynb
@@ -173,7 +173,7 @@
    ],
    "source": [
     "from nltk.tokenize import TreebankWordTokenizer\n",
-    "from gensim.models.keyedvectors import KeyedVectors\n",
+    "from gensim.models import KeyedVectors\n",
     "word_vectors = KeyedVectors.load_word2vec_format('GoogleNews-vectors-negative300.bin.gz', binary=True, limit=200000)\n",
     "\n",
     "def tokenize_and_vectorize(dataset):\n",
diff --git a/src/nlpia/book/examples/ch09.ipynb b/src/nlpia/book/examples/ch09.ipynb
index b8f3834..a234b28 100644
--- a/src/nlpia/book/examples/ch09.ipynb
+++ b/src/nlpia/book/examples/ch09.ipynb
@@ -196,7 +196,7 @@
    "outputs": [],
    "source": [
     "from nltk.tokenize import TreebankWordTokenizer\n",
-    "from gensim.models.keyedvectors import KeyedVectors\n",
+    "from gensim.models import KeyedVectors\n",
     "word_vectors = KeyedVectors.load_word2vec_format('GoogleNews-vectors-negative300.bin.gz', binary=True, limit=200000)\n",
     "\n",
     "def tokenize_and_vectorize(dataset):\n",
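
A minimal usage sketch (not part of the patch) of the standardized import the chapter examples now share. It assumes GoogleNews-vectors-negative300.bin.gz has already been downloaded to the working directory; the path and the limit value below are illustrative, not taken from the patch.

# Sketch only: the import style the examples above standardize on.
from gensim.models import KeyedVectors  # replaces "from gensim.models.keyedvectors import KeyedVectors"

# Path is an assumption; point it at wherever the GoogleNews vectors were downloaded.
word_vectors = KeyedVectors.load_word2vec_format(
    'GoogleNews-vectors-negative300.bin.gz',
    binary=True,
    limit=200000)  # load only the first 200,000 vectors to save RAM

# Quick sanity check that the vectors loaded and behave as expected.
print(word_vectors.most_similar(positive=['king', 'woman'], negative=['man'], topn=3))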