standardize the import of KeyedVectors
hobs committed Oct 17, 2019
1 parent 259e322 commit aa3ca7d
Showing 5 changed files with 15 additions and 9 deletions.
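Background for the change (not part of the diff): gensim re-exports KeyedVectors at the gensim.models package level, so both spellings below resolve to the same class. The commit standardizes every example on the shorter, package-level path, which is the form the gensim documentation uses.

# Both imports load the same class; the repo now standardizes on the
# second, package-level form used throughout the gensim documentation.
from gensim.models.keyedvectors import KeyedVectors  # old, deep module path
from gensim.models import KeyedVectors               # new, standardized path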
2 changes: 1 addition & 1 deletion setup.cfg
@@ -38,7 +38,7 @@ include_package_data = True
package_dir =
=src
# Project dependencies (unversioned, semicolon-separated)
-install_requires = future; jupyter; h5py; matplotlib; nltk; pandas; pypandoc; plotly; python-Levenshtein; scikit-learn; scipy; seaborn; tqdm; gensim; pandas-datareader; pugnlp; tensorflow; keras; regex; spacy; lxml; html2text; # annoy; cufflinks;
+install_requires = future; jupyter; h5py; matplotlib; nltk; pandas; pypandoc; plotly; python-Levenshtein; scikit-learn; scipy; seaborn; tqdm; gensim; pandas-datareader; pugnlp; tensorflow; keras; regex; spacy; lxml; html2text;

# Add here test requirements (semicolon-separated)
tests_require = pytest; pytest-cov; pyscaffold; pyttsx3; MechanicalSoup; plyfile; sphinx
14 changes: 10 additions & 4 deletions src/nlpia/book/examples/ch06_nessvectors.py
@@ -74,16 +74,22 @@
TODO:
automate the search for synonyms with higher than 60% similarity, walking a shallow graph
"""
import os

from collections import OrderedDict

import pandas as pd
from nlpia.data.loaders import get_data
# from gensim.models import KeyedVectors
from nlpia.data.loaders import get_data, BIGDATA_PATH
from gensim.models import KeyedVectors


word_vectors = get_data('word2vec') # not in book

wordvector_path = os.path.join(BIGDATA_PATH, 'GoogleNews-vectors-negative300.bin.gz') # not in book, reader required to compose this path

if 'word_vectors' not in globals():
if 'word_vectors' not in globals(): # not in book
    WV = word_vectors = get_data('word2vec')
    # word_vectors = KeyedVectors.load_word2vec_format(wordvector_path, binary=True)
    word_vectors = KeyedVectors.load_word2vec_format(wordvector_path, binary=True)


###################################################
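As a quick sanity check on the vectors loaded above (a sketch, not part of the commit), the standard KeyedVectors query API supports the classic analogy test:

# Sketch (not in the commit): verify the GoogleNews vectors loaded correctly.
# most_similar() does vector arithmetic over positive/negative word lists.
word_vectors.most_similar(positive=['king', 'woman'], negative=['man'], topn=3)
# With the full GoogleNews model, 'queen' should appear near the top.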
4 changes: 2 additions & 2 deletions src/nlpia/book/examples/ch07.ipynb
@@ -177,7 +177,7 @@
"outputs": [],
"source": [
"from nltk.tokenize import TreebankWordTokenizer\n",
"from gensim.models.keyedvectors import KeyedVectors\n",
"from gensim.models import KeyedVectors\n",
"word_vectors = KeyedVectors.load_word2vec_format('GoogleNews-vectors-negative300.bin.gz', binary=True, limit=200000)\n",
"\n",
"def tokenize_and_vectorize(dataset):\n",
@@ -430,4 +430,4 @@
},
"nbformat": 4,
"nbformat_minor": 0
-}
\ No newline at end of file
+}
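The notebook cell is truncated after the def line above. For reference, a sketch of how tokenize_and_vectorize typically continues in the book's chapter 7-9 examples, assuming dataset is a list of (label, text) pairs; tokens missing from the pretrained vocabulary are silently skipped:

def tokenize_and_vectorize(dataset):
    tokenizer = TreebankWordTokenizer()
    vectorized_data = []
    for sample in dataset:
        tokens = tokenizer.tokenize(sample[1])  # sample assumed to be a (label, text) pair
        sample_vecs = []
        for token in tokens:
            try:
                sample_vecs.append(word_vectors[token])
            except KeyError:
                pass  # token not in the pretrained word2vec vocabulary
        vectorized_data.append(sample_vecs)
    return vectorized_data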
2 changes: 1 addition & 1 deletion src/nlpia/book/examples/ch08.ipynb
@@ -173,7 +173,7 @@
],
"source": [
"from nltk.tokenize import TreebankWordTokenizer\n",
"from gensim.models.keyedvectors import KeyedVectors\n",
"from gensim.models import KeyedVectors\n",
"word_vectors = KeyedVectors.load_word2vec_format('GoogleNews-vectors-negative300.bin.gz', binary=True, limit=200000)\n",
"\n",
"def tokenize_and_vectorize(dataset):\n",
2 changes: 1 addition & 1 deletion src/nlpia/book/examples/ch09.ipynb
@@ -196,7 +196,7 @@
"outputs": [],
"source": [
"from nltk.tokenize import TreebankWordTokenizer\n",
"from gensim.models.keyedvectors import KeyedVectors\n",
"from gensim.models import KeyedVectors\n",
"word_vectors = KeyedVectors.load_word2vec_format('GoogleNews-vectors-negative300.bin.gz', binary=True, limit=200000)\n",
"\n",
"def tokenize_and_vectorize(dataset):\n",
