standardize the import of KeyedVectors
hobs committed Oct 17, 2019
1 parent 259e322 commit aa3ca7d
Showing 5 changed files with 15 additions and 9 deletions.
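Background for the change (not part of the diff): gensim re-exports KeyedVectors at the gensim.models package level, so both spellings below resolve to the same class. The commit standardizes every example on the shorter, package-level path, which is the form the gensim documentation uses.

# Both imports load the same class; the repo now standardizes on the
# second, package-level form used throughout the gensim documentation.
from gensim.models.keyedvectors import KeyedVectors  # old, deep module path
from gensim.models import KeyedVectors               # new, standardized path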
2 changes: 1 addition & 1 deletion setup.cfg
@@ -38,7 +38,7 @@ include_package_data = True
package_dir =
=src
# Project dependencies (unversioned, semicolon-separated)
-install_requires = future; jupyter; h5py; matplotlib; nltk; pandas; pypandoc; plotly; python-Levenshtein; scikit-learn; scipy; seaborn; tqdm; gensim; pandas-datareader; pugnlp; tensorflow; keras; regex; spacy; lxml; html2text; # annoy; cufflinks;
+install_requires = future; jupyter; h5py; matplotlib; nltk; pandas; pypandoc; plotly; python-Levenshtein; scikit-learn; scipy; seaborn; tqdm; gensim; pandas-datareader; pugnlp; tensorflow; keras; regex; spacy; lxml; html2text;

# Add here test requirements (semicolon-separated)
tests_require = pytest; pytest-cov; pyscaffold; pyttsx3; MechanicalSoup; plyfile; sphinx
14 changes: 10 additions & 4 deletions src/nlpia/book/examples/ch06_nessvectors.py
@@ -74,16 +74,22 @@
TODO:
automate the search for synonyms with higher than 60% similarity, walking a shallow graph
"""
import os

from collections import OrderedDict

import pandas as pd
from nlpia.data.loaders import get_data
# from gensim.models import KeyedVectors
from nlpia.data.loaders import get_data, BIGDATA_PATH
from gensim.models import KeyedVectors


word_vectors = get_data('word2vec') # not in book

wordvector_path = os.path.join(BIGDATA_PATH, 'GoogleNews-vectors-negative300.bin.gz') # not in book, reader required to compose this path

if 'word_vectors' not in globals():
if 'word_vectors' not in globals(): # not in book
    WV = word_vectors = get_data('word2vec')
    # word_vectors = KeyedVectors.load_word2vec_format(wordvector_path, binary=True)
    word_vectors = KeyedVectors.load_word2vec_format(wordvector_path, binary=True)


###################################################
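As a quick sanity check on the vectors loaded above (a sketch, not part of the commit), the standard KeyedVectors query API supports the classic analogy test:

# Sketch (not in the commit): verify the GoogleNews vectors loaded correctly.
# most_similar() does vector arithmetic over positive/negative word lists.
word_vectors.most_similar(positive=['king', 'woman'], negative=['man'], topn=3)
# With the full GoogleNews model, 'queen' should appear near the top.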
4 changes: 2 additions & 2 deletions src/nlpia/book/examples/ch07.ipynb
@@ -177,7 +177,7 @@
"outputs": [],
"source": [
"from nltk.tokenize import TreebankWordTokenizer\n",
"from gensim.models.keyedvectors import KeyedVectors\n",
"from gensim.models import KeyedVectors\n",
"word_vectors = KeyedVectors.load_word2vec_format('GoogleNews-vectors-negative300.bin.gz', binary=True, limit=200000)\n",
"\n",
"def tokenize_and_vectorize(dataset):\n",
@@ -430,4 +430,4 @@
},
"nbformat": 4,
"nbformat_minor": 0
-}
\ No newline at end of file
+}
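The notebook cell is truncated after the def line above. For reference, a sketch of how tokenize_and_vectorize typically continues in the book's chapter 7-9 examples, assuming dataset is a list of (label, text) pairs; tokens missing from the pretrained vocabulary are silently skipped:

def tokenize_and_vectorize(dataset):
    tokenizer = TreebankWordTokenizer()
    vectorized_data = []
    for sample in dataset:
        tokens = tokenizer.tokenize(sample[1])  # sample assumed to be a (label, text) pair
        sample_vecs = []
        for token in tokens:
            try:
                sample_vecs.append(word_vectors[token])
            except KeyError:
                pass  # token not in the pretrained word2vec vocabulary
        vectorized_data.append(sample_vecs)
    return vectorized_data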
2 changes: 1 addition & 1 deletion src/nlpia/book/examples/ch08.ipynb
@@ -173,7 +173,7 @@
],
"source": [
"from nltk.tokenize import TreebankWordTokenizer\n",
"from gensim.models.keyedvectors import KeyedVectors\n",
"from gensim.models import KeyedVectors\n",
"word_vectors = KeyedVectors.load_word2vec_format('GoogleNews-vectors-negative300.bin.gz', binary=True, limit=200000)\n",
"\n",
"def tokenize_and_vectorize(dataset):\n",
2 changes: 1 addition & 1 deletion src/nlpia/book/examples/ch09.ipynb
@@ -196,7 +196,7 @@
"outputs": [],
"source": [
"from nltk.tokenize import TreebankWordTokenizer\n",
"from gensim.models.keyedvectors import KeyedVectors\n",
"from gensim.models import KeyedVectors\n",
"word_vectors = KeyedVectors.load_word2vec_format('GoogleNews-vectors-negative300.bin.gz', binary=True, limit=200000)\n",
"\n",
"def tokenize_and_vectorize(dataset):\n",
