Commit 56824c7: Fix sphinx docs (dmlc#92)

* Fix pygments lexers

`log` (used in beam_search_generator.rst) is not a valid lexer. `bash` is for
shell scripts. We are presenting interactive sessions and should use `console`.
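
For illustration, here is the corrected form of one of the affected directives (the command line is taken from beam_search_generator.rst, shown in the diff below); the `console` lexer highlights the `$` prompt and renders the rest as an interactive session:

    .. code-block:: console

       $ python beam_search_generator.py --bos I love it --beam_size 5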

* Add .ipynb_checkpoints to sphinx exclude_patterns

This is required (see https://media.readthedocs.org/pdf/nbsphinx/latest/nbsphinx.pdf);
otherwise Sphinx emits a flood of warnings whenever the environment contains
.ipynb_checkpoints directories.

* Declare development requirements

This list may not be exhaustive yet, but it covers the requirements needed on my
machine to build the documentation.

* Make CI deploy stage fail if there are any Sphinx warnings
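
Sphinx's -W option turns warnings into errors, so the build stops at the first
warning; the deploy stage now runs:

    make -C docs html SPHINXOPTS=-W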

* Add 'release' dependency to 'docs' in Makefile

'docs' depends on .zip archives created in the 'release' step

* Declare word embedding evaluation extended_results.ipynb as orphan

Sphinx won't warn about orphan elements not being included in the TOC.

* Fix docstrings causing sphinx warnings

* Install ipython from conda-forge

Works around ContinuumIO/anaconda-issues#1430.
leezu authored and szhengac committed Apr 29, 2018
1 parent a6ec891 commit 56824c7
Showing 18 changed files with 37 additions and 33 deletions.
Jenkinsfile (2 changes: 1 addition & 1 deletion)

@@ -62,7 +62,7 @@ stage("Deploy") {
         export LD_LIBRARY_PATH=/usr/local/cuda/lib64
         make clean
         make release
-        make -C docs html"""
+        make -C docs html SPHINXOPTS=-W"""
 
     if (env.BRANCH_NAME.startsWith("PR-")) {
       sh """#!/bin/bash
Makefile (2 changes: 1 addition & 1 deletion)

@@ -20,7 +20,7 @@ ROOTDIR = $(CURDIR)
 pylint:
 	pylint --rcfile=$(ROOTDIR)/.pylintrc gluonnlp scripts/*/*.py
 
-docs:
+docs: release
 	make -C docs html
 
 clean:
docs/api/model.rst (1 change: 0 additions & 1 deletion)

@@ -6,7 +6,6 @@ all requested pre-trained weights are downloaded from public repo and stored in
 
 .. currentmodule:: gluonnlp.model
 
-
 Language Modeling
 -----------------
 
docs/conf.py (2 changes: 1 addition & 1 deletion)

@@ -111,7 +111,7 @@
 
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
-exclude_patterns = ['_build']
+exclude_patterns = ['_build', '**.ipynb_checkpoints']
 
 # The reST default role (used for this markup: `text`) to use for all
 # documents.
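(In Sphinx's exclude_patterns globs, the `**` prefix lets the pattern match
.ipynb_checkpoints directories at any depth in the source tree; this is the
form the nbsphinx documentation recommends.)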
docs/index.rst (4 changes: 2 additions & 2 deletions)

@@ -29,7 +29,7 @@ GluonNLP relies on the recent version of MXNet. The easiest way to install MXNet
 is through `pip <https://pip.pypa.io/en/stable/installing/>`_. The following
 command installs a nightly built CPU version of MXNet.
 
-.. code-block:: bash
+.. code-block:: console
 
    pip install --pre --upgrade mxnet
@@ -45,7 +45,7 @@ command installs a nightly built CPU version of MXNet.
 
 Then install the GluonNLP toolkit by
 
-.. code-block:: bash
+.. code-block:: console
 
    pip install gluonnlp
env/doc.yml (1 change: 1 addition & 0 deletions)

@@ -4,6 +4,7 @@ channels:
   - defaults
 dependencies:
   - python
+  - ipython
   - sphinx=1.7.2
   - sphinx-gallery
   - sphinx_rtd_theme
gluonnlp/data/word_embedding_evaluation.py (1 change: 1 addition & 0 deletions)

@@ -410,6 +410,7 @@ class SimLex999(WordSimilarityEvaluationDataset):
     License: Unspecified
 
     The dataset contains
+
     - word1: The first concept in the pair.
     - word2: The second concept in the pair. Note that the order is only
       relevant to the column Assoc(USF). These values (free association scores)
gluonnlp/embedding/evaluation.py (1 change: 1 addition & 0 deletions)

@@ -234,6 +234,7 @@ class ThreeCosMul(WordEmbeddingAnalogyFunction):
         \\arg\\max_{b^* ∈ V}\\frac{\\cos(b^∗, b) \\cos(b^*, a)}{cos(b^*, a^*) + ε}
 
     See the following paper for more details:
+
     - Levy, O., & Goldberg, Y. (2014). Linguistic regularities in sparse and
       explicit word representations. In R. Morante, & W. Yih, Proceedings of the
       Eighteenth Conference on Computational Natural Language Learning, CoNLL 2014,
gluonnlp/embedding/token_embedding.py (8 changes: 0 additions & 8 deletions)

@@ -132,7 +132,6 @@ def list_sources(embedding_name=None):
 
 class TokenEmbedding(object):
     """Token embedding base class.
-
     To load token embedding from an externally hosted pre-trained token embedding file, such as
     those of GloVe and FastText, use :func:`gluonnlp.embedding.create`.
     To get all the available `embedding_name` and `source`, use
@@ -150,14 +149,12 @@ class TokenEmbedding(object):
     If a token is encountered multiple times in the pre-trained token embedding file, only the
     first-encountered token embedding vector will be loaded and the rest will be skipped.
 
-
     Parameters
     ----------
     unknown_token : hashable object or None, default '<unk>'
         The representation for any unknown token. In other words, any unknown token will be indexed
         as the same representation.
 
-
     Properties
     ----------
     idx_to_token : list of strs
@@ -624,7 +621,6 @@ def deserialize(cls, file_path):
 
 class GloVe(TokenEmbedding):
     """The GloVe word embedding.
-
     GloVe is an unsupervised learning algorithm for obtaining vector representations for words.
     Training is performed on aggregated global word-word co-occurrence statistics from a corpus, and
     the resulting representations showcase interesting linear substructures of the word vector
@@ -647,7 +643,6 @@ class GloVe(TokenEmbedding):
 
     https://opendatacommons.org/licenses/pddl/
 
-
     Parameters
     ----------
     source : str, default 'glove.6B.50d'
@@ -657,7 +652,6 @@ class GloVe(TokenEmbedding):
     init_unknown_vec : callback
         The callback used to initialize the embedding vector for the unknown token.
 
-
     Properties
     ----------
     idx_to_vec : mxnet.ndarray.NDArray
@@ -723,7 +717,6 @@ class FastText(TokenEmbedding):
 
     https://creativecommons.org/licenses/by-sa/3.0/
 
-
     Parameters
     ----------
     source : str, default 'glove.6B.50d'
@@ -733,7 +726,6 @@ class FastText(TokenEmbedding):
     init_unknown_vec : callback
         The callback used to initialize the embedding vector for the unknown token.
 
-
     Properties
     ----------
     idx_to_vec : mxnet.ndarray.NDArray
gluonnlp/model/attention_cell.py (4 changes: 3 additions & 1 deletion)

@@ -401,7 +401,9 @@ def create_operator(self, ctx, in_shapes, in_dtypes):
 # pylint: enable=unused-argument
 
 class DotProductAttentionCell(AttentionCell):
-    r"""Dot product attention between the query and the key::
+    r"""Dot product attention between the query and the key.
+
+    Depending on parameters, defined as::
 
     units is None:
         score = <h_q, h_k>
gluonnlp/vocab.py (2 changes: 0 additions & 2 deletions)

@@ -38,7 +38,6 @@
 
 class Vocab(object):
     """Indexing and embedding attachment for text tokens.
-
     Parameters
     ----------
     counter : Counter or None, default None
@@ -72,7 +71,6 @@ class Vocab(object):
         and values of `reserved_tokens` must be of the same hashable type. Examples: str, int, and
         tuple.
 
-
     Properties
     ----------
     embedding : instance of :class:`gluonnlp.embedding.TokenEmbedding`
scripts/beam_search/beam_search_generator.rst (12 changes: 6 additions & 6 deletions)

@@ -7,13 +7,13 @@ This script can be used to generate sentences using beam search from a pretraine
 
 Use the following command to generate the sentences
 
-.. code-block:: bash
+.. code-block:: console
 
    $ python beam_search_generator.py --bos I love it --beam_size 5
 
 Output is
 
-.. code-block:: log
+.. code-block:: console
 
    Beam Seach Parameters: beam_size=5, alpha=0.0, K=5
    Generation Result:
@@ -23,13 +23,13 @@ Output is
 
 You can also try a larger beam size.
 
-.. code-block:: bash
+.. code-block:: console
 
    $ python beam_search_generator.py --bos I love it --beam_size 10
 
 Output is
 
-.. code-block:: log
+.. code-block:: console
 
    Beam Seach Parameters: beam_size=10, alpha=0.0, K=5
    Generation Result:
@@ -40,13 +40,13 @@ Output is
 Try beam size equals to 15
 
 
-.. code-block:: bash
+.. code-block:: console
 
    $ python beam_search_generator.py --bos I love it --beam_size 15
 
 Output is
 
-.. code-block:: log
+.. code-block:: console
 
    Beam Seach Parameters: beam_size=15, alpha=0.0, K=5
    Generation Result:
scripts/language_model/word_language_model.rst (10 changes: 5 additions & 5 deletions)

@@ -45,30 +45,30 @@ The dataset used for training the models is wikitext-2.
 
 [1] awd_lstm_lm_1150_wikitext-2 (Val PPL 73.32 Test PPL 69.74)
 
-.. code-block:: bash
+.. code-block:: console
 
    $ python word_language_model.py --gpus 0 --tied --save awd_lstm_lm_1150_wikitext-2
 
 [2] awd_lstm_lm_600_wikitext-2 (Val PPL 84.61 Test PPL 80.96)
 
-.. code-block:: bash
+.. code-block:: console
 
    $ python word_language_model.py -gpus 0 --emsize 200 --nhid 600 --dropout 0.2 --dropout_h 0.1 --dropout_i 0.3 --dropout_e 0.05 --weight_drop 0.2 --tied --save awd_lstm_lm_600_wikitext-2
 
 [3] standard_lstm_lm_1500_wikitext-2 (Val PPL 98.29 Test PPL 92.83)
 
-.. code-block:: bash
+.. code-block:: console
 
    $ python word_language_model.py --gpus 0 --emsize 1500 --nhid 1500 --nlayers 2 --lr 20 --epochs 750 --batch_size 20 --bptt 35 --dropout 0.65 --dropout_h 0 --dropout_i 0 --dropout_e 0 --weight_drop 0 --tied --wd 0 --alpha 0 --beta 0 --save standard_lstm_lm_1500_wikitext-2
 
 [4] standard_lstm_lm_650_wikitext-2 (Val PPL 98.96 Test PPL 93.90)
 
-.. code-block:: bash
+.. code-block:: console
 
    $ python word_language_model.py --gpus 0 --emsize 650 --nhid 650 --nlayers 2 --lr 20 --epochs 750 --batch_size 20 --bptt 35 --dropout 0.5 --dropout_h 0 --dropout_i 0 --dropout_e 0 --weight_drop 0 --tied --wd 0 --alpha 0 --beta 0 --save standard_lstm_lm_650_wikitext-2
 
 [5] standard_lstm_lm_200_wikitext-2 (Val PPL 108.25 Test PPL 102.26)
 
-.. code-block:: bash
+.. code-block:: console
 
    $ python word_language_model.py --gpus 0 --emsize 200 --nhid 200 --nlayers 2 --lr 20 --epochs 750 --batch_size 20 --bptt 35 --dropout 0.2 --dropout_h 0 --dropout_i 0 --dropout_e 0 --weight_drop 0 --tied --wd 0 --alpha 0 --beta 0 --save standard_lstm_lm_200_wikitext-2
scripts/nmt/machine_translation.rst (2 changes: 1 addition & 1 deletion)

@@ -5,7 +5,7 @@ Machine Translation
 
 Use the following command to train the GNMT model on the IWSLT2015 dataset.
 
-.. code-block:: bash
+.. code-block:: console
 
    $ python gnmt.py --src_lang en --tgt_lang vi --batch_size 64 \
        --optimizer adam --lr 0.001 --lr_update_factor 0.5 --beam_size 10 \
scripts/sentiment_analysis/sentiment_analysis.rst (4 changes: 2 additions & 2 deletions)

@@ -9,13 +9,13 @@ bucketing strategies to speed up training.
 
 Use the following command to run without using pretrained model
 
-.. code-block:: bash
+.. code-block:: console
 
    $ python sentiment_analysis.py --gpu 0 --batch_size 16 --bucket_type fixed --epochs 3 --dropout 0 --no_pretrained --lr 0.005 --valid_ratio 0.1 --save-prefix imdb_lstm_200 # Test Accuracy 85.36
 
 Use the following command to run with pretrained model
 
-.. code-block:: bash
+.. code-block:: console
 
    $ python sentiment_analysis.py --gpu 0 --batch_size 16 --bucket_type fixed --epochs 3 --dropout 0 --lr 0.005 --valid_ratio 0.1 --save-prefix imdb_lstm_200 # Test Accuracy 87.41
 
scripts/word_embedding_evaluation/extended_results.ipynb (3 changes: 3 additions & 0 deletions)

@@ -7024,6 +7024,9 @@
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
    "version": "3.6.4"
+  },
+  "nbsphinx": {
+   "orphan": true
   }
  },
 "nbformat": 4,
scripts/word_embedding_evaluation/word_embedding_evaluation.rst (4 changes: 2 additions & 2 deletions)

@@ -14,7 +14,7 @@ the respective datasets.
 We include a `run_all.sh` script to reproduce the results.
 
 
-.. code-block:: bash
+.. code-block:: console
 
    $ run_all.sh
@@ -23,7 +23,7 @@ To evaluate a specific embedding on one or multiple datasets you can use the
 included `word_embedding_evaluation.py` as follows.
 
 
-.. code-block:: bash
+.. code-block:: console
 
    $ python word_embedding_evaluation.py
setup.py (7 changes: 7 additions & 0 deletions)

@@ -57,5 +57,12 @@ def find_version(*file_paths):
             'nltk',
             'scipy',
         ],
+        'dev': [
+            'pytest',
+            'recommonmark',
+            'sphinx-gallery',
+            'sphinx_rtd_theme',
+            'nbsphinx',
+        ],
     },
 )
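
With the 'dev' extra declared, the documentation toolchain can be pulled in
through pip's extras syntax; a typical invocation from a source checkout
would be:

    pip install -e '.[dev]'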
