Switch CI to Python 3.5 and declare Python 3.5 support (#1009)

* Switch CI to Python 3.5 and declare Python 3.5 support
* Fix lint after updating pylint
* Fix imports
* Workaround bug in mx-theme setup.py
* Fix Python 3.5 support in codebase
* Fix lint in scripts/bert
* Delete Python3.5 incompatible multiprocessing.Pool usage of BERTDatasetTransform

  Traceback (most recent call last):
    File "./scripts/bert/finetune_classifier.py", line 373, in <module>
      bert_tokenizer, task, batch_size, dev_batch_size, args.max_len, vocabulary, args.pad)
    File "./scripts/bert/finetune_classifier.py", line 306, in preprocess_data
      data_train = mx.gluon.data.SimpleDataset(pool.map(trans, train_tsv))
    File "/var/lib/jenkins/workspace/gluon-nlp-gpu-py3-master@6/conda/gpu/py3-master/lib/python3.5/multiprocessing/pool.py", line 266, in map
      return self._map_async(func, iterable, mapstar, chunksize).get()
    File "/var/lib/jenkins/workspace/gluon-nlp-gpu-py3-master@6/conda/gpu/py3-master/lib/python3.5/multiprocessing/pool.py", line 644, in get
      raise self._value
    File "/var/lib/jenkins/workspace/gluon-nlp-gpu-py3-master@6/conda/gpu/py3-master/lib/python3.5/multiprocessing/pool.py", line 424, in _handle_tasks
      put(task)
    File "/var/lib/jenkins/workspace/gluon-nlp-gpu-py3-master@6/conda/gpu/py3-master/lib/python3.5/multiprocessing/connection.py", line 206, in send
      self._send_bytes(ForkingPickler.dumps(obj))
    File "/var/lib/jenkins/workspace/gluon-nlp-gpu-py3-master@6/conda/gpu/py3-master/lib/python3.5/multiprocessing/reduction.py", line 50, in dumps
      cls(buf, protocol).dump(obj)
  _pickle.PicklingError: Can't pickle <class 'module'>: attribute lookup module on builtins failed
dmlc · Nov 15, 2019 · 5e11334 · 5e11334
1 parent e09281c
commit 5e11334
Show file tree

Hide file tree

Showing 32 changed files with 158 additions and 179 deletions.
diff --git a/README.rst b/README.rst
@@ -15,7 +15,7 @@ GluonNLP: Your Choice of Deep Learning for NLP
 
 .. raw:: html
 
-   <a href='http://ci.mxnet.io/job/gluon-nlp/job/master/'><img src='https://img.shields.io/badge/python-3.6%2C3.7-blue.svg'></a>
+   <a href='http://ci.mxnet.io/job/gluon-nlp/job/master/'><img src='https://img.shields.io/badge/python-3.5%2C3.7-blue.svg'></a>
    <a href='https://codecov.io/gh/dmlc/gluon-nlp'><img src='https://codecov.io/gh/dmlc/gluon-nlp/branch/master/graph/badge.svg'></a>
    <a href='http://ci.mxnet.io/job/gluonnlp-py3-master-gpu-doc/job/master/'><img src='http://ci.mxnet.io/buildStatus/icon?job=gluonnlp-py3-master-gpu-doc%2Fmaster'></a>
    <a href='https://pypi.org/project/gluonnlp/#history'><img src='https://img.shields.io/pypi/v/gluonnlp.svg'></a>
@@ -43,8 +43,8 @@ News
 Installation
 ============
 
-Make sure you have Python 3.6 or newer and a recent version of MXNet (our CI
-server runs the testsuite with Python 3.6).
+Make sure you have Python 3.5 or newer and a recent version of MXNet (our CI
+server runs the testsuite with Python 3.5).
 
 You can install ``MXNet`` and ``GluonNLP`` using pip.
 

diff --git a/docs/install/install-include.rst b/docs/install/install-include.rst
@@ -47,7 +47,7 @@ Select your preferences and run the install command.
   .. admonition:: Prerequisites:
 
      - Requires `pip >= 9. <https://pip.pypa.io/en/stable/installing/>`_.
-       Python 3.6+ are supported.
+       Python 3.5+ is supported.
 
      .. container:: nightly
 

diff --git a/env/cpu/py3-master.yml b/env/cpu/py3-master.yml
@@ -1,26 +1,26 @@
 channels:
   - conda-forge
 dependencies:
-  - python=3.6
-  - pip=18.1
-  - cython
+  - python=3.5
+  - pip
   - perl
-  - pylint=2.3.1
-  - flake8
-  - sphinx=2.1.2
-  - spacy>2
-  - nltk
-  - pytest=4.5.0
-  - pytest-env
-  - flaky=3.5.3
-  - pytest-cov=2.7.1
-  - mock<3
-  - pytest-xdist<2
-  - regex
-  - scipy=1.3.1
   - pip:
+    - cython
+    - pytest==5.2.3
+    - pytest-env==0.6.2
+    - pytest-cov==2.8.1
+    - pytest-xdist==1.30.0
+    - pylint==2.4.4
     - pylint-quotes==0.2.1
+    - flaky==3.6.1
+    - flake8==3.7.9
+    - mock<3
+    - sphinx==2.2.1
     - mxnet>=1.6.0b20191103
-    - sacremoses
-    - sentencepiece<0.2
+    - scipy==1.3.2
+    - regex==2019.11.1 
+    - nltk==3.4.5
+    - sacremoses==0.0.35
+    - spacy==2.2.2
+    - sentencepiece==0.1.83
     - sphinx-autodoc-typehints==1.7.0
diff --git a/env/docker/py3.yml b/env/docker/py3.yml
@@ -1,39 +1,40 @@
 channels:
   - conda-forge
 dependencies:
-  - python=3.6
-  - pip=18.1
-  - cython
+  - python=3.5
+  - pip
   - perl
-  - pylint=1.9.2
-  - flake8
-  - sphinx=2.1.2
-  - spacy>2
-  - nltk
-  - pytest=4.5.0
-  - pytest-env
-  - flaky=3.5.3
-  - pytest-cov=2.7.1
-  - mock<3
-  - pytest-xdist<2
-  - recommonmark
   - pandoc=1.19.2
-  - notedown
-  - numba>=v0.40.0
-  - sphinx-gallery
-  - nbsphinx>=0.3.4,<0.4
-  - nbconvert=5.4.0
   - tornado=5.1.1
-  - ipython
-  - ipykernel
-  - regex
-  - scipy=1.3.1
-  - scikit-learn=0.21.3
+  - sphinx=2.2.1
   - pip:
-    - pylint-quotes<0.2
-    - mxnet-cu100>=1.6.0b20191027
-    - sacremoses
-    - sentencepiece<0.2
+    - notedown==1.5.1
+    - sphinx-gallery==0.4.0
+    - recommonmark==0.6.0
+    - nbconvert==5.6.1
+    - nbsphinx>=0.3.4,<0.4
+    - ipython
+    - ipykernel
+    - numba==0.46
     - https://github.com/szha/mx-theme/tarball/master
     - seaborn
     - jieba
+    - scikit-learn==0.21.3
+    - cython
+    - pytest==5.2.3
+    - pytest-env==0.6.2
+    - pytest-cov==2.8.1
+    - pytest-xdist==1.30.0
+    - pylint==2.4.4
+    - pylint-quotes==0.2.1
+    - flaky==3.6.1
+    - flake8==3.7.9
+    - mock<3
+    - mxnet-cu100>=1.6.0b20191027,!=1.6.0b20191102
+    - scipy==1.3.2
+    - regex==2019.11.1 
+    - nltk==3.4.5
+    - sacremoses==0.0.35
+    - spacy==2.2.2
+    - sentencepiece==0.1.83
+    - sphinx-autodoc-typehints==1.7.0
diff --git a/env/gpu/py3-master.yml b/env/gpu/py3-master.yml
@@ -1,39 +1,39 @@
 channels:
   - conda-forge
 dependencies:
-  - python=3.6
-  - pip=18.1
-  - cython
+  - python=3.5
+  - pip
   - perl
-  - pylint=2.3.1
-  - flake8
-  - sphinx=2.1.2
-  - spacy>2
-  - nltk
-  - pytest=4.5.0
-  - pytest-env
-  - flaky=3.5.3
-  - pytest-cov=2.7.1
-  - mock<3
-  - pytest-xdist<2
-  - recommonmark
   - pandoc=1.19.2
-  - notedown
-  - numba>=v0.40.0
-  - sphinx-gallery
-  - nbsphinx>=0.3.4,<0.4
-  - nbconvert=5.4.0
   - tornado=5.1.1
-  - ipython
-  - ipykernel
-  - regex
-  - scipy=1.3.1
+  - sphinx=2.2.1
   - pip:
-    - pylint-quotes<0.2
-    - mxnet-cu100>=1.6.0b20191027,!=1.6.0b20191102
-    - sacremoses
-    - sentencepiece<0.2
+    - notedown==1.5.1
+    - sphinx-gallery==0.4.0
+    - recommonmark==0.6.0
+    - nbconvert==5.6.1
+    - nbsphinx>=0.3.4,<0.4
+    - ipython
+    - ipykernel
+    - numba==0.46
     - https://github.com/szha/mx-theme/tarball/master
     - seaborn
     - jieba
+    - cython
+    - pytest==5.2.3
+    - pytest-env==0.6.2
+    - pytest-cov==2.8.1
+    - pytest-xdist==1.30.0
+    - pylint==2.4.4
+    - pylint-quotes==0.2.1
+    - flaky==3.6.1
+    - flake8==3.7.9
+    - mock<3
+    - mxnet-cu100>=1.6.0b20191027,!=1.6.0b20191102
+    - scipy==1.3.2
+    - regex==2019.11.1 
+    - nltk==3.4.5
+    - sacremoses==0.0.35
+    - spacy==2.2.2
+    - sentencepiece==0.1.83
     - sphinx-autodoc-typehints==1.7.0
diff --git a/scripts/bert/conversion_tools/convert_paddle_to_gluon.py b/scripts/bert/conversion_tools/convert_paddle_to_gluon.py
@@ -132,7 +132,7 @@ def extract_weights(args):
         if 'w_0' in ernie_name:
             fluid_array = fluid_array.transpose()
         state_dict[gluon_name] = fluid_array
-        print(f'{ernie_name} -> {gluon_name} {fluid_array.shape}')
+        print('{} -> {} {}'.format(ernie_name, gluon_name, fluid_array.shape))
     print('extract weights done!'.center(60, '='))
     return state_dict
 

diff --git a/scripts/bert/data/pretrain.py b/scripts/bert/data/pretrain.py
@@ -19,6 +19,7 @@
 """Dataset for pre-training. """
 import logging
 
+from mxnet.gluon.data import DataLoader
 import gluonnlp as nlp
 from gluonnlp.data.batchify import Tuple, Stack, Pad
 try:
@@ -70,7 +71,6 @@ def __init__(self, num_ctxes, vocab):
                                   Stack())                          # valid_length
 
     def __call__(self, dataset, sampler):
-        from mxnet.gluon.data import DataLoader
         dataloader = DataLoader(dataset=dataset,
                                 batch_sampler=sampler,
                                 batchify_fn=self._batchify_fn,

diff --git a/scripts/bert/finetune_classifier.py b/scripts/bert/finetune_classifier.py
@@ -39,10 +39,10 @@
 import random
 import logging
 import warnings
-import multiprocessing
 import numpy as np
 import mxnet as mx
 from mxnet import gluon
+from mxnet.contrib.amp import amp
 import gluonnlp as nlp
 from gluonnlp.data import BERTTokenizer
 from gluonnlp.model import BERTClassifier, RoBERTaClassifier
@@ -208,19 +208,13 @@
 # data type with mixed precision training
 if args.dtype == 'float16':
     try:
-        from mxnet.contrib import amp # pylint: disable=ungrouped-imports
         # monkey patch amp list since topk does not support fp16
         amp.lists.symbol.FP32_FUNCS.append('topk')
         amp.lists.symbol.FP16_FP32_FUNCS.remove('topk')
         amp.init()
     except ValueError:
         # topk is already in the FP32_FUNCS list
         amp.init()
-    except ImportError:
-        # amp is not available
-        logging.info('Mixed precision training with float16 requires MXNet >= '
-                     '1.5.0b20190627. Please consider upgrading your MXNet version.')
-        exit()
 
 # model and loss
 only_inference = args.only_inference
@@ -294,8 +288,6 @@
 
 def preprocess_data(tokenizer, task, batch_size, dev_batch_size, max_len, vocab, pad=False):
     """Train/eval Data preparation function."""
-    pool = multiprocessing.Pool()
-
     # transformation for data train and dev
     label_dtype = 'float32' if not task.class_labels else 'int32'
     trans = BERTDatasetTransform(tokenizer, max_len,
@@ -308,7 +300,7 @@ def preprocess_data(tokenizer, task, batch_size, dev_batch_size, max_len, vocab,
     # data train
     # task.dataset_train returns (segment_name, dataset)
     train_tsv = task.dataset_train()[1]
-    data_train = mx.gluon.data.SimpleDataset(pool.map(trans, train_tsv))
+    data_train = mx.gluon.data.SimpleDataset(list(map(trans, train_tsv)))
     data_train_len = data_train.transform(
         lambda input_id, length, segment_id, label_id: length, lazy=False)
     # bucket sampler for training
@@ -336,7 +328,7 @@ def preprocess_data(tokenizer, task, batch_size, dev_batch_size, max_len, vocab,
     dev_tsv_list = dev_tsv if isinstance(dev_tsv, list) else [dev_tsv]
     loader_dev_list = []
     for segment, data in dev_tsv_list:
-        data_dev = mx.gluon.data.SimpleDataset(pool.map(trans, data))
+        data_dev = mx.gluon.data.SimpleDataset(list(map(trans, data)))
         loader_dev = mx.gluon.data.DataLoader(
             data_dev,
             batch_size=dev_batch_size,
@@ -361,7 +353,7 @@ def preprocess_data(tokenizer, task, batch_size, dev_batch_size, max_len, vocab,
     test_tsv_list = test_tsv if isinstance(test_tsv, list) else [test_tsv]
     loader_test_list = []
     for segment, data in test_tsv_list:
-        data_test = mx.gluon.data.SimpleDataset(pool.map(test_trans, data))
+        data_test = mx.gluon.data.SimpleDataset(list(map(test_trans, data)))
         loader_test = mx.gluon.data.DataLoader(
             data_test,
             batch_size=dev_batch_size,

diff --git a/scripts/bert/model/ner.py b/scripts/bert/model/ner.py
@@ -16,6 +16,8 @@
 # under the License.
 """Gluon model block for the named entity recognition task."""
 
+from contextlib import ExitStack
+
 import mxnet as mx
 from mxnet.gluon import Block, nn
 
@@ -91,7 +93,6 @@ def attach_prediction(data_loader, net, ctx, is_train):
         text_ids, token_types, valid_length, tag_ids, flag_nonnull_tag = \
             [x.astype('float32').as_in_context(ctx) for x in data]
 
-        from contextlib import ExitStack
         with ExitStack() as stack:
             if is_train:
                 stack.enter_context(mx.autograd.record())

diff --git a/scripts/bert/pretraining_utils.py b/scripts/bert/pretraining_utils.py
@@ -18,6 +18,7 @@
 """Utilities for pre-training."""
 import time
 import os
+import sys
 import logging
 import random
 import multiprocessing
@@ -437,4 +438,4 @@ def profile(curr_step, start_step, end_step, profile_name='profile.json',
         logging.info(mx.profiler.dumps())
         mx.profiler.dump()
         if early_exit:
-            exit()
+            sys.exit(0)
diff --git a/scripts/bert/run_pretraining.py b/scripts/bert/run_pretraining.py
@@ -29,6 +29,7 @@
 # pylint:disable=redefined-outer-name,logging-format-interpolation
 
 import os
+import sys
 import random
 import warnings
 import logging
@@ -183,10 +184,10 @@ def init_comm(backend):
     # backend specific implementation
     if backend == 'horovod':
         try:
-            import horovod.mxnet as hvd
+            import horovod.mxnet as hvd  # pylint: disable=import-outside-toplevel
         except ImportError:
             logging.info('horovod must be installed.')
-            exit()
+            sys.exit(1)
         hvd.init()
         store = None
         num_workers = hvd.size()

diff --git a/scripts/bert/utils.py b/scripts/bert/utils.py
@@ -16,6 +16,7 @@
 # under the License.
 """Utility functions for BERT."""
 
+import sys
 import logging
 import collections
 import hashlib
@@ -48,7 +49,7 @@ def get_hash(filename):
 
 def read_tf_checkpoint(path):
     """read tensorflow checkpoint"""
-    from tensorflow.python import pywrap_tensorflow
+    from tensorflow.python import pywrap_tensorflow  # pylint: disable=import-outside-toplevel
     tensors = {}
     reader = pywrap_tensorflow.NewCheckpointReader(path)
     var_to_shape_map = reader.get_variable_to_shape_map()
@@ -72,7 +73,7 @@ def profile(curr_step, start_step, end_step, profile_name='profile.json',
         logging.info(mx.profiler.dumps())
         mx.profiler.dump()
         if early_exit:
-            exit()
+            sys.exit(0)
 
 def load_text_vocab(vocab_file):
     """Loads a vocabulary file into a dictionary."""