[TVM] Fix Tvm Integration (dmlc#1502)

* fix-tvm-integration * update * update * update unittests.yml * update doc
PawelGlomski-Intel · Jan 23, 2021 · 4624e6b · 4624e6b
1 parent 09f3435
commit 4624e6b
Show file tree

Hide file tree

Showing 10 changed files with 94 additions and 61 deletions.
diff --git a/.github/workflows/unittests.yml b/.github/workflows/unittests.yml
@@ -55,7 +55,7 @@ jobs:
           python -m pip install --upgrade pip
           python -m pip install setuptools pytest pytest-cov contextvars
           python -m pip install --upgrade cython
-          python -m pip install --pre "mxnet>=2.0.0b20201206" -f https://dist.mxnet.io/python
+          python -m pip install --pre "mxnet>=2.0.0b20210121" -f https://dist.mxnet.io/python
           python -m pip install -U -e .[extras]
       - name: Build and Install TVM
         if: matrix.os == 'ubuntu-latest'

diff --git a/README.md b/README.md
@@ -34,16 +34,16 @@ First of all, install the latest MXNet. You may use the following commands:
 
 ```bash
 # Install the version with CUDA 10.1
-python3 -m pip install -U --pre "mxnet-cu101>=2.0.0b20201206" -f https://dist.mxnet.io/python
+python3 -m pip install -U --pre "mxnet-cu101>=2.0.0b20210121" -f https://dist.mxnet.io/python
 
 # Install the version with CUDA 10.2
-python3 -m pip install -U --pre "mxnet-cu102>=2.0.0b20201206" -f https://dist.mxnet.io/python
+python3 -m pip install -U --pre "mxnet-cu102>=2.0.0b20210121" -f https://dist.mxnet.io/python
 
 # Install the version with CUDA 11
-python3 -m pip install -U --pre "mxnet-cu110>=2.0.0b20201206" -f https://dist.mxnet.io/python
+python3 -m pip install -U --pre "mxnet-cu110>=2.0.0b20210121" -f https://dist.mxnet.io/python
 
 # Install the cpu-only version
-python3 -m pip install -U --pre "mxnet>=2.0.0b20201206" -f https://dist.mxnet.io/python
+python3 -m pip install -U --pre "mxnet>=2.0.0b20210121" -f https://dist.mxnet.io/python
 ```
 
 

diff --git a/src/gluonnlp/utils/misc.py b/src/gluonnlp/utils/misc.py
@@ -1,7 +1,7 @@
 __all__ = ['glob', 'file_line_number', 'md5sum', 'sha1sum', 'naming_convention',
            'logging_config', 'set_seed', 'sizeof_fmt', 'grouper', 'repeat',
            'parse_ctx', 'load_checksum_stats', 'download', 'check_version',
-           'init_comm', 'get_mxnet_visible_ctx', 'get_ec2_tvm_flags']
+           'init_comm', 'get_mxnet_visible_ctx']
 
 import os
 import sys
@@ -11,7 +11,7 @@
 import functools
 import uuid
 from types import ModuleType
-from typing import Optional, Tuple, Dict
+from typing import Optional, Tuple
 import numpy as np
 import hashlib
 import requests
@@ -584,46 +584,3 @@ def get_mxnet_visible_ctx():
         ctx_l = [mx.gpu(i) for i in range(num_gpus)]
     return ctx_l
 
-
-def get_ec2_tvm_flags() -> Dict[str, Dict]:
-    r"""Return the recommended flags for TVM compilation in AWS EC2 instances.
-
-    Including C4, C5, G4, P3.
-
-    For more details about AWS EC2 instances, refer to https://aws.amazon.com/ec2/instance-types/.
-
-    Returns
-    -------
-    info_dict
-        A dictionary that contains the mapping between instance type and the
-        corresponding compilation flags.
-        Each element includes:
-
-        - target
-            The compilation target
-        - use_gpu
-            Whether it's a GPU instance
-        - opt_level
-            The optimization level in compilation
-        - pass
-            Additional graph passes for further improvement.
-    """
-    instance_info = {
-        'g4': {'target': "cuda -model=t4 -libs=cublas,cudnn",
-               'use_gpu': True,
-               'opt_level': 3,
-               'required_pass': ["FastMath"]},
-        'c4': {'target': 'llvm -mcpu=core-avx2 -libs=cblas',
-               'use_gpu': False,
-               'opt_level': 3,
-               'required_pass': ["FastMath"]},
-        'c5': {'target': 'llvm -mcpu=skylake-avx512 -libs=cblas',
-               'use_gpu': False,
-               'opt_level': 3,
-               'required_pass': ["FastMath"]},
-        'p3': {'target': 'cuda -model=v100 -libs=cublas,cudnn',
-               'use_gpu': True,
-               'opt_level': 3,
-               'required_pass': ["FastMath"]}
-    }
-    return instance_info
diff --git a/src/gluonnlp/utils/tvm_utils.py b/src/gluonnlp/utils/tvm_utils.py
@@ -0,0 +1,75 @@
+__all__ = ['get_ec2_tvm_flags', 'update_tvm_convert_map']
+
+import tvm.relay.op as _op
+import tvm.relay.expr as _expr
+from typing import Dict
+from tvm.relay.frontend.mxnet import _convert_map
+from tvm.relay.frontend.common import infer_type as _infer_type
+
+def get_ec2_tvm_flags() -> Dict[str, Dict]:
+    r"""Return the recommended flags for TVM compilation in AWS EC2 instances.
+
+    Including C4, C5, G4, P3.
+
+    For more details about AWS EC2 instances, refer to https://aws.amazon.com/ec2/instance-types/.
+
+    Returns
+    -------
+    info_dict
+        A dictionary that contains the mapping between instance type and the
+        corresponding compilation flags.
+        Each element includes:
+
+        - target
+            The compilation target
+        - use_gpu
+            Whether it's a GPU instance
+        - opt_level
+            The optimization level in compilation
+        - pass
+            Additional graph passes for further improvement.
+    """
+    instance_info = {
+        'g4': {'target': "cuda -model=t4 -libs=cublas,cudnn",
+               'use_gpu': True,
+               'opt_level': 3,
+               'required_pass': ["FastMath"]},
+        'c4': {'target': 'llvm -mcpu=core-avx2 -libs=cblas',
+               'use_gpu': False,
+               'opt_level': 3,
+               'required_pass': ["FastMath"]},
+        'c5': {'target': 'llvm -mcpu=skylake-avx512 -libs=cblas',
+               'use_gpu': False,
+               'opt_level': 3,
+               'required_pass': ["FastMath"]},
+        'p3': {'target': 'cuda -model=v100 -libs=cublas,cudnn',
+               'use_gpu': True,
+               'opt_level': 3,
+               'required_pass': ["FastMath"]}
+    }
+    return instance_info
+
+
+def update_tvm_convert_map() -> None:
+    """A Monkey Patch to update convert map in tvm/relay/frontend/mxnet.py"""
+    op = (('masked_softmax', _mx_masked_softmax),)
+    _convert_map.update({key: value for key, value in op})
+
+
+def _mx_masked_softmax(inputs, attrs):
+    assert len(inputs) == 1 or len(inputs) == 2
+    axis = attrs.get_int("axis")
+    temperature = attrs.get_float("temperature")
+    if len(inputs) == 1:
+        result = _op.nn.softmax(inputs[0] / _expr.const(temperature), axis=axis)
+    else:
+        neg = -1e18
+        att_score, mask = inputs
+        att_score_dtype = _infer_type(att_score).checked_type.dtype
+        if att_score_dtype == "float16":
+            neg = -1e4
+        temp = _op.where(mask, 
+                         att_score,
+                         _expr.const(neg))
+        result = _op.multiply(_op.nn.softmax(temp / _expr.const(temperature), axis=axis), mask.astype("float32"))
+    return result
diff --git a/tests/test_models.py b/tests/test_models.py
@@ -5,7 +5,6 @@
 import numpy as np
 import numpy.testing as npt
 from gluonnlp.models import get_backbone, list_backbone_names
-from gluonnlp.utils.misc import get_ec2_tvm_flags
 from gluonnlp.utils.parameter import count_parameters
 from gluonnlp.utils.lazy_imports import try_import_tvm
 mx.npx.set_np()
@@ -74,11 +73,13 @@ def test_get_backbone(name, ctx):
 @pytest.mark.parametrize('layout', ['NT', 'TN'])
 @pytest.mark.skipif(not tvm_enabled(),
                     reason='TVM is not supported. So this test is skipped.')
-@pytest.mark.skip('TVM issue https://github.com/dmlc/gluon-nlp/issues/1425.')
+# @pytest.mark.skip('TVM issue https://github.com/dmlc/gluon-nlp/issues/1425.')
 def test_tvm_integration(model_name, batch_size, seq_length, layout, ctx):
     tvm = try_import_tvm()
     from tvm import relay
     from tvm.contrib import graph_runtime
+    from gluonnlp.utils.tvm_utils import get_ec2_tvm_flags, update_tvm_convert_map
+    update_tvm_convert_map()
     tvm_recommended_flags = get_ec2_tvm_flags()
     if ctx.device_type == 'gpu':
         flags = tvm_recommended_flags['g4']
@@ -162,11 +163,11 @@ def test_tvm_integration(model_name, batch_size, seq_length, layout, ctx):
             ctx = tvm.cpu()
         rt = graph_runtime.GraphModule(lib["default"](ctx))
         if 'bart' in model_name:
-            rt.set_input(data0=token_ids, data1=valid_length, data2=token_ids, data3=valid_length)
+            rt.set_input(data0=token_ids.asnumpy(), data1=valid_length.asnumpy(), data2=token_ids.asnumpy(), data3=valid_length.asnumpy())
         elif 'roberta' in model_name:
-            rt.set_input(data0=token_ids, data1=valid_length)
+            rt.set_input(data0=token_ids.asnumpy(), data1=valid_length.asnumpy())
         else:
-            rt.set_input(data0=token_ids, data1=token_types, data2=valid_length)
+            rt.set_input(data0=token_ids.asnumpy(), data1=token_types.asnumpy(), data2=valid_length.asnumpy())
         rt.run()
         for i in range(rt.get_num_outputs()):
             out = rt.get_output(i)

diff --git a/tools/docker/gluon_nlp_job.sh b/tools/docker/gluon_nlp_job.sh
@@ -23,15 +23,15 @@ git fetch origin $SOURCE_REF:working
 git checkout working
 
 if [ $DEVICE == "cpu" ]; then
-  python3 -m pip install -U --quiet --pre "mxnet>=2.0.0b20201206" -f https://dist.mxnet.io/python --user
+  python3 -m pip install -U --quiet --pre "mxnet>=2.0.0b20210121" -f https://dist.mxnet.io/python --user
 else
   # Due to the issue in https://forums.aws.amazon.com/thread.jspa?messageID=953912
   # We need to manually configure the shm to ensure that Horovod is runnable.
   # The reason that we need a larger shm is described in https://github.com/NVIDIA/nccl/issues/290
   umount shm
   mount -t tmpfs -o rw,nosuid,nodev,noexec,relatime,size=2G shm /dev/shm
   sudo python3 -m pip uninstall --quiet mxnet-cu102
-  python3 -m pip install -U --quiet --pre "mxnet-cu102>=2.0.0b20201206" -f https://dist.mxnet.io/python/cu102 --user
+  python3 -m pip install -U --quiet --pre "mxnet-cu102>=2.0.0b20210121" -f https://dist.mxnet.io/python/cu102 --user
 fi
 
 python3 -m pip install --quiet -e .[extras]

diff --git a/tools/docker/install/install_tvm_cpu.sh b/tools/docker/install/install_tvm_cpu.sh
@@ -24,7 +24,7 @@ cd ${WORKDIR}
 git clone https://github.com/apache/incubator-tvm tvm --recursive
 cd ${WORKDIR}/tvm
 # checkout a hash-tag
-git checkout ef6e52f191888ee2a5f2221bde3b69391766903f
+git checkout 790344c6ef035947caaaf1cd812ade8d862802aa
 
 mkdir -p build
 cp cmake/config.cmake build

diff --git a/tools/docker/install/install_tvm_gpu.sh b/tools/docker/install/install_tvm_gpu.sh
@@ -24,7 +24,7 @@ cd ${WORKDIR}
 git clone https://github.com/apache/incubator-tvm tvm --recursive
 cd ${WORKDIR}/tvm
 # checkout a hash-tag
-git checkout ef6e52f191888ee2a5f2221bde3b69391766903f
+git checkout 790344c6ef035947caaaf1cd812ade8d862802aa
 
 
 mkdir -p build

diff --git a/tools/docker/ubuntu18.04-cpu.Dockerfile b/tools/docker/ubuntu18.04-cpu.Dockerfile
@@ -33,7 +33,7 @@ RUN bash /install/install_python_packages.sh
 RUN bash /install/install_tvm_cpu.sh
 
 # Install MXNet
-RUN python3 -m pip install -U --pre "mxnet>=2.0.0b20201206" -f https://dist.mxnet.io/python --user
+RUN python3 -m pip install -U --pre "mxnet>=2.0.0b20210121" -f https://dist.mxnet.io/python --user
 
 # Install PyTorch
 RUN python3 -m pip install -U torch torchvision --user

diff --git a/tools/docker/ubuntu18.04-gpu.Dockerfile b/tools/docker/ubuntu18.04-gpu.Dockerfile
@@ -32,7 +32,7 @@ RUN bash /install/install_python_packages.sh
 RUN bash /install/install_tvm_gpu.sh
 
 # Install MXNet
-RUN python3 -m pip install -U --pre "mxnet-cu102>=2.0.0b20201206" -f https://dist.mxnet.io/python --user
+RUN python3 -m pip install -U --pre "mxnet-cu102>=2.0.0b20210121" -f https://dist.mxnet.io/python --user
 
 # Install PyTorch
 RUN python3 -m pip install -U torch torchvision --user