Skip to content

Commit

Permalink
[TVM] Fix Tvm Integration (dmlc#1502)
Browse files Browse the repository at this point in the history
* fix-tvm-integration

* update

* update

* update unittests.yml

* update doc
  • Loading branch information
barry-jin authored Jan 23, 2021
1 parent 09f3435 commit 4624e6b
Show file tree
Hide file tree
Showing 10 changed files with 94 additions and 61 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/unittests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ jobs:
python -m pip install --upgrade pip
python -m pip install setuptools pytest pytest-cov contextvars
python -m pip install --upgrade cython
python -m pip install --pre "mxnet>=2.0.0b20201206" -f https://dist.mxnet.io/python
python -m pip install --pre "mxnet>=2.0.0b20210121" -f https://dist.mxnet.io/python
python -m pip install -U -e .[extras]
- name: Build and Install TVM
if: matrix.os == 'ubuntu-latest'
Expand Down
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,16 +34,16 @@ First of all, install the latest MXNet. You may use the following commands:

```bash
# Install the version with CUDA 10.1
python3 -m pip install -U --pre "mxnet-cu101>=2.0.0b20201206" -f https://dist.mxnet.io/python
python3 -m pip install -U --pre "mxnet-cu101>=2.0.0b20210121" -f https://dist.mxnet.io/python

# Install the version with CUDA 10.2
python3 -m pip install -U --pre "mxnet-cu102>=2.0.0b20201206" -f https://dist.mxnet.io/python
python3 -m pip install -U --pre "mxnet-cu102>=2.0.0b20210121" -f https://dist.mxnet.io/python

# Install the version with CUDA 11
python3 -m pip install -U --pre "mxnet-cu110>=2.0.0b20201206" -f https://dist.mxnet.io/python
python3 -m pip install -U --pre "mxnet-cu110>=2.0.0b20210121" -f https://dist.mxnet.io/python

# Install the cpu-only version
python3 -m pip install -U --pre "mxnet>=2.0.0b20201206" -f https://dist.mxnet.io/python
python3 -m pip install -U --pre "mxnet>=2.0.0b20210121" -f https://dist.mxnet.io/python
```


Expand Down
47 changes: 2 additions & 45 deletions src/gluonnlp/utils/misc.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
__all__ = ['glob', 'file_line_number', 'md5sum', 'sha1sum', 'naming_convention',
'logging_config', 'set_seed', 'sizeof_fmt', 'grouper', 'repeat',
'parse_ctx', 'load_checksum_stats', 'download', 'check_version',
'init_comm', 'get_mxnet_visible_ctx', 'get_ec2_tvm_flags']
'init_comm', 'get_mxnet_visible_ctx']

import os
import sys
Expand All @@ -11,7 +11,7 @@
import functools
import uuid
from types import ModuleType
from typing import Optional, Tuple, Dict
from typing import Optional, Tuple
import numpy as np
import hashlib
import requests
Expand Down Expand Up @@ -584,46 +584,3 @@ def get_mxnet_visible_ctx():
ctx_l = [mx.gpu(i) for i in range(num_gpus)]
return ctx_l


def get_ec2_tvm_flags() -> Dict[str, Dict]:
    r"""Look up the suggested TVM compilation settings for common AWS EC2 instance families.

    The C4, C5, G4 and P3 families are covered. For a description of the instance
    types, see https://aws.amazon.com/ec2/instance-types/.

    Returns
    -------
    info_dict
        Maps the instance family name ('g4', 'c4', 'c5', 'p3') to a dictionary
        holding the compilation flags of that family.
        Each element includes:

        - target
            The compilation target
        - use_gpu
            Whether it's a GPU instance
        - opt_level
            The optimization level in compilation
        - required_pass
            Additional graph passes for further improvement.
    """
    # One row per family: (family name, target string, is it a GPU instance).
    # Rows are listed in the order in which the entries are inserted.
    families = (
        ('g4', "cuda -model=t4 -libs=cublas,cudnn", True),
        ('c4', 'llvm -mcpu=core-avx2 -libs=cblas', False),
        ('c5', 'llvm -mcpu=skylake-avx512 -libs=cblas', False),
        ('p3', 'cuda -model=v100 -libs=cublas,cudnn', True),
    )
    instance_info = {}
    for family, target, use_gpu in families:
        # The optimization level and the extra pass are the same for every family.
        # A new list is built per entry so callers never share a mutable object.
        instance_info[family] = {
            'target': target,
            'use_gpu': use_gpu,
            'opt_level': 3,
            'required_pass': ["FastMath"],
        }
    return instance_info
75 changes: 75 additions & 0 deletions src/gluonnlp/utils/tvm_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
__all__ = ['get_ec2_tvm_flags', 'update_tvm_convert_map']

import tvm.relay.op as _op
import tvm.relay.expr as _expr
from typing import Dict
from tvm.relay.frontend.mxnet import _convert_map
from tvm.relay.frontend.common import infer_type as _infer_type

def get_ec2_tvm_flags() -> Dict[str, Dict]:
    r"""Return the recommended TVM compilation flags for AWS EC2 instance families.

    Covers the C4, C5, G4 and P3 families. More details about AWS EC2 instances
    can be found at https://aws.amazon.com/ec2/instance-types/.

    Returns
    -------
    info_dict
        A dictionary keyed by instance family ('g4', 'c4', 'c5', 'p3'). Every value
        is itself a dictionary of compilation flags.
        Each element includes:

        - target
            The compilation target
        - use_gpu
            Whether it's a GPU instance
        - opt_level
            The optimization level in compilation
        - required_pass
            Additional graph passes for further improvement.
    """
    # family name -> (compilation target, whether the family has GPUs)
    target_by_family = {
        'g4': ("cuda -model=t4 -libs=cublas,cudnn", True),
        'c4': ('llvm -mcpu=core-avx2 -libs=cblas', False),
        'c5': ('llvm -mcpu=skylake-avx512 -libs=cblas', False),
        'p3': ('cuda -model=v100 -libs=cublas,cudnn', True),
    }
    # All families share opt_level 3 and the FastMath pass; the list literal is
    # evaluated once per family, so each entry owns its own list.
    return {
        family: {
            'target': target,
            'use_gpu': has_gpu,
            'opt_level': 3,
            'required_pass': ["FastMath"],
        }
        for family, (target, has_gpu) in target_by_family.items()
    }


def update_tvm_convert_map() -> None:
    """Monkey-patch the convert map in tvm/relay/frontend/mxnet.py.

    Adds (or overwrites) the ``masked_softmax`` entry of the imported
    ``_convert_map`` in place, so it points at ``_mx_masked_softmax``.
    Nothing is returned.
    """
    extra_converters = {'masked_softmax': _mx_masked_softmax}
    _convert_map.update(extra_converters)


def _mx_masked_softmax(inputs, attrs):
    """Convert the MXNet ``masked_softmax`` operator into Relay expressions.

    Parameters
    ----------
    inputs
        Either ``[att_score]`` or ``[att_score, mask]``. The mask is used as the
        condition of ``where`` (presumably a boolean tensor, to be confirmed
        against the caller).
    attrs
        Operator attributes. ``axis`` is read with ``get_int`` and
        ``temperature`` with ``get_float``.

    Returns
    -------
    result
        With one input: the softmax of ``att_score / temperature`` along ``axis``.
        With two inputs: the same softmax computed after the masked-out scores
        are replaced by a large negative value, then multiplied by the mask so
        masked positions are exactly zero.
    """
    assert len(inputs) in (1, 2), \
        "masked_softmax expects 1 or 2 inputs, got {}".format(len(inputs))
    axis = attrs.get_int("axis")
    temperature = attrs.get_float("temperature")
    att_score = inputs[0]
    # Constants and the mask cast are created in the dtype of the scores.
    # Default-dtype constants (float32 for a Python float) combined with float16
    # scores produce mixed-dtype where/divide/multiply calls, which Relay type
    # inference rejects rather than promoting.
    att_score_dtype = _infer_type(att_score).checked_type.dtype
    scale = _expr.const(temperature, dtype=att_score_dtype)
    if len(inputs) == 1:
        return _op.nn.softmax(att_score / scale, axis=axis)

    mask = inputs[1]
    # -1e18 is not representable in float16, so a smaller magnitude is used there.
    neg = -1e4 if att_score_dtype == "float16" else -1e18
    filled = _op.where(mask,
                       att_score,
                       _expr.const(neg, dtype=att_score_dtype))
    probs = _op.nn.softmax(filled / scale, axis=axis)
    # Multiply by the mask so that masked positions are exactly zero.
    return _op.multiply(probs, mask.astype(att_score_dtype))
11 changes: 6 additions & 5 deletions tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import numpy as np
import numpy.testing as npt
from gluonnlp.models import get_backbone, list_backbone_names
from gluonnlp.utils.misc import get_ec2_tvm_flags
from gluonnlp.utils.parameter import count_parameters
from gluonnlp.utils.lazy_imports import try_import_tvm
mx.npx.set_np()
Expand Down Expand Up @@ -74,11 +73,13 @@ def test_get_backbone(name, ctx):
@pytest.mark.parametrize('layout', ['NT', 'TN'])
@pytest.mark.skipif(not tvm_enabled(),
reason='TVM is not supported. So this test is skipped.')
@pytest.mark.skip('TVM issue https://github.com/dmlc/gluon-nlp/issues/1425.')
# @pytest.mark.skip('TVM issue https://github.com/dmlc/gluon-nlp/issues/1425.')
def test_tvm_integration(model_name, batch_size, seq_length, layout, ctx):
tvm = try_import_tvm()
from tvm import relay
from tvm.contrib import graph_runtime
from gluonnlp.utils.tvm_utils import get_ec2_tvm_flags, update_tvm_convert_map
update_tvm_convert_map()
tvm_recommended_flags = get_ec2_tvm_flags()
if ctx.device_type == 'gpu':
flags = tvm_recommended_flags['g4']
Expand Down Expand Up @@ -162,11 +163,11 @@ def test_tvm_integration(model_name, batch_size, seq_length, layout, ctx):
ctx = tvm.cpu()
rt = graph_runtime.GraphModule(lib["default"](ctx))
if 'bart' in model_name:
rt.set_input(data0=token_ids, data1=valid_length, data2=token_ids, data3=valid_length)
rt.set_input(data0=token_ids.asnumpy(), data1=valid_length.asnumpy(), data2=token_ids.asnumpy(), data3=valid_length.asnumpy())
elif 'roberta' in model_name:
rt.set_input(data0=token_ids, data1=valid_length)
rt.set_input(data0=token_ids.asnumpy(), data1=valid_length.asnumpy())
else:
rt.set_input(data0=token_ids, data1=token_types, data2=valid_length)
rt.set_input(data0=token_ids.asnumpy(), data1=token_types.asnumpy(), data2=valid_length.asnumpy())
rt.run()
for i in range(rt.get_num_outputs()):
out = rt.get_output(i)
Expand Down
4 changes: 2 additions & 2 deletions tools/docker/gluon_nlp_job.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,15 @@ git fetch origin $SOURCE_REF:working
git checkout working

if [ $DEVICE == "cpu" ]; then
python3 -m pip install -U --quiet --pre "mxnet>=2.0.0b20201206" -f https://dist.mxnet.io/python --user
python3 -m pip install -U --quiet --pre "mxnet>=2.0.0b20210121" -f https://dist.mxnet.io/python --user
else
# Due to the issue in https://forums.aws.amazon.com/thread.jspa?messageID=953912
# We need to manually configure the shm to ensure that Horovod is runnable.
# The reason that we need a larger shm is described in https://github.com/NVIDIA/nccl/issues/290
umount shm
mount -t tmpfs -o rw,nosuid,nodev,noexec,relatime,size=2G shm /dev/shm
sudo python3 -m pip uninstall --quiet mxnet-cu102
python3 -m pip install -U --quiet --pre "mxnet-cu102>=2.0.0b20201206" -f https://dist.mxnet.io/python/cu102 --user
python3 -m pip install -U --quiet --pre "mxnet-cu102>=2.0.0b20210121" -f https://dist.mxnet.io/python/cu102 --user
fi

python3 -m pip install --quiet -e .[extras]
Expand Down
2 changes: 1 addition & 1 deletion tools/docker/install/install_tvm_cpu.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ cd ${WORKDIR}
git clone https://github.com/apache/incubator-tvm tvm --recursive
cd ${WORKDIR}/tvm
# checkout a hash-tag
git checkout ef6e52f191888ee2a5f2221bde3b69391766903f
git checkout 790344c6ef035947caaaf1cd812ade8d862802aa

mkdir -p build
cp cmake/config.cmake build
Expand Down
2 changes: 1 addition & 1 deletion tools/docker/install/install_tvm_gpu.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ cd ${WORKDIR}
git clone https://github.com/apache/incubator-tvm tvm --recursive
cd ${WORKDIR}/tvm
# checkout a hash-tag
git checkout ef6e52f191888ee2a5f2221bde3b69391766903f
git checkout 790344c6ef035947caaaf1cd812ade8d862802aa


mkdir -p build
Expand Down
2 changes: 1 addition & 1 deletion tools/docker/ubuntu18.04-cpu.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ RUN bash /install/install_python_packages.sh
RUN bash /install/install_tvm_cpu.sh

# Install MXNet
RUN python3 -m pip install -U --pre "mxnet>=2.0.0b20201206" -f https://dist.mxnet.io/python --user
RUN python3 -m pip install -U --pre "mxnet>=2.0.0b20210121" -f https://dist.mxnet.io/python --user

# Install PyTorch
RUN python3 -m pip install -U torch torchvision --user
Expand Down
2 changes: 1 addition & 1 deletion tools/docker/ubuntu18.04-gpu.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ RUN bash /install/install_python_packages.sh
RUN bash /install/install_tvm_gpu.sh

# Install MXNet
RUN python3 -m pip install -U --pre "mxnet-cu102>=2.0.0b20201206" -f https://dist.mxnet.io/python --user
RUN python3 -m pip install -U --pre "mxnet-cu102>=2.0.0b20210121" -f https://dist.mxnet.io/python --user

# Install PyTorch
RUN python3 -m pip install -U torch torchvision --user
Expand Down

0 comments on commit 4624e6b

Please sign in to comment.