diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index a5250c1ffc74..dd85ef2a5d17 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -34,53 +34,55 @@ jobs:
Build:
strategy:
matrix:
- os: [windows-latest, macOS-latest]
+ os: [windows-2016, macOS-latest]
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v2
- - uses: actions/setup-python@v2
- - name: Lint Python
- if: matrix.os == 'macOS-latest'
- run: |
- pip install flake8
- flake8 . --count --select=E9,F63,F7 --show-source --statistics
- name: Initialize submodules
run: git submodule update --recursive --init
-
- - name: Make Build Directory
- run: cmake -E make_directory build.common
-
- # configuration for Windows
- - name: CMake@Win
- if: matrix.os == 'windows-latest'
- working-directory: build.common
+ - name: Lint Python
+ if: startsWith(matrix.os, 'macOS')
+ run: |
+ python3 -m pip install flake8
+ python3 -m flake8 . --count --select=E9,F63,F7 --show-source --statistics
+ - uses: actions/cache@v1
+ env:
+ CACHE_NUMBER: 0
+ with:
+ path: ~/conda_pkgs_dir
+ key: ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-${{ hashFiles('conda/build-environment.yaml') }}
+ - uses: conda-incubator/setup-miniconda@v2
+ with:
+ activate-environment: tvm-build
+ channel-priority: strict
+ environment-file: conda/build-environment.yaml
+ auto-activate-base: false
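+      # Required for the actions/cache step above: cached conda packages must remain .tar.bz2 archives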
+ use-only-tar-bz2: true
+ - name: Conda info
+ run: |
+ conda info
+ conda list
+ - name: Conda-Build@Win
+ if: startsWith(matrix.os, 'windows')
+ shell: cmd /C call {0}
run: >-
- cmake
- -DUSE_SORT=ON
- -DUSE_RPC=ON
- -DUSE_GRAPH_RUNTIME=ON
- -DCMAKE_BUILD_TYPE=Release
- -DCMAKE_CONFIGURATION_TYPES="Release"
- ..
-
- # configuration for Mac
- - name: CMake@MacOS
- if: matrix.os == 'macOS-latest'
- working-directory: build.common
+ conda build --output-folder=conda/pkg conda/recipe &&
+ conda install tvm -c ./conda/pkg
+ - name: Conda-Build@MacOS
+ if: startsWith(matrix.os, 'macOS')
+ shell: bash -l {0}
run: >-
- cmake
- "-DUSE_SORT=ON"
- "-DUSE_RPC=ON"
- "-DUSE_GRAPH_RUNTIME=ON"
- "-DUSE_METAL=ON"
- ..
-
- - name: Build@Win
- if: matrix.os == 'windows-latest'
- run: cmake --build build.common --config Release -- /m
-
- - name: Build@MacOS
- if: matrix.os == 'macOS-latest'
- run: cmake --build build.common --config Release -j3
+ conda build --output-folder=conda/pkg conda/recipe &&
+ conda install tvm -c ./conda/pkg
+ - name: Test@Win
+ if: startsWith(matrix.os, 'windows')
+ shell: cmd /C call {0}
+ run: >-
+ python -m pytest -v tests/python/all-platform-minimal-test
+ - name: Test@MacOS
+ if: startsWith(matrix.os, 'macOS')
+ shell: bash -l {0}
+ run: >-
+ python -m pytest -v tests/python/all-platform-minimal-test
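The conda package cache above is keyed on `hashFiles('conda/build-environment.yaml')`, so any change to the environment file yields a new key and a clean cache, while bumping `CACHE_NUMBER` forces a refresh manually. A minimal Python sketch of the same content-keyed scheme (the `cache_key` helper is illustrative only, not part of the workflow):

```python
import hashlib

def cache_key(runner_os: str, cache_number: int, env_file: str) -> str:
    """Mimic the workflow's key: OS name + manual counter + env-file digest."""
    with open(env_file, "rb") as f:
        digest = hashlib.sha256(f.read()).hexdigest()
    return f"{runner_os}-conda-{cache_number}-{digest}"

# Editing conda/build-environment.yaml changes the digest and invalidates the
# cache; bumping CACHE_NUMBER does the same without touching the file.
print(cache_key("Windows", 0, "conda/build-environment.yaml"))
```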
diff --git a/.gitignore b/.gitignore
index 77c593ca2ab8..cdcf6780a3f2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -24,7 +24,7 @@ var/
*.egg-info/
.installed.cfg
*.egg
-
+.conda/
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
diff --git a/3rdparty/vta-hw b/3rdparty/vta-hw
index 87ce9acfae55..12fb486a491b 160000
--- a/3rdparty/vta-hw
+++ b/3rdparty/vta-hw
@@ -1 +1 @@
-Subproject commit 87ce9acfae550d1a487746e9d06c2e250076e54c
+Subproject commit 12fb486a491b75d70ec4c5e0a0cd112ab49a95bc
diff --git a/CMakeLists.txt b/CMakeLists.txt
index e24bbeb5acd8..8fe416e9de93 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2,13 +2,13 @@ cmake_minimum_required(VERSION 3.2)
project(tvm C CXX)
# Utility functions
-include(cmake/util/Util.cmake)
-include(cmake/util/FindCUDA.cmake)
-include(cmake/util/FindOpenCL.cmake)
-include(cmake/util/FindVulkan.cmake)
-include(cmake/util/FindLLVM.cmake)
-include(cmake/util/FindROCM.cmake)
-include(cmake/util/FindEthosN.cmake)
+include(cmake/utils/Utils.cmake)
+include(cmake/utils/FindCUDA.cmake)
+include(cmake/utils/FindOpenCL.cmake)
+include(cmake/utils/FindVulkan.cmake)
+include(cmake/utils/FindLLVM.cmake)
+include(cmake/utils/FindROCM.cmake)
+include(cmake/utils/FindEthosN.cmake)
if(EXISTS ${CMAKE_CURRENT_BINARY_DIR}/config.cmake)
include(${CMAKE_CURRENT_BINARY_DIR}/config.cmake)
@@ -79,6 +79,10 @@ tvm_option(USE_COREML "Build with coreml support" OFF)
tvm_option(USE_TARGET_ONNX "Build with ONNX Codegen support" OFF)
tvm_option(USE_ARM_COMPUTE_LIB "Build with Arm Compute Library" OFF)
tvm_option(USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME "Build with Arm Compute Library graph runtime" OFF)
+tvm_option(USE_TENSORRT_CODEGEN "Build with TensorRT Codegen support" OFF)
+tvm_option(USE_TENSORRT_RUNTIME "Build with TensorRT runtime" OFF)
+tvm_option(USE_RUST_EXT "Build with Rust based compiler extensions, STATIC, DYNAMIC, or OFF" OFF)
+tvm_option(USE_VITIS_AI "Build with VITIS-AI Codegen support" OFF)
# include directories
include_directories(${CMAKE_INCLUDE_PATH})
@@ -100,6 +104,8 @@ if(MSVC)
add_definitions(-D_SCL_SECURE_NO_WARNINGS)
add_definitions(-D_ENABLE_EXTENDED_ALIGNED_STORAGE)
add_definitions(-DNOMINMAX)
+ # regeneration does not work well with msbuild custom rules.
+ set(CMAKE_SUPPRESS_REGENERATION ON)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /MP")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /bigobj")
@@ -361,8 +367,12 @@ include(cmake/modules/contrib/TF_TVMDSOOP.cmake)
include(cmake/modules/contrib/CoreML.cmake)
include(cmake/modules/contrib/ONNX.cmake)
include(cmake/modules/contrib/ArmComputeLib.cmake)
+include(cmake/modules/contrib/TensorRT.cmake)
+include(cmake/modules/contrib/VitisAI.cmake)
+include(cmake/modules/contrib/Verilator.cmake)
include(cmake/modules/Git.cmake)
include(cmake/modules/LibInfo.cmake)
+include(cmake/modules/RustExt.cmake)
include(CheckCXXCompilerFlag)
if(NOT MSVC)
@@ -400,23 +410,23 @@ endif()
if(USE_RELAY_DEBUG)
message(STATUS "Building Relay in debug mode...")
- set_target_properties(tvm_objs PROPERTIES COMPILE_DEFINITIONS "USE_RELAY_DEBUG")
- set_target_properties(tvm_objs PROPERTIES COMPILE_DEFINITIONS "DMLC_LOG_DEBUG")
+ target_compile_definitions(tvm_objs PRIVATE "USE_RELAY_DEBUG")
+ target_compile_definitions(tvm_objs PRIVATE "DMLC_LOG_DEBUG")
else()
- set_target_properties(tvm_objs PROPERTIES COMPILE_DEFINITIONS "NDEBUG")
+ target_compile_definitions(tvm_objs PRIVATE "NDEBUG")
endif(USE_RELAY_DEBUG)
if(USE_FALLBACK_STL_MAP)
message(STATUS "Building with STL Map...")
- set_target_properties(tvm_objs PROPERTIES COMPILE_DEFINITIONS "USE_FALLBACK_STL_MAP=1")
+ target_compile_definitions(tvm_objs PRIVATE "USE_FALLBACK_STL_MAP=1")
else()
message(STATUS "Building with TVM Map...")
- set_target_properties(tvm_objs PROPERTIES COMPILE_DEFINITIONS "USE_FALLBACK_STL_MAP=0")
+ target_compile_definitions(tvm_objs PRIVATE "USE_FALLBACK_STL_MAP=0")
endif(USE_FALLBACK_STL_MAP)
if(BUILD_FOR_HEXAGON)
# Wrap pthread_create to allow setting custom stack size.
- set_target_properties(tvm_runtime PROPERTIES LINK_FLAGS
+ set_property(TARGET tvm_runtime APPEND PROPERTY LINK_FLAGS
"-Wl,--wrap=pthread_create")
target_include_directories(tvm_runtime SYSTEM
@@ -483,7 +493,7 @@ if(GTEST_INCLUDE_DIR AND GTEST_LIB)
add_executable(${__execname} ${__srcpath})
list(APPEND TEST_EXECS ${__execname})
target_include_directories(${__execname} SYSTEM PUBLIC ${GTEST_INCLUDE_DIR})
- target_link_libraries(${__execname} ${TVM_TEST_LIBRARY_NAME} ${GTEST_LIB} pthread dl)
+ target_link_libraries(${__execname} PRIVATE ${TVM_TEST_LIBRARY_NAME} ${GTEST_LIB} pthread dl)
set_target_properties(${__execname} PROPERTIES EXCLUDE_FROM_ALL 1)
set_target_properties(${__execname} PROPERTIES EXCLUDE_FROM_DEFAULT_BUILD 1)
endforeach()
diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md
index 9b2faf78d8bc..650d1bc40e6d 100644
--- a/CONTRIBUTORS.md
+++ b/CONTRIBUTORS.md
@@ -40,6 +40,8 @@ We add tag along with committer name to show areas that they are familiar with.
We do encourage everyone to work anything they are interested in.
- [Aditya Atluri](https://github.com/adityaatluri): @adityaatluri - rocm
+- [Matthew Barrett](https://github.com/mbaret): @mbaret - byoc, arm
+- [Matthew Brookhart](https://github.com/mbrookhart): @mbrookhart - relay, frontends
- [Tianqi Chen](https://github.com/tqchen) (PPMC): @tqchen - topi, compiler, relay, docs
- [Liangfu Chen](https://github.com/liangfu): @liangfu - vta, chisel, intel FPGA, c runtime
- [Wei Chen](https://github.com/wweic): @wweic - runtime, relay, vm
@@ -59,6 +61,7 @@ We do encourage everyone to work anything they are interested in.
- [Jared Roesch](https://github.com/jroesch) (PPMC): @jroesch - relay
- [Siju Samuel](https://github.com/siju-samuel): @siju-samuel - frontends
- [Siva](https://github.com/srkreddy1238): @srkreddy1238 - frontends, golang
+- [Junru Shao](https://github.com/junrushao1994) @junrushao1994 - relay, compiler
- [Haichen Shen](https://github.com/icemelon9) (PPMC): @icemelon9 - relay, topi
- [Zhixun Tan](https://github.com/phisiart): @phisiart - opengl, web
- [Andrew Tulloch](https://github.com/ajtulloch): @ajtulloch - topi, compiler, runtime
@@ -136,7 +139,7 @@ We do encourage everyone to work anything they are interested in.
- [Lianmin Zheng](https://github.com/merrymercy): @merrymercy
## List of Contributors
-- [Full List of Contributors](https://github.com/apache/incubator-tvm/graphs/contributors)
+- [Full List of Contributors](https://github.com/apache/tvm/graphs/contributors)
- To contributors: please add your name to the list.
- [Qiao Zhang](https://github.com/zhangqiaorjc)
- [Haolong Zhang](https://github.com/haolongzhangm)
diff --git a/Jenkinsfile b/Jenkinsfile
index 207d12c21d6d..feea8c2f9489 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -45,11 +45,12 @@
// NOTE: these lines are scanned by docker/dev_common.sh. Please update the regex as needed. -->
ci_lint = "tlcpack/ci-lint:v0.62"
-ci_gpu = "tlcpack/ci-gpu:v0.64"
-ci_cpu = "tlcpack/ci-cpu:v0.66"
-ci_wasm = "tlcpack/ci-wasm:v0.60"
-ci_i386 = "tlcpack/ci-i386:v0.52"
+ci_gpu = "tlcpack/ci-gpu:v0.72"
+ci_cpu = "tlcpack/ci-cpu:v0.71"
+ci_wasm = "tlcpack/ci-wasm:v0.70"
+ci_i386 = "tlcpack/ci-i386:v0.71"
ci_qemu = "tlcpack/ci-qemu:v0.01"
+ci_arm = "tlcpack/ci-arm:v0.01"
// <--- End of regex-scanned config.
// tvm libraries
@@ -180,11 +181,12 @@ stage('Build') {
make(ci_cpu, 'build', '-j2')
pack_lib('cpu', tvm_multilib)
timeout(time: max_time, unit: 'MINUTES') {
+ sh "${docker_run} ${ci_cpu} ./tests/scripts/task_ci_python_setup.sh"
sh "${docker_run} ${ci_cpu} ./tests/scripts/task_python_unittest.sh"
sh "${docker_run} ${ci_cpu} ./tests/scripts/task_python_integration.sh"
sh "${docker_run} ${ci_cpu} ./tests/scripts/task_python_vta_fsim.sh"
sh "${docker_run} ${ci_cpu} ./tests/scripts/task_python_vta_tsim.sh"
- sh "${docker_run} ${ci_cpu} ./tests/scripts/task_golang.sh"
+ // sh "${docker_run} ${ci_cpu} ./tests/scripts/task_golang.sh"
sh "${docker_run} ${ci_cpu} ./tests/scripts/task_rust.sh"
}
}
@@ -197,6 +199,7 @@ stage('Build') {
sh "${docker_run} ${ci_wasm} ./tests/scripts/task_config_build_wasm.sh"
make(ci_wasm, 'build', '-j2')
timeout(time: max_time, unit: 'MINUTES') {
+ sh "${docker_run} ${ci_wasm} ./tests/scripts/task_ci_python_setup.sh"
sh "${docker_run} ${ci_wasm} ./tests/scripts/task_web_wasm.sh"
}
}
@@ -212,6 +215,16 @@ stage('Build') {
}
}
},
+ // 'BUILD : arm': {
+ // node('ARM') {
+ // ws(per_exec_ws("tvm/build-arm")) {
+ // init_git()
+ // sh "${docker_run} ${ci_arm} ./tests/scripts/task_config_build_arm.sh"
+ // make(ci_arm, 'build', '-j4')
+ // pack_lib('arm', tvm_multilib)
+ // }
+ // }
+ // },
'BUILD: QEMU': {
node('CPU') {
ws(per_exec_ws("tvm/build-qemu")) {
@@ -219,6 +232,7 @@ stage('Build') {
sh "${docker_run} ${ci_qemu} ./tests/scripts/task_config_build_qemu.sh"
make(ci_qemu, 'build', '-j2')
timeout(time: max_time, unit: 'MINUTES') {
+ sh "${docker_run} ${ci_qemu} ./tests/scripts/task_ci_python_setup.sh"
sh "${docker_run} ${ci_qemu} ./tests/scripts/task_python_microtvm.sh"
}
}
@@ -233,6 +247,7 @@ stage('Unit Test') {
init_git()
unpack_lib('gpu', tvm_multilib)
timeout(time: max_time, unit: 'MINUTES') {
+ sh "${docker_run} ${ci_gpu} ./tests/scripts/task_ci_python_setup.sh"
sh "${docker_run} ${ci_gpu} ./tests/scripts/task_sphinx_precheck.sh"
sh "${docker_run} ${ci_gpu} ./tests/scripts/task_python_unittest_gpuonly.sh"
sh "${docker_run} ${ci_gpu} ./tests/scripts/task_python_integration_gpuonly.sh"
@@ -246,6 +261,7 @@ stage('Unit Test') {
init_git()
unpack_lib('i386', tvm_multilib)
timeout(time: max_time, unit: 'MINUTES') {
+ sh "${docker_run} ${ci_i386} ./tests/scripts/task_ci_python_setup.sh"
sh "${docker_run} ${ci_i386} ./tests/scripts/task_python_unittest.sh"
sh "${docker_run} ${ci_i386} ./tests/scripts/task_python_integration.sh"
sh "${docker_run} ${ci_i386} ./tests/scripts/task_python_vta_fsim.sh"
@@ -253,12 +269,26 @@ stage('Unit Test') {
}
}
},
+ // 'python3: arm': {
+ // node('ARM') {
+ // ws(per_exec_ws("tvm/ut-python-arm")) {
+ // init_git()
+ // unpack_lib('arm', tvm_multilib)
+ // timeout(time: max_time, unit: 'MINUTES') {
+ // sh "${docker_run} ${ci_arm} ./tests/scripts/task_ci_python_setup.sh"
+ // sh "${docker_run} ${ci_arm} ./tests/scripts/task_python_unittest.sh"
+ // // sh "${docker_run} ${ci_arm} ./tests/scripts/task_python_integration.sh"
+ // }
+ // }
+ // }
+ // },
'java: GPU': {
node('GPU') {
ws(per_exec_ws("tvm/ut-java")) {
init_git()
unpack_lib('gpu', tvm_multilib)
timeout(time: max_time, unit: 'MINUTES') {
+ sh "${docker_run} ${ci_gpu} ./tests/scripts/task_ci_python_setup.sh"
sh "${docker_run} ${ci_gpu} ./tests/scripts/task_java_unittest.sh"
}
}
@@ -273,6 +303,7 @@ stage('Integration Test') {
init_git()
unpack_lib('gpu', tvm_multilib)
timeout(time: max_time, unit: 'MINUTES') {
+ sh "${docker_run} ${ci_gpu} ./tests/scripts/task_ci_python_setup.sh"
sh "${docker_run} ${ci_gpu} ./tests/scripts/task_python_topi.sh"
}
}
@@ -284,6 +315,7 @@ stage('Integration Test') {
init_git()
unpack_lib('gpu', tvm_multilib)
timeout(time: max_time, unit: 'MINUTES') {
+ sh "${docker_run} ${ci_gpu} ./tests/scripts/task_ci_python_setup.sh"
sh "${docker_run} ${ci_gpu} ./tests/scripts/task_python_frontend.sh"
}
}
@@ -295,6 +327,7 @@ stage('Integration Test') {
init_git()
unpack_lib('cpu', tvm_multilib)
timeout(time: max_time, unit: 'MINUTES') {
+          sh "${docker_run} ${ci_cpu} ./tests/scripts/task_ci_python_setup.sh"
sh "${docker_run} ${ci_cpu} ./tests/scripts/task_python_frontend_cpu.sh"
}
}
@@ -307,12 +340,12 @@ stage('Integration Test') {
// init_git()
// unpack_lib('gpu', tvm_multilib)
// timeout(time: max_time, unit: 'MINUTES') {
+ // sh "${docker_run} ${ci_gpu} ./tests/scripts/task_ci_python_setup.sh"
// sh "${docker_run} ${ci_gpu} ./tests/scripts/task_python_docs.sh"
// }
// pack_lib('mydocs', 'docs.tgz')
// }
// }
- // }
}
/*
diff --git a/README.md b/README.md
index 6c82b1585c45..b3a3e850adb2 100644
--- a/README.md
+++ b/README.md
@@ -15,7 +15,7 @@
-
Open Deep Learning Compiler Stack
+
Open Deep Learning Compiler Stack
==============================================
[Documentation](https://tvm.apache.org/docs) |
[Contributors](CONTRIBUTORS.md) |
@@ -23,7 +23,7 @@
[Release Notes](NEWS.md)
[](https://ci.tlcpack.ai/job/tvm/job/main/)
-[](https://github.com/apache/incubator-tvm/actions?query=workflow%3AWinMacBuild)
+[](https://github.com/apache/tvm/actions?query=workflow%3AWinMacBuild)
Apache TVM (incubating) is a compiler stack for deep learning systems. It is designed to close the gap between the
productivity-focused deep learning frameworks, and the performance- and efficiency-focused hardware backends.
diff --git a/apps/android_camera/app/src/main/jni/tvm_runtime.h b/apps/android_camera/app/src/main/jni/tvm_runtime.h
index bc10bdaa508c..5f3db04274a1 100644
--- a/apps/android_camera/app/src/main/jni/tvm_runtime.h
+++ b/apps/android_camera/app/src/main/jni/tvm_runtime.h
@@ -40,7 +40,7 @@
#include "../src/runtime/c_runtime_api.cc"
#include "../src/runtime/cpu_device_api.cc"
#include "../src/runtime/dso_library.cc"
-#include "../src/runtime/file_util.cc"
+#include "../src/runtime/file_utils.cc"
#include "../src/runtime/graph/graph_runtime.cc"
#include "../src/runtime/library_module.cc"
#include "../src/runtime/module.cc"
diff --git a/apps/android_camera/models/prepare_model.py b/apps/android_camera/models/prepare_model.py
index 19be368c97e9..ab20e028c2ad 100644
--- a/apps/android_camera/models/prepare_model.py
+++ b/apps/android_camera/models/prepare_model.py
@@ -25,7 +25,7 @@
import tvm
import tvm.relay as relay
-from tvm.contrib import util, ndk, graph_runtime as runtime
+from tvm.contrib import utils, ndk, graph_runtime as runtime
from tvm.contrib.download import download_testdata, download
target = "llvm -mtriple=arm64-linux-android"
diff --git a/apps/android_deploy/README.md b/apps/android_deploy/README.md
index d5efba88b901..32e601840f04 100644
--- a/apps/android_deploy/README.md
+++ b/apps/android_deploy/README.md
@@ -34,7 +34,7 @@ Alternatively, you may execute Docker image we provide which contains the requir
### Build APK
-Before you build the Android application, please refer to [TVM4J Installation Guide](https://github.com/apache/incubator-tvm/blob/main/jvm/README.md) and install tvm4j-core to your local maven repository. You can find tvm4j dependency declare in `app/build.gradle`. Modify it if it is necessary.
+Before you build the Android application, please refer to the [TVM4J Installation Guide](https://github.com/apache/tvm/blob/main/jvm/README.md) and install tvm4j-core to your local maven repository. You can find the tvm4j dependency declared in `app/build.gradle`. Modify it if necessary.
```
dependencies {
@@ -124,7 +124,7 @@ If everything goes well, you will find compile tools in `/opt/android-toolchain-
Follow instruction to get compiled version model for android target [here.](https://tvm.apache.org/docs/deploy/android.html)
-Copied these compiled model deploy_lib.so, deploy_graph.json and deploy_param.params to apps/android_deploy/app/src/main/assets/ and modify TVM flavor changes on [java](https://github.com/apache/incubator-tvm/blob/main/apps/android_deploy/app/src/main/java/org/apache/tvm/android/demo/MainActivity.java#L81)
+Copy the compiled model files deploy_lib.so, deploy_graph.json and deploy_param.params to apps/android_deploy/app/src/main/assets/ and modify the TVM flavor settings in [java](https://github.com/apache/tvm/blob/main/apps/android_deploy/app/src/main/java/org/apache/tvm/android/demo/MainActivity.java#L81)
`CPU Verison flavor`
```
diff --git a/apps/android_deploy/app/src/main/jni/tvm_runtime.h b/apps/android_deploy/app/src/main/jni/tvm_runtime.h
index f1a47a674281..362d278c38c4 100644
--- a/apps/android_deploy/app/src/main/jni/tvm_runtime.h
+++ b/apps/android_deploy/app/src/main/jni/tvm_runtime.h
@@ -28,7 +28,7 @@
#include "../src/runtime/c_runtime_api.cc"
#include "../src/runtime/cpu_device_api.cc"
#include "../src/runtime/dso_library.cc"
-#include "../src/runtime/file_util.cc"
+#include "../src/runtime/file_utils.cc"
#include "../src/runtime/graph/graph_runtime.cc"
#include "../src/runtime/library_module.cc"
#include "../src/runtime/module.cc"
diff --git a/apps/android_rpc/README.md b/apps/android_rpc/README.md
index 29962d329165..c5e21ecbbc12 100644
--- a/apps/android_rpc/README.md
+++ b/apps/android_rpc/README.md
@@ -28,7 +28,7 @@ You will need JDK, [Android NDK](https://developer.android.com/ndk) and an Andro
We use [Gradle](https://gradle.org) to build. Please follow [the installation instruction](https://gradle.org/install) for your operating system.
-Before you build the Android application, please refer to [TVM4J Installation Guide](https://github.com/apache/incubator-tvm/blob/main/jvm/README.md) and install tvm4j-core to your local maven repository. You can find tvm4j dependency declare in `app/build.gradle`. Modify it if it is necessary.
+Before you build the Android application, please refer to the [TVM4J Installation Guide](https://github.com/apache/tvm/blob/main/jvm/README.md) and install tvm4j-core to your local maven repository. You can find the tvm4j dependency declared in `app/build.gradle`. Modify it if necessary.
```
dependencies {
@@ -146,7 +146,7 @@ android 1 1 0
```
-Then checkout [android\_rpc/tests/android\_rpc\_test.py](https://github.com/apache/incubator-tvm/blob/main/apps/android_rpc/tests/android_rpc_test.py) and run,
+Then check out [android\_rpc/tests/android\_rpc\_test.py](https://github.com/apache/tvm/blob/main/apps/android_rpc/tests/android_rpc_test.py) and run:
```bash
# Specify the RPC tracker
@@ -157,7 +157,7 @@ export TVM_NDK_CC=/opt/android-toolchain-arm64/bin/aarch64-linux-android-g++
python android_rpc_test.py
```
-This will compile TVM IR to shared libraries (CPU, OpenCL and Vulkan) and run vector addition on your Android device. To verify compiled TVM IR shared libraries on OpenCL target set `'test_opencl = True'` and on Vulkan target set `'test_vulkan = True'` in [tests/android_rpc_test.py](https://github.com/apache/incubator-tvm/blob/main/apps/android_rpc/tests/android_rpc_test.py), by default on CPU target will execute.
+This will compile TVM IR to shared libraries (CPU, OpenCL and Vulkan) and run vector addition on your Android device. To verify the compiled TVM IR shared libraries on the OpenCL target, set `'test_opencl = True'`, and on the Vulkan target, set `'test_vulkan = True'` in [tests/android_rpc_test.py](https://github.com/apache/tvm/blob/main/apps/android_rpc/tests/android_rpc_test.py); by default it executes on the CPU target.
On my test device, it gives following results.
```bash
diff --git a/apps/android_rpc/app/src/main/jni/tvm_runtime.h b/apps/android_rpc/app/src/main/jni/tvm_runtime.h
index aea61e757aa7..2005568c608c 100644
--- a/apps/android_rpc/app/src/main/jni/tvm_runtime.h
+++ b/apps/android_rpc/app/src/main/jni/tvm_runtime.h
@@ -40,7 +40,7 @@
#include "../src/runtime/c_runtime_api.cc"
#include "../src/runtime/cpu_device_api.cc"
#include "../src/runtime/dso_library.cc"
-#include "../src/runtime/file_util.cc"
+#include "../src/runtime/file_utils.cc"
#include "../src/runtime/graph/graph_runtime.cc"
#include "../src/runtime/graph/graph_runtime_factory.cc"
#include "../src/runtime/library_module.cc"
diff --git a/apps/android_rpc/tests/android_rpc_test.py b/apps/android_rpc/tests/android_rpc_test.py
index 2827c140ea92..9586bffeca0b 100644
--- a/apps/android_rpc/tests/android_rpc_test.py
+++ b/apps/android_rpc/tests/android_rpc_test.py
@@ -25,7 +25,7 @@
from tvm import te
import os
from tvm import rpc
-from tvm.contrib import util, ndk
+from tvm.contrib import utils, ndk
import numpy as np
# Set to be address of tvm proxy.
@@ -50,7 +50,7 @@ def test_rpc_module():
A = te.placeholder((n,), name="A")
B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name="B")
a_np = np.random.uniform(size=1024).astype(A.dtype)
- temp = util.tempdir()
+ temp = utils.tempdir()
# Establish remote connection with target hardware
tracker = rpc.connect_tracker(tracker_host, tracker_port)
diff --git a/apps/benchmark/README.md b/apps/benchmark/README.md
index 920033f755ea..43d93d9e00fa 100644
--- a/apps/benchmark/README.md
+++ b/apps/benchmark/README.md
@@ -20,7 +20,7 @@
## Results
-See results on wiki page https://github.com/apache/incubator-tvm/wiki/Benchmark
+See results on wiki page https://github.com/apache/tvm/wiki/Benchmark
## How to Reproduce
@@ -78,7 +78,7 @@ python3 -m tvm.exec.rpc_tracker
`python3 -m tvm.exec.rpc_server --tracker=10.77.1.123:9190 --key=rk3399`, where 10.77.1.123 is the IP address of the tracker.
* For Android device
- * Build and install tvm RPC apk on your device [Help](https://github.com/apache/incubator-tvm/tree/main/apps/android_rpc).
+ * Build and install tvm RPC apk on your device [Help](https://github.com/apache/tvm/tree/main/apps/android_rpc).
Make sure you can pass the android rpc test. Then you have alreadly known how to register.
3. Verify the device registration
diff --git a/apps/benchmark/arm_cpu_imagenet_bench.py b/apps/benchmark/arm_cpu_imagenet_bench.py
index fb58819d3c5c..e7233370e6d6 100644
--- a/apps/benchmark/arm_cpu_imagenet_bench.py
+++ b/apps/benchmark/arm_cpu_imagenet_bench.py
@@ -23,7 +23,7 @@
import tvm
from tvm import te
-from tvm.contrib.util import tempdir
+from tvm.contrib.utils import tempdir
import tvm.contrib.graph_runtime as runtime
from tvm import relay
diff --git a/apps/benchmark/mobile_gpu_imagenet_bench.py b/apps/benchmark/mobile_gpu_imagenet_bench.py
index b57f6028ab73..cf78c66141d0 100644
--- a/apps/benchmark/mobile_gpu_imagenet_bench.py
+++ b/apps/benchmark/mobile_gpu_imagenet_bench.py
@@ -23,7 +23,7 @@
import tvm
from tvm import te
-from tvm.contrib.util import tempdir
+from tvm.contrib.utils import tempdir
import tvm.contrib.graph_runtime as runtime
from tvm import relay
diff --git a/apps/bundle_deploy/runtime.cc b/apps/bundle_deploy/runtime.cc
index 8e294a05775d..3224028b60a1 100644
--- a/apps/bundle_deploy/runtime.cc
+++ b/apps/bundle_deploy/runtime.cc
@@ -24,7 +24,7 @@
#include "../../src/runtime/c_runtime_api.cc"
#include "../../src/runtime/cpu_device_api.cc"
-#include "../../src/runtime/file_util.cc"
+#include "../../src/runtime/file_utils.cc"
#include "../../src/runtime/graph/graph_runtime.cc"
#include "../../src/runtime/library_module.cc"
#include "../../src/runtime/module.cc"
diff --git a/apps/cpp_rpc/main.cc b/apps/cpp_rpc/main.cc
index 777fffa7d37c..e381dd2b261b 100644
--- a/apps/cpp_rpc/main.cc
+++ b/apps/cpp_rpc/main.cc
@@ -35,7 +35,7 @@
#include
#include "../../src/support/socket.h"
-#include "../../src/support/util.h"
+#include "../../src/support/utils.h"
#include "rpc_server.h"
#if defined(_WIN32)
@@ -139,7 +139,7 @@ string GetCmdOption(int argc, char* argv[], string option, bool key = false) {
return cmd;
}
// We assume "=" is the end of option.
- CHECK_EQ(*option.rbegin(), '=');
+ ICHECK_EQ(*option.rbegin(), '=');
cmd = arg.substr(arg.find('=') + 1);
return cmd;
}
diff --git a/apps/cpp_rpc/rpc_env.cc b/apps/cpp_rpc/rpc_env.cc
index c64cb2f09f94..5b351725b1f1 100644
--- a/apps/cpp_rpc/rpc_env.cc
+++ b/apps/cpp_rpc/rpc_env.cc
@@ -40,8 +40,7 @@ int mkdir(const char* path, int /* ignored */) { return _mkdir(path); }
#include
#include
-#include "../../src/runtime/file_util.h"
-#include "../../src/support/util.h"
+#include "../../src/support/utils.h"
#include "rpc_env.h"
namespace {
@@ -115,7 +114,15 @@ RPCEnv::RPCEnv() {
std::string file_name = this->GetPath(args[0]);
file_name = BuildSharedLibrary(file_name);
std::string bin;
- LoadBinaryFromFile(file_name, &bin);
+
+ std::ifstream fs(file_name, std::ios::in | std::ios::binary);
+ ICHECK(!fs.fail()) << "Cannot open " << file_name;
+ fs.seekg(0, std::ios::end);
+  size_t size = static_cast<size_t>(fs.tellg());
+ fs.seekg(0, std::ios::beg);
+ bin.resize(size);
+ fs.read(dmlc::BeginPtr(bin), size);
+
TVMByteArray binarr;
binarr.data = bin.data();
binarr.size = bin.length();
diff --git a/apps/cpp_rpc/rpc_server.cc b/apps/cpp_rpc/rpc_server.cc
index 592a6db6d2ef..16939456451b 100644
--- a/apps/cpp_rpc/rpc_server.cc
+++ b/apps/cpp_rpc/rpc_server.cc
@@ -245,7 +245,7 @@ class RPCServer {
support::TCPSocket conn = listen_sock_.Accept(addr);
int code = kRPCMagic;
- CHECK_EQ(conn.RecvAll(&code, sizeof(code)), sizeof(code));
+ ICHECK_EQ(conn.RecvAll(&code, sizeof(code)), sizeof(code));
if (code != kRPCMagic) {
conn.Close();
LOG(FATAL) << "Client connected is not TVM RPC server";
@@ -253,7 +253,7 @@ class RPCServer {
}
int keylen = 0;
- CHECK_EQ(conn.RecvAll(&keylen, sizeof(keylen)), sizeof(keylen));
+ ICHECK_EQ(conn.RecvAll(&keylen, sizeof(keylen)), sizeof(keylen));
const char* CLIENT_HEADER = "client:";
const char* SERVER_HEADER = "server:";
@@ -265,10 +265,10 @@ class RPCServer {
continue;
}
- CHECK_NE(keylen, 0);
+ ICHECK_NE(keylen, 0);
std::string remote_key;
remote_key.resize(keylen);
- CHECK_EQ(conn.RecvAll(&remote_key[0], keylen), keylen);
+ ICHECK_EQ(conn.RecvAll(&remote_key[0], keylen), keylen);
std::stringstream ssin(remote_key);
std::string arg0;
@@ -280,16 +280,16 @@ class RPCServer {
if (arg0 != expect_header) {
code = kRPCMismatch;
- CHECK_EQ(conn.SendAll(&code, sizeof(code)), sizeof(code));
+ ICHECK_EQ(conn.SendAll(&code, sizeof(code)), sizeof(code));
conn.Close();
LOG(WARNING) << "Mismatch key from" << addr->AsString();
continue;
} else {
code = kRPCSuccess;
- CHECK_EQ(conn.SendAll(&code, sizeof(code)), sizeof(code));
+ ICHECK_EQ(conn.SendAll(&code, sizeof(code)), sizeof(code));
keylen = int(server_key.length());
- CHECK_EQ(conn.SendAll(&keylen, sizeof(keylen)), sizeof(keylen));
- CHECK_EQ(conn.SendAll(server_key.c_str(), keylen), keylen);
+ ICHECK_EQ(conn.SendAll(&keylen, sizeof(keylen)), sizeof(keylen));
+ ICHECK_EQ(conn.SendAll(server_key.c_str(), keylen), keylen);
LOG(INFO) << "Connection success " << addr->AsString();
#ifndef __ANDROID__
ssin >> *opts;
@@ -325,7 +325,7 @@ class RPCServer {
size_t pos = opts.rfind(option);
if (pos != std::string::npos) {
const std::string cmd = opts.substr(pos + option.size());
- CHECK(support::IsNumber(cmd)) << "Timeout is not valid";
+ ICHECK(support::IsNumber(cmd)) << "Timeout is not valid";
return std::stoi(cmd);
}
return 0;
diff --git a/apps/cpp_rpc/rpc_tracker_client.h b/apps/cpp_rpc/rpc_tracker_client.h
index cdfb64780ba6..1497ab3251be 100644
--- a/apps/cpp_rpc/rpc_tracker_client.h
+++ b/apps/cpp_rpc/rpc_tracker_client.h
@@ -74,9 +74,9 @@ class TrackerClient {
tracker_sock_ = ConnectWithRetry();
int code = kRPCTrackerMagic;
- CHECK_EQ(tracker_sock_.SendAll(&code, sizeof(code)), sizeof(code));
- CHECK_EQ(tracker_sock_.RecvAll(&code, sizeof(code)), sizeof(code));
- CHECK_EQ(code, kRPCTrackerMagic) << tracker_addr_.c_str() << " is not RPC Tracker";
+ ICHECK_EQ(tracker_sock_.SendAll(&code, sizeof(code)), sizeof(code));
+ ICHECK_EQ(tracker_sock_.RecvAll(&code, sizeof(code)), sizeof(code));
+ ICHECK_EQ(code, kRPCTrackerMagic) << tracker_addr_.c_str() << " is not RPC Tracker";
std::ostringstream ss;
ss << "[" << static_cast<int>(TrackerCode::kUpdateInfo) << ", {\"key\": \"server:" << key_
@@ -85,7 +85,7 @@ class TrackerClient {
// Receive status and validate
std::string remote_status = tracker_sock_.RecvBytes();
- CHECK_EQ(std::stoi(remote_status), static_cast<int>(TrackerCode::kSuccess));
+ ICHECK_EQ(std::stoi(remote_status), static_cast<int>(TrackerCode::kSuccess));
}
}
/*!
@@ -117,7 +117,7 @@ class TrackerClient {
// Receive status and validate
std::string remote_status = tracker_sock_.RecvBytes();
- CHECK_EQ(std::stoi(remote_status), static_cast<int>(TrackerCode::kSuccess));
+ ICHECK_EQ(std::stoi(remote_status), static_cast<int>(TrackerCode::kSuccess));
} else {
*matchkey = key_;
}
@@ -167,7 +167,7 @@ class TrackerClient {
tracker_sock_.SendBytes(ss.str());
std::string remote_status = tracker_sock_.RecvBytes();
- CHECK_EQ(std::stoi(remote_status), static_cast<int>(TrackerCode::kSuccess));
+ ICHECK_EQ(std::stoi(remote_status), static_cast<int>(TrackerCode::kSuccess));
unmatch_period_count = 0;
}
continue;
@@ -199,7 +199,7 @@ class TrackerClient {
auto period = (std::chrono::duration_cast<std::chrono::seconds>(
std::chrono::system_clock::now() - tbegin))
.count();
- CHECK(period < timeout) << "Failed to connect to server" << addr.AsString();
+ ICHECK(period < timeout) << "Failed to connect to server " << addr.AsString();
LOG(WARNING) << "Cannot connect to tracker " << addr.AsString() << " retry in "
<< retry_period << " seconds.";
std::this_thread::sleep_for(std::chrono::seconds(retry_period));
diff --git a/apps/cpp_rpc/win32_process.h b/apps/cpp_rpc/win32_process.h
index 621444e18764..0f784681f209 100644
--- a/apps/cpp_rpc/win32_process.h
+++ b/apps/cpp_rpc/win32_process.h
@@ -23,8 +23,12 @@
*/
#ifndef TVM_APPS_CPP_RPC_WIN32_PROCESS_H_
#define TVM_APPS_CPP_RPC_WIN32_PROCESS_H_
+
#include <chrono>
#include <string>
+
+#include "../../src/support/socket.h"
+
namespace tvm {
namespace runtime {
/*!
@@ -41,4 +45,4 @@ void SpawnRPCChild(SOCKET fd, std::chrono::seconds timeout);
void ChildProcSocketHandler(const std::string& mmap_path);
} // namespace runtime
} // namespace tvm
-#endif // TVM_APPS_CPP_RPC_WIN32_PROCESS_H_
\ No newline at end of file
+#endif // TVM_APPS_CPP_RPC_WIN32_PROCESS_H_
diff --git a/apps/extension/src/tvm_ext.cc b/apps/extension/src/tvm_ext.cc
index 87cb69b4f4ce..be431bab68d1 100644
--- a/apps/extension/src/tvm_ext.cc
+++ b/apps/extension/src/tvm_ext.cc
@@ -75,12 +75,12 @@ class NDSubClass : public tvm::runtime::NDArray {
NDSubClass AddWith(const NDSubClass& other) const {
SubContainer* a = static_cast<SubContainer*>(get_mutable());
SubContainer* b = static_cast<SubContainer*>(other.get_mutable());
- CHECK(a != nullptr && b != nullptr);
+ ICHECK(a != nullptr && b != nullptr);
return NDSubClass(a->additional_info_ + b->additional_info_);
}
int get_additional_info() const {
SubContainer* self = static_cast<SubContainer*>(get_mutable());
- CHECK(self != nullptr);
+ ICHECK(self != nullptr);
return self->additional_info_;
}
using ContainerType = SubContainer;
@@ -146,7 +146,7 @@ TVM_REGISTER_GLOBAL("device_api.ext_dev").set_body([](TVMArgs args, TVMRetValue*
TVM_REGISTER_GLOBAL("tvm_ext.nd_create").set_body([](TVMArgs args, TVMRetValue* rv) {
int additional_info = args[0];
*rv = NDSubClass(additional_info);
- CHECK_EQ(rv->type_code(), kTVMNDArrayHandle);
+ ICHECK_EQ(rv->type_code(), kTVMNDArrayHandle);
});
TVM_REGISTER_GLOBAL("tvm_ext.nd_add_two").set_body([](TVMArgs args, TVMRetValue* rv) {
diff --git a/apps/howto_deploy/cpp_deploy.cc b/apps/howto_deploy/cpp_deploy.cc
index fdb55a51480a..829241d31a6d 100644
--- a/apps/howto_deploy/cpp_deploy.cc
+++ b/apps/howto_deploy/cpp_deploy.cc
@@ -31,7 +31,7 @@
void Verify(tvm::runtime::Module mod, std::string fname) {
// Get the function from the module.
tvm::runtime::PackedFunc f = mod.GetFunction(fname);
- CHECK(f != nullptr);
+ ICHECK(f != nullptr);
// Allocate the DLPack data structures.
//
// Note that we use TVM runtime API to allocate the DLTensor in this example.
@@ -64,7 +64,7 @@ void Verify(tvm::runtime::Module mod, std::string fname) {
f(x, y);
// Print out the output
for (int i = 0; i < shape[0]; ++i) {
- CHECK_EQ(static_cast<float*>(y->data)[i], i + 1.0f);
+ ICHECK_EQ(static_cast<float*>(y->data)[i], i + 1.0f);
}
LOG(INFO) << "Finish verification...";
TVMArrayFree(x);
@@ -112,7 +112,7 @@ void DeployGraphRuntime() {
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 2; ++j) {
- CHECK_EQ(static_cast<float*>(y->data)[i * 2 + j], i * 2 + j + 1);
+ ICHECK_EQ(static_cast<float*>(y->data)[i * 2 + j], i * 2 + j + 1);
}
}
}
diff --git a/apps/howto_deploy/tvm_runtime_pack.cc b/apps/howto_deploy/tvm_runtime_pack.cc
index b43f920b6056..d6dd5876a994 100644
--- a/apps/howto_deploy/tvm_runtime_pack.cc
+++ b/apps/howto_deploy/tvm_runtime_pack.cc
@@ -39,7 +39,7 @@
*/
#include "../../src/runtime/c_runtime_api.cc"
#include "../../src/runtime/cpu_device_api.cc"
-#include "../../src/runtime/file_util.cc"
+#include "../../src/runtime/file_utils.cc"
#include "../../src/runtime/library_module.cc"
#include "../../src/runtime/module.cc"
#include "../../src/runtime/ndarray.cc"
diff --git a/apps/ios_rpc/tests/ios_rpc_mobilenet.py b/apps/ios_rpc/tests/ios_rpc_mobilenet.py
index 132377ac4412..90ac6bfb9218 100644
--- a/apps/ios_rpc/tests/ios_rpc_mobilenet.py
+++ b/apps/ios_rpc/tests/ios_rpc_mobilenet.py
@@ -22,7 +22,7 @@
from tvm.relay import transform
from tvm.relay.op.annotation import compiler_begin, compiler_end
from tvm.relay.quantize.quantize import prerequisite_optimize
-from tvm.contrib import util, xcode, graph_runtime, coreml_runtime
+from tvm.contrib import utils, xcode, graph_runtime, coreml_runtime
from tvm.contrib.target import coreml as _coreml
import os
@@ -98,7 +98,7 @@ def get_model(model_name, data_shape):
def test_mobilenet():
- temp = util.tempdir()
+ temp = utils.tempdir()
image, synset = prepare_input()
model, params = get_model("mobilenetv2_1.0", image.shape)
diff --git a/apps/ios_rpc/tests/ios_rpc_test.py b/apps/ios_rpc/tests/ios_rpc_test.py
index 620fe493771f..a967c2f75e61 100644
--- a/apps/ios_rpc/tests/ios_rpc_test.py
+++ b/apps/ios_rpc/tests/ios_rpc_test.py
@@ -26,7 +26,7 @@
import re
import sys
from tvm import rpc
-from tvm.contrib import util, xcode
+from tvm.contrib import utils, xcode
import numpy as np
# Set to be address of tvm proxy.
@@ -59,7 +59,7 @@ def test_rpc_module():
n = tvm.runtime.convert(1024)
A = te.placeholder((n,), name="A")
B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name="B")
- temp = util.tempdir()
+ temp = utils.tempdir()
s = te.create_schedule(B.op)
xo, xi = s[B].split(B.op.axis[0], factor=64)
s[B].bind(xi, te.thread_axis("threadIdx.x"))
diff --git a/apps/ios_rpc/tvmrpc/TVMRuntime.mm b/apps/ios_rpc/tvmrpc/TVMRuntime.mm
index 9e2899bf6e5e..fbe4850e1b57 100644
--- a/apps/ios_rpc/tvmrpc/TVMRuntime.mm
+++ b/apps/ios_rpc/tvmrpc/TVMRuntime.mm
@@ -25,7 +25,7 @@
#include "../../../src/runtime/c_runtime_api.cc"
#include "../../../src/runtime/cpu_device_api.cc"
#include "../../../src/runtime/dso_library.cc"
-#include "../../../src/runtime/file_util.cc"
+#include "../../../src/runtime/file_utils.cc"
#include "../../../src/runtime/library_module.cc"
#include "../../../src/runtime/metadata_module.cc"
#include "../../../src/runtime/module.cc"
@@ -118,7 +118,7 @@ void LaunchSyncServer() {
std::ifstream fs(name, std::ios::in);
std::string url, key;
int port;
- CHECK(fs >> url >> port >> key) << "Invalid RPC config file " << name;
+ ICHECK(fs >> url >> port >> key) << "Invalid RPC config file " << name;
RPCConnect(url, port, "server:" + key, TVMArgs(nullptr, nullptr, 0))->ServerLoop();
}
diff --git a/apps/ios_rpc/tvmrpc/ViewController.mm b/apps/ios_rpc/tvmrpc/ViewController.mm
index 6c618c48096f..910c650aedc1 100644
--- a/apps/ios_rpc/tvmrpc/ViewController.mm
+++ b/apps/ios_rpc/tvmrpc/ViewController.mm
@@ -80,7 +80,7 @@ - (void)onReadAvailable {
} else {
initialized_ = true;
self.statusLabel.text = @"Proxy connected.";
- CHECK(handler_ != nullptr);
+ ICHECK(handler_ != nullptr);
}
}
const int kBufferSize = 4 << 10;
@@ -158,7 +158,7 @@ - (void)open {
[outputStream_ open];
[inputStream_ open];
handler_ = tvm::runtime::CreateServerEventHandler(outputStream_, key_, "%toinit");
- CHECK(handler_ != nullptr);
+ ICHECK(handler_ != nullptr);
self.infoText.text = @"";
self.statusLabel.text = @"Connecting...";
}
diff --git a/apps/microtvm/README.md b/apps/microtvm/README.md
new file mode 100644
index 000000000000..97b844a4c01b
--- /dev/null
+++ b/apps/microtvm/README.md
@@ -0,0 +1,28 @@
+# microTVM Reference Virtual Machines
+
+
+microTVM is the effort to allow TVM to build and execute models on bare-metal microcontrollers.
+These Virtual Machines are used to reproduce results and bugs when using microTVM with real
+physical hardware. Note that they are not used to run Continuous Integration regression tests--
+those are instead run by the QEMU container (they run against an emulator, rather than real
+hardware).
+
+
+See the "microTVM Reference Virtual Machines" tutorial for information on how to use these.
diff --git a/apps/microtvm/reference-vm/.gitignore b/apps/microtvm/reference-vm/.gitignore
new file mode 100644
index 000000000000..d918f5e13cc5
--- /dev/null
+++ b/apps/microtvm/reference-vm/.gitignore
@@ -0,0 +1 @@
+/release-test
\ No newline at end of file
diff --git a/apps/microtvm/reference-vm/README.md b/apps/microtvm/reference-vm/README.md
new file mode 100644
index 000000000000..7ef7900c3e05
--- /dev/null
+++ b/apps/microtvm/reference-vm/README.md
@@ -0,0 +1,67 @@
+# microTVM Reference Virtual Machines
+
+This directory contains Vagrant specifications that create reference Virtual Machines for use with
+microTVM. These machines help microTVM users collaborate by providing a stable reference test
+environment.
+
+For more information on how to use them, see the microTVM Reference Virtual Machines tutorial.
+
+
+## Reference VM Developer Information
+
+Each RTOS or platform that integrates with microTVM can check-in a Reference VM in this directory to
+help the community collaborate. You should use the tools provided here to ensure a uniform release
+process across all platforms. Typically, releases need to be created by TVM committers.
+
+Generally speaking, it's expected that any integrated platform with a regression test checked-in to
+the tvm repository should also define a reference VM. If you want to integrate a new platform,
+please raise a discussion on [the forum](https://discuss.tvm.ai).
+
+### Organization
+
+Reference VMs are organized as follows:
+
+* `base-box-tool.py` - Reference VM build, test, and release tool
+* `<platform>/`
+** `Vagrantfile` - the Vagrantfile that end-users will invoke. Should be based on a base box
+ which contains dependencies other than the TVM python dependencies.
+** `base-box` - Top-level directory which defines the base box.
+*** `Vagrantfile.packer-template` - Packer template Vagrantfile which will be used to build the
+ base box.
+*** `test-config.json` - JSON file explaining how to perform release tests to `base-box-tool.py`
+
+## Creating Releases
+
+1. Build the base box for the given platform: `$ ./base-box-tool.py build <platform>`
+2. Run release tests for each platform:
+ 1. Connect any needed hardware to the VM host machine.
+   2. Run tests: `$ ./base-box-tool.py test <platform> [--test-device-serial=<serial>]`. This
+ command does the following for each provider:
+      1. Copies all files inside `./<platform>` except `.vagrant` and `base-box` to
+ `./release-test`. This is done to avoid reusing any VM the developer may have started.
+      2. Executes `$ vagrant up --provider=<provider>`.
+ 3. Finds an attached USB device matching the VID and PID specified in `test-config.json`,
+ and if `--test-device-serial` was given, that serial number (as reported to USB). Creates
+         a rule to autoconnect this device to the VM, and also attaches it to the VM.
+ 4. SSHs to the VM, `cd` to the TVM root directory, and runs `test_cmd` from
+ `test-config.json`. Nonzero status means failure.
+3. If release tests fail, fix them and restart from step 1.
+4. If release tests pass: `$ ./base-box-tool.py release <platform>`. Be sure you've logged
+ in to Vagrant Cloud using the `vagrant` tool.
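Assuming the `zephyr` platform directory added below and the `virtualbox` provider, the whole flow can be scripted; a minimal sketch (the release version is a placeholder):

```python
import subprocess

PLATFORM = "zephyr"      # must be a sub-directory of this directory
PROVIDER = "virtualbox"  # or "parallels"

# 1. Build the base box for the chosen provider.
subprocess.check_call(["./base-box-tool.py", f"--provider={PROVIDER}", "build", PLATFORM])

# 2. Run the release tests (connect the test hardware first).
subprocess.check_call(["./base-box-tool.py", f"--provider={PROVIDER}", "test", PLATFORM])

# 3. Publish the box once the tests pass (requires a `vagrant cloud` login).
subprocess.check_call(["./base-box-tool.py", "--release-version=0.0.1", "release", PLATFORM])
```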
diff --git a/apps/microtvm/reference-vm/base-box-tool.py b/apps/microtvm/reference-vm/base-box-tool.py
new file mode 100755
index 000000000000..c317a373bd8b
--- /dev/null
+++ b/apps/microtvm/reference-vm/base-box-tool.py
@@ -0,0 +1,426 @@
+#!/usr/bin/env python3
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+import argparse
+import json
+import logging
+import os
+import re
+import shlex
+import shutil
+import subprocess
+import sys
+
+
+_LOG = logging.getLogger(__name__)
+
+
+THIS_DIR = os.path.realpath(os.path.dirname(__file__) or ".")
+
+
+# List of vagrant providers supported by this tool
+ALL_PROVIDERS = (
+ "parallels",
+ "virtualbox",
+)
+
+
+def parse_virtualbox_devices():
+ output = subprocess.check_output(["VBoxManage", "list", "usbhost"], encoding="utf-8")
+ devices = []
+ current_dev = {}
+ for line in output.split("\n"):
+ if not line.strip():
+ if current_dev:
+ if "VendorId" in current_dev and "ProductId" in current_dev:
+ devices.append(current_dev)
+ current_dev = {}
+
+ continue
+
+ key, value = line.split(":", 1)
+ value = value.lstrip(" ")
+ current_dev[key] = value
+
+ if current_dev:
+ devices.append(current_dev)
+ return devices
+
+
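+# Matches VBoxManage "list usbhost" id fields such as "0x0483 (0483)", capturing the four hex digits.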
+VIRTUALBOX_VID_PID_RE = re.compile(r"0x([0-9A-Fa-f]{4}).*")
+
+
+def attach_virtualbox(uuid, vid_hex=None, pid_hex=None, serial=None):
+ usb_devices = parse_virtualbox_devices()
+ for dev in usb_devices:
+ m = VIRTUALBOX_VID_PID_RE.match(dev["VendorId"])
+ if not m:
+ _LOG.warning("Malformed VendorId: %s", dev["VendorId"])
+ continue
+
+ dev_vid_hex = m.group(1).lower()
+
+ m = VIRTUALBOX_VID_PID_RE.match(dev["ProductId"])
+ if not m:
+ _LOG.warning("Malformed ProductId: %s", dev["ProductId"])
+ continue
+
+ dev_pid_hex = m.group(1).lower()
+
+ if (
+ vid_hex == dev_vid_hex
+ and pid_hex == dev_pid_hex
+ and (serial is None or serial == dev["SerialNumber"])
+ ):
+ rule_args = [
+ "VBoxManage",
+ "usbfilter",
+ "add",
+ "0",
+ "--action",
+ "hold",
+ "--name",
+ "test device",
+ "--target",
+ uuid,
+ "--vendorid",
+ vid_hex,
+ "--productid",
+ pid_hex,
+ ]
+ if serial is not None:
+ rule_args.extend(["--serialnumber", serial])
+ subprocess.check_call(rule_args)
+ subprocess.check_call(["VBoxManage", "controlvm", uuid, "usbattach", dev["UUID"]])
+ return
+
+ raise Exception(
+ f"Device with vid={vid_hex}, pid={pid_hex}, serial={serial!r} not found:\n{usb_devices!r}"
+ )
+
+
+def attach_parallels(uuid, vid_hex=None, pid_hex=None, serial=None):
+ usb_devices = json.loads(
+ subprocess.check_output(["prlsrvctl", "usb", "list", "-j"], encoding="utf-8")
+ )
+ for dev in usb_devices:
+ _, dev_vid_hex, dev_pid_hex, _, _, dev_serial = dev["System name"].split("|")
+ dev_vid_hex = dev_vid_hex.lower()
+ dev_pid_hex = dev_pid_hex.lower()
+ if (
+ vid_hex == dev_vid_hex
+ and pid_hex == dev_pid_hex
+ and (serial is None or serial == dev_serial)
+ ):
+ subprocess.check_call(["prlsrvctl", "usb", "set", dev["Name"], uuid])
+ if "Used-By-Vm-Name" in dev:
+ subprocess.check_call(
+ ["prlctl", "set", dev["Used-By-Vm-Name"], "--device-disconnect", dev["Name"]]
+ )
+ subprocess.check_call(["prlctl", "set", uuid, "--device-connect", dev["Name"]])
+ return
+
+ raise Exception(
+ f"Device with vid={vid_hex}, pid={pid_hex}, serial={serial!r} not found:\n{usb_devices!r}"
+ )
+
+
+ATTACH_USB_DEVICE = {
+ "parallels": attach_parallels,
+ "virtualbox": attach_virtualbox,
+}
+
+
+def generate_packer_config(file_path, providers):
+ builders = []
+ for provider_name in providers:
+ builders.append(
+ {
+ "type": "vagrant",
+ "output_dir": f"output-packer-{provider_name}",
+ "communicator": "ssh",
+ "source_path": "generic/ubuntu1804",
+ "provider": provider_name,
+ "template": "Vagrantfile.packer-template",
+ }
+ )
+
+ with open(file_path, "w") as f:
+ json.dump(
+ {
+ "builders": builders,
+ },
+ f,
+ sort_keys=True,
+ indent=2,
+ )
+
+
+def build_command(args):
+ generate_packer_config(
+ os.path.join(THIS_DIR, args.platform, "base-box", "packer.json"),
+        args.provider or ALL_PROVIDERS,
+ )
+ subprocess.check_call(
+ ["packer", "build", "packer.json"], cwd=os.path.join(THIS_DIR, args.platform, "base-box")
+ )
+
+
+REQUIRED_TEST_CONFIG_KEYS = {
+ "vid_hex": str,
+ "pid_hex": str,
+ "test_cmd": list,
+}
+
+
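+# Matches the 'config.vm.box = "..."' line in a Vagrantfile; do_build_release_test_vm rewrites it to point at the locally built box.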
+VM_BOX_RE = re.compile(r'(.*\.vm\.box) = "(.*)"')
+
+
+# Paths, relative to the platform box directory, which will not be copied to release-test dir.
+SKIP_COPY_PATHS = [".vagrant", "base-box"]
+
+
+def do_build_release_test_vm(release_test_dir, user_box_dir, base_box_dir, provider_name):
+ if os.path.exists(release_test_dir):
+ try:
+ subprocess.check_call(["vagrant", "destroy", "-f"], cwd=release_test_dir)
+ except subprocess.CalledProcessError:
+ _LOG.warning("vagrant destroy failed--removing dirtree anyhow", exc_info=True)
+
+ shutil.rmtree(release_test_dir)
+
+ for dirpath, _, filenames in os.walk(user_box_dir):
+ rel_path = os.path.relpath(dirpath, user_box_dir)
+ if any(
+ rel_path == scp or rel_path.startswith(f"{scp}{os.path.sep}") for scp in SKIP_COPY_PATHS
+ ):
+ continue
+
+ dest_dir = os.path.join(release_test_dir, rel_path)
+ os.makedirs(dest_dir)
+ for filename in filenames:
+ shutil.copy2(os.path.join(dirpath, filename), os.path.join(dest_dir, filename))
+
+ release_test_vagrantfile = os.path.join(release_test_dir, "Vagrantfile")
+ with open(release_test_vagrantfile) as f:
+ lines = list(f)
+
+ found_box_line = False
+ with open(release_test_vagrantfile, "w") as f:
+ for line in lines:
+ m = VM_BOX_RE.match(line)
+ if not m:
+ f.write(line)
+ continue
+
+ box_package = os.path.join(
+ base_box_dir, f"output-packer-{provider_name}", "package.box"
+ )
+ box_relpath = os.path.relpath(box_package, release_test_dir)
+ f.write(f'{m.group(1)} = "{box_relpath}"\n')
+ found_box_line = True
+
+ if not found_box_line:
+ _LOG.error(
+            "testing provider %s: couldn't find config.vm.box = line in Vagrantfile; unable to test",
+ provider_name,
+ )
+ return False
+
+ # Delete the old box registered with Vagrant, which may lead to a falsely-passing release test.
+ remove_args = ["vagrant", "box", "remove", box_relpath]
+ return_code = subprocess.call(remove_args, cwd=release_test_dir)
+ assert return_code in (0, 1), f'{" ".join(remove_args)} returned exit code {return_code}'
+ subprocess.check_call(["vagrant", "up", f"--provider={provider_name}"], cwd=release_test_dir)
+
+ return True
+
+
+def do_run_release_test(release_test_dir, provider_name, test_config, test_device_serial):
+ with open(
+ os.path.join(release_test_dir, ".vagrant", "machines", "default", provider_name, "id")
+ ) as f:
+ machine_uuid = f.read()
+ ATTACH_USB_DEVICE[provider_name](
+ machine_uuid,
+ vid_hex=test_config["vid_hex"],
+ pid_hex=test_config["pid_hex"],
+ serial=test_device_serial,
+ )
+ tvm_home = os.path.realpath(os.path.join(THIS_DIR, "..", "..", ".."))
+
+ def _quote_cmd(cmd):
+ return " ".join(shlex.quote(a) for a in cmd)
+
+ test_cmd = _quote_cmd(["cd", tvm_home]) + " && " + _quote_cmd(test_config["test_cmd"])
+ subprocess.check_call(["vagrant", "ssh", "-c", f"bash -ec '{test_cmd}'"], cwd=release_test_dir)
+
+
+def test_command(args):
+ user_box_dir = os.path.join(THIS_DIR, args.platform)
+ base_box_dir = os.path.join(THIS_DIR, args.platform, "base-box")
+ test_config_file = os.path.join(base_box_dir, "test-config.json")
+ with open(test_config_file) as f:
+ test_config = json.load(f)
+ for key, expected_type in REQUIRED_TEST_CONFIG_KEYS.items():
+ assert key in test_config and isinstance(
+ test_config[key], expected_type
+ ), f"Expected key {key} of type {expected_type} in {test_config_file}: {test_config!r}"
+
+ test_config["vid_hex"] = test_config["vid_hex"].lower()
+ test_config["pid_hex"] = test_config["pid_hex"].lower()
+
+ providers = args.provider
+ provider_passed = {p: False for p in providers}
+
+ release_test_dir = os.path.join(THIS_DIR, "release-test")
+
+ if args.skip_build:
+ assert len(providers) == 1, "--skip-build was given, but >1 provider specified"
+
+ for provider_name in providers:
+ try:
+ if not args.skip_build:
+ do_build_release_test_vm(
+ release_test_dir, user_box_dir, base_box_dir, provider_name
+ )
+ do_run_release_test(
+ release_test_dir, provider_name, test_config, args.test_device_serial
+ )
+ provider_passed[provider_name] = True
+
+ finally:
+ if not args.skip_build and len(providers) > 1:
+ subprocess.check_call(["vagrant", "destroy", "-f"], cwd=release_test_dir)
+ shutil.rmtree(release_test_dir)
+
+ if not all(provider_passed[p] for p in provider_passed.keys()):
+ sys.exit(
+ "some providers failed release test: "
+            + ",".join(name for name, passed in provider_passed.items() if not passed)
+ )
+
+
+def release_command(args):
+    if not args.release_version:
+        sys.exit("--release-version must be specified")
+
+    subprocess.check_call(
+        [
+            "vagrant",
+            "cloud",
+            "version",
+            "create",
+            f"tlcpack/microtvm-{args.platform}",
+            args.release_version,
+        ]
+    )
+
+ for provider_name in args.provider:
+ subprocess.check_call(
+ [
+ "vagrant",
+ "cloud",
+ "publish",
+ "-f",
+ f"tlcpack/microtvm-{args.platform}",
+ args.release_version,
+ provider_name,
+ os.path.join(
+ THIS_DIR,
+ args.platform,
+ "base-box",
+ f"output-packer-{provider_name}/package.box",
+ ),
+ ]
+ )
+
+
+ALL_COMMANDS = {
+ "build": build_command,
+ "test": test_command,
+ "release": release_command,
+}
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(
+ description="Automates building, testing, and releasing a base box"
+ )
+    parser.add_argument(
+        "command",
+        help=(
+            "Action or actions (comma-separated) to perform; choices: "
+            + ", ".join(ALL_COMMANDS)
+        ),
+    )
+ parser.add_argument(
+ "platform",
+ help="Name of the platform VM to act on. Must be a sub-directory of this directory.",
+ )
+ parser.add_argument(
+ "--provider",
+ choices=ALL_PROVIDERS,
+ action="append",
+ default=[],
+ help="Name of the provider or providers to act on; if not specified, act on all",
+ )
+ parser.add_argument(
+ "--skip-build",
+ action="store_true",
+ help=(
+ "For use with the 'test' command. If given, assume a box has already been built in "
+ "the release-test subdirectory. Attach a USB device to this box and execute the "
+ "release test script--do not delete it."
+ ),
+ )
+ parser.add_argument(
+ "--test-device-serial",
+ help=(
+ "If given, attach the test device with this USB serial number. Corresponds to the "
+ "iSerial field from `lsusb -v` output."
+ ),
+ )
+ parser.add_argument(
+ "--release-version",
+ help="Version to release, in the form 'x.y.z'. Must be specified with release.",
+ )
+
+ return parser.parse_args()
+
+
+def main():
+ args = parse_args()
+ if os.path.sep in args.platform or not os.path.isdir(os.path.join(THIS_DIR, args.platform)):
+        sys.exit(f"<platform> must be a sub-directory of {THIS_DIR}; got {args.platform}")
+
+ if not args.provider:
+ args.provider = list(ALL_PROVIDERS)
+
+ todo = []
+ for phase in args.command.split(","):
+ if phase not in ALL_COMMANDS:
+ sys.exit(f"unknown command: {phase}")
+
+ todo.append(ALL_COMMANDS[phase])
+
+ for phase in todo:
+ phase(args)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/apps/microtvm/reference-vm/zephyr/.gitignore b/apps/microtvm/reference-vm/zephyr/.gitignore
new file mode 100644
index 000000000000..dace7081e3f2
--- /dev/null
+++ b/apps/microtvm/reference-vm/zephyr/.gitignore
@@ -0,0 +1 @@
+/.vagrant
diff --git a/apps/microtvm/reference-vm/zephyr/Vagrantfile b/apps/microtvm/reference-vm/zephyr/Vagrantfile
new file mode 100644
index 000000000000..5a73d1f5e79b
--- /dev/null
+++ b/apps/microtvm/reference-vm/zephyr/Vagrantfile
@@ -0,0 +1,60 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+Vagrant.configure("2") do |config|
+ config.vm.box = "tlcpack/microtvm-zephyr"
+
+ tvm_home = "../../../.."
+ dirs_to_mount = [Pathname.new(Pathname.new(tvm_home).expand_path())]
+ if ENV.has_key?("TVM_PROJECT_DIR") then
+ dirs_to_mount.append(ENV["TVM_PROJECT_DIR"])
+ puts "NOTE: also configuring project dir: %s" % [dirs_to_mount[-1]]
+ end
+
+ git_file = Pathname.new(tvm_home + "/.git")
+ if git_file.ftype() == "file" then
+    gitdir_match = Regexp.new('^gitdir: (?<gitdir>.*/.git).*\n$', Regexp::MULTILINE).match(git_file.read())
+ if !gitdir_match.nil? then
+ dirs_to_mount.append(Pathname.new(gitdir_match.named_captures["gitdir"]))
+ puts "NOTE: also configuring git-worktree gitdir: %s" % [dirs_to_mount[-1]]
+ end
+ end
+
+ config.vm.provision "shell", path: "setup.sh", env: {"TVM_HOME": dirs_to_mount[0]}, privileged: false
+
+ # Enable USB Controller on VirtualBox
+ vm_name = "microtvm-#{Time.now.tv_sec}"
+ config.vm.provider "virtualbox" do |vb, overrides|
+ vb.name = vm_name
+ vb.customize ["modifyvm", :id, "--usb", "on"]
+ vb.customize ["modifyvm", :id, "--usbehci", "on"]
+ vb.customize ["modifyvm", :id, "--usbxhci", "on"]
+ dirs_to_mount.each do |d|
+ overrides.vm.synced_folder d.to_s, d.to_s
+ end
+ end
+
+ config.vm.provider "parallels" do |prl, overrides|
+ prl.name = vm_name
+ prl.update_guest_tools = true
+ prl.customize ["set", :id, "--support-usb30", "on"]
+ dirs_to_mount.each do |d|
+ overrides.vm.synced_folder d.to_s, d.to_s, mount_options: ["share", "nosuid", "host_inodes"]
+ end
+ end
+
+end
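A note on the gitdir handling above: in a git worktree, `.git` is a plain file containing a `gitdir: <path>` pointer rather than a directory, so the pointed-to directory must also be synced into the VM for git to work there. A rough Python sketch of the same detection, for illustration only:

```python
import os
import re

def worktree_gitdir(tvm_home: str):
    """Return the pointed-to git dir if tvm_home is a git worktree, else None."""
    git_file = os.path.join(tvm_home, ".git")
    if not os.path.isfile(git_file):
        return None  # a normal checkout keeps .git as a directory
    with open(git_file) as f:
        m = re.match(r"gitdir: (?P<gitdir>.*/\.git)", f.read())
    return m.group("gitdir") if m else None
```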
diff --git a/apps/microtvm/reference-vm/zephyr/base-box/.gitignore b/apps/microtvm/reference-vm/zephyr/base-box/.gitignore
new file mode 100644
index 000000000000..e4406c4f61e2
--- /dev/null
+++ b/apps/microtvm/reference-vm/zephyr/base-box/.gitignore
@@ -0,0 +1,4 @@
+*.box
+.vagrant
+/output-packer-*
+/packer.json
diff --git a/conda/tvm/meta.yaml b/apps/microtvm/reference-vm/zephyr/base-box/Vagrantfile.packer-template
similarity index 50%
rename from conda/tvm/meta.yaml
rename to apps/microtvm/reference-vm/zephyr/base-box/Vagrantfile.packer-template
index 9e8f94789394..b1fff9c63806 100644
--- a/conda/tvm/meta.yaml
+++ b/apps/microtvm/reference-vm/zephyr/base-box/Vagrantfile.packer-template
@@ -15,48 +15,26 @@
# specific language governing permissions and limitations
# under the License.
-{% set version = "0.8.dev0" %}
+Vagrant.configure("2") do |config|
+ # From hashicorp default template:
+ # https://github.com/hashicorp/packer/blob/master/builder/vagrant/step_create_vagrantfile.go#L23-L37
-package:
- name: tvm
- version: {{ version }}
+ config.vm.define "source" do |source|
+ source.vm.box = "{{.SourceBox}}"
+ config.ssh.insert_key = {{.InsertKey}}
+ end
-source:
- path: ../..
+ config.vm.define "output" do |output|
+ output.vm.box = "{{.BoxName}}"
+ output.vm.box_url = "file://package.box"
+ config.ssh.insert_key = {{.InsertKey}}
+ end
-build:
- number: 0
+ {{ if ne .SyncedFolder "" -}}
+ config.vm.synced_folder "{{.SyncedFolder}}", "/vagrant"
+ {{- else -}}
+ config.vm.synced_folder ".", "/vagrant", disabled: true
+ {{- end}}
-requirements:
- build:
- - {{ compiler('cxx') }}
- host:
- - python {{ python }}
- - cython
- - numpy
- - setuptools
- - decorator
- - tvm-libs {{ version }}
- run:
- - python {{ python }}
- - {{ pin_compatible('numpy') }}
- - decorator
- - tvm-libs {{ version }}
- - psutil
-
-test:
- imports:
- - tvm
- requires:
- - pytest
- - scipy
- source_files:
- - tests/python
- commands:
- - python -m pytest -v tests/python/integration
-
-about:
- home: https://github.com/apache/incubator-tvm
- license: Apache-2.0
- license_family: Apache
- summary: a low level domain specific language for compiling tensor computation pipelines
+ config.vm.provision "shell", path: "../setup.sh", privileged: false
+end
diff --git a/apps/microtvm/reference-vm/zephyr/base-box/setup.sh b/apps/microtvm/reference-vm/zephyr/base-box/setup.sh
new file mode 100644
index 000000000000..fd758064f4ca
--- /dev/null
+++ b/apps/microtvm/reference-vm/zephyr/base-box/setup.sh
@@ -0,0 +1,105 @@
+#!/bin/bash -e
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -e
+
+sudo apt update
+sudo apt install -y build-essential
+sudo apt-get --purge remove modemmanager # required to access serial ports.
+
+# Zephyr
+wget --no-verbose https://apt.kitware.com/keys/kitware-archive-latest.asc
+sudo apt-key add kitware-archive-latest.asc
+sudo apt-add-repository 'deb https://apt.kitware.com/ubuntu/ bionic main'
+sudo apt update
+# NOTE: latest cmake cannot be installed due to
+# https://github.com/zephyrproject-rtos/zephyr/issues/30232
+sudo apt install -y --no-install-recommends git \
+ cmake=3.18.4-0kitware1 cmake-data=3.18.4-0kitware1 \
+ ninja-build gperf ccache dfu-util device-tree-compiler wget \
+ python3-dev python3-pip python3-setuptools python3-tk python3-wheel xz-utils file \
+ make gcc gcc-multilib g++-multilib libsdl2-dev
+
+# Avahi, so that ssh microtvm works.
+# apt install -y avahi-daemon
+
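+# Rename the guest so it is reachable under a predictable name ("microtvm").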
+OLD_HOSTNAME=$(hostname)
+sudo hostnamectl set-hostname microtvm
+sudo sed -i.bak "s/${OLD_HOSTNAME}/microtvm.localdomain/g" /etc/hosts
+
+# Poetry deps
+sudo apt install -y python3-venv
+
+# TVM deps
+sudo apt install -y llvm
+
+# ONNX deps
+sudo apt install -y protobuf-compiler libprotoc-dev
+
+# nrfjprog
+cd ~
+mkdir -p nrfjprog
+wget --no-verbose -O nRFCommandLineTools1090Linuxamd64.tar.gz https://www.nordicsemi.com/-/media/Software-and-other-downloads/Desktop-software/nRF-command-line-tools/sw/Versions-10-x-x/10-9-0/nRFCommandLineTools1090Linuxamd64tar.gz
+cd nrfjprog
+tar -xzvf ../nRFCommandLineTools1090Linuxamd64.tar.gz
+sudo apt install -y ./JLink_Linux_V680a_x86_64.deb
+sudo apt install -y ./nRF-Command-Line-Tools_10_9_0_Linux-amd64.deb
+source ~/.profile
+nrfjprog --help
+cd ..
+rm -rf nrfjprog nRFCommandLineTools1090Linuxamd64.tar.gz
+
+# Zephyr
+pip3 install --user -U west
+echo 'export PATH=$HOME/.local/bin:"$PATH"' >> ~/.profile
+source ~/.profile
+echo PATH=$PATH
+west init --mr v2.4.0 ~/zephyr
+cd ~/zephyr
+west update
+west zephyr-export
+
+cd ~
+echo "Downloading zephyr SDK..."
+wget --no-verbose https://github.com/zephyrproject-rtos/sdk-ng/releases/download/v0.11.3/zephyr-sdk-0.11.3-setup.run
+chmod +x zephyr-sdk-0.11.3-setup.run
+./zephyr-sdk-0.11.3-setup.run -- -d ~/zephyr-sdk -y
+rm -rf zephyr-sdk-0.11.3-setup.run
+
+# GDB for Zephyr SDK depends on python3.8
+sudo add-apt-repository ppa:deadsnakes/ppa
+sudo apt install -y python3.8-dev
+
+sudo find ~/zephyr-sdk -name '*.rules' -exec cp {} /etc/udev/rules.d \;
+sudo udevadm control --reload
+
+# Poetry
+curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/get-poetry.py | python3
+sed -i "/^# If not running interactively,/ i source \$HOME/.poetry/env" ~/.bashrc
+sed -i "/^# If not running interactively,/ i export ZEPHYR_BASE=$HOME/zephyr/zephyr" ~/.bashrc
+sed -i "/^# If not running interactively,/ i\\ " ~/.bashrc
+
+# Clean box for packaging as a base box
+sudo apt-get clean
+EMPTY_FILE="$HOME/EMPTY"
+dd if=/dev/zero "of=${EMPTY_FILE}" bs=1M || /bin/true
+if [ ! -e "${EMPTY_FILE}" ]; then
+ echo "failed to zero empty sectors on disk"
+ exit 2
+fi
+rm -f "${EMPTY_FILE}"
diff --git a/apps/microtvm/reference-vm/zephyr/base-box/test-config.json b/apps/microtvm/reference-vm/zephyr/base-box/test-config.json
new file mode 100644
index 000000000000..78a6bd216e65
--- /dev/null
+++ b/apps/microtvm/reference-vm/zephyr/base-box/test-config.json
@@ -0,0 +1,4 @@
+{"vid_hex": "0483",
+ "pid_hex": "374b",
+ "test_cmd": ["pytest", "tests/micro/qemu/test_zephyr.py", "--microtvm-platforms=stm32f746xx"]
+}
diff --git a/apps/microtvm/reference-vm/zephyr/pyproject.toml b/apps/microtvm/reference-vm/zephyr/pyproject.toml
new file mode 100644
index 000000000000..ed8182584e36
--- /dev/null
+++ b/apps/microtvm/reference-vm/zephyr/pyproject.toml
@@ -0,0 +1,141 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+[tool.black]
+line-length = 100
+target-version = ['py36']
+include = '(\.pyi?$)'
+exclude = '''
+
+(
+ /(
+ \.github
+ | \.tvm
+ | \.tvm_test_data
+ | \.vscode
+ | \.venv
+ | 3rdparty
+ | build\/
+ | cmake\/
+ | conda\/
+ | docker\/
+ | docs\/
+ | golang\/
+ | include\/
+ | jvm\/
+ | licenses\/
+ | nnvm\/
+ | rust\/
+ | src\/
+ | vta\/
+ | web\/
+ )/
+)
+'''
+[tool.poetry]
+name = "tvm"
+version = "0.1.0"
+description = ""
+authors = ["Your Name "]
+packages = [
+ { include = "tvm", from = "../../../../python" },
+]
+
+[tool.poetry.dependencies]
+attrs = "^19"
+decorator = "^4.4"
+numpy = "~1.19"
+psutil = "^5"
+scipy = "^1.4"
+python = "^3.6"
+tornado = "^6"
+typed_ast = "^1.4"
+
+# AutoTVM
+xgboost = {version = "^1.1", optional = true}
+
+#############
+# Importers #
+#############
+
+# NOTE: Caffe frontend dependency is from torch package.
+
+# CoreML
+coremltools = {version = "^3.3", optional = true}
+
+# Darknet
+opencv-python = {version = "^4.2", optional = true}
+cffi = {version = "^1.14", optional = true}
+
+# NOTE: Keras provided by tensorflow package.
+# If TF versions conflict, maybe try: keras = "2.3.1"
+
+# MXNet frontend
+mxnet = {version = "^1.6.0", optional = true}
+
+# ONNX frontend
+onnx = {version = "1.6.0", optional = true}
+onnxruntime = {version = "1.0.0", optional = true}
+
+# Pytorch (also used by ONNX)
+# NOTE: cannot download this right now due to https://github.com/python-poetry/poetry/issues/2247
+# torch = {url = "https://download.pytorch.org/whl/cu101/torch-1.4.0-cp36-cp36m-manylinux1_x86_64.whl", optional = true}
+# torchvision = {version = "0.5.0", optional = true}
+# NOTE: torch depends on a number of other packages but, unhelpfully, does not expose that in the
+# wheel!!!
+future = {version = "*", optional = true}
+
+# Tensorflow frontend
+tensorflow = {version = "^2.1", optional = true}
+tensorflow-estimator = {version = "^2.1", optional = true}
+
+# TFLite frontend
+tflite = {version = "2.1.0", optional = true}
+wheel = "*"
+
+
+[tool.poetry.extras]
+xgboost = ["xgboost"]
+importer-caffe2 = ["torch"]
+importer-coreml = ["coremltools"]
+importer-darknet = ["opencv-python"]
+importer-keras = ["tensorflow", "tensorflow-estimator"]
+importer-onnx = ["onnx", "onnxruntime", "torch", "torchvision", "future"]
+importer-pytorch = ["torch", "torchvision", "future"]
+importer-tensorflow = ["tensorflow", "tensorflow-estimator"]
+importer-tflite = ["tlfite", "tensorflow", "tensorflow-estimator"]
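+# Install an importer's optional deps with e.g.: poetry install -E importer-onnx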
+
+[tool.poetry.dev-dependencies]
+autodocsumm = "^0.1"
+black = "^19.10b0"
+sphinx = "^3.0"
+sphinx-gallery = "^0.4"
+sphinx-rtd-theme = "^0.4"
+matplotlib = "^3.2"
+Image = "^1.5"
+recommonmark = "^0.6"
+pillow = "< 7"
+pyformat = "^0.7"
+pylint = "^2.4"
+pytest = "^5.4"
+
+[build-system]
+requires = ["poetry>=0.12"]
+build-backend = "poetry.masonry.api"
+
+[tool.autopep8]
+max_line_length = 100
diff --git a/apps/microtvm/reference-vm/zephyr/rebuild-tvm.sh b/apps/microtvm/reference-vm/zephyr/rebuild-tvm.sh
new file mode 100755
index 000000000000..df833042c670
--- /dev/null
+++ b/apps/microtvm/reference-vm/zephyr/rebuild-tvm.sh
@@ -0,0 +1,34 @@
+#!/bin/bash -e
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -e
+
+cd "$(dirname $0)"
+cd "$(git rev-parse --show-toplevel)"
+BUILD_DIR=build-microtvm
+
+if [ ! -e "${BUILD_DIR}" ]; then
+ mkdir "${BUILD_DIR}"
+fi
+cp cmake/config.cmake "${BUILD_DIR}"
+cd "${BUILD_DIR}"
+sed -i 's/USE_MICRO OFF/USE_MICRO ON/' config.cmake
+sed -i 's/USE_GRAPH_RUNTIME_DEBUG OFF/USE_GRAPH_RUNTIME_DEBUG ON/' config.cmake
+sed -i 's/USE_LLVM OFF/USE_LLVM ON/' config.cmake
+cmake ..
+make -j4
diff --git a/apps/microtvm/reference-vm/zephyr/setup.sh b/apps/microtvm/reference-vm/zephyr/setup.sh
new file mode 100644
index 000000000000..053e41e85256
--- /dev/null
+++ b/apps/microtvm/reference-vm/zephyr/setup.sh
@@ -0,0 +1,44 @@
+#!/bin/bash -e
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+# TVM
+# NOTE: TVM is presumed to be mounted already by Vagrantfile.
+cd "${TVM_HOME}"
+
+apps/microtvm/reference-vm/zephyr/rebuild-tvm.sh
+
+cd apps/microtvm/reference-vm/zephyr
+
+poetry env use 3.6
+# NOTE: due to https://github.com/python-poetry/poetry/issues/2247, download torch here.
+poetry run pip3 install torch==1.4.0 torchvision==0.5.0
+
+echo "------------------------------[ TVM Message ]------------------------------"
+echo "WARNING: running 'poetry lock', which could take several minutes (depending"
+echo "on your network connection and the state of PyPI) as dependencies are"
+echo "downloaded and cached for future use."
+echo "------------------------------[ TVM Message ]------------------------------"
+poetry lock -vvv
+poetry install
+poetry run pip3 install -r ~/zephyr/zephyr/scripts/requirements.txt
+
+echo "export TVM_LIBRARY_PATH=\"$TVM_HOME\"/build-microtvm" >>~/.profile
+echo "VENV_PATH=\$((cd \"$TVM_HOME\"/apps/microtvm/reference-vm/zephyr && poetry env list --full-path) | sed -E 's/^(.*)[[:space:]]\(Activated\)\$/\1/g')" >>~/.profile
+echo "source \$VENV_PATH/bin/activate" >>~/.profile
diff --git a/apps/topi_recipe/conv/depthwise_conv2d_test.py b/apps/topi_recipe/conv/depthwise_conv2d_test.py
index 036f1a4240f2..94687edde5f9 100644
--- a/apps/topi_recipe/conv/depthwise_conv2d_test.py
+++ b/apps/topi_recipe/conv/depthwise_conv2d_test.py
@@ -22,7 +22,7 @@
from tvm.contrib import nvcc
from tvm import topi
-from tvm.topi.util import get_const_tuple
+from tvm.topi.utils import get_const_tuple
from tvm.topi.cuda.depthwise_conv2d import (
schedule_depthwise_conv2d_nchw,
schedule_depthwise_conv2d_nhwc,
diff --git a/apps/topi_recipe/conv/test_conv2d_hwcn_map.py b/apps/topi_recipe/conv/test_conv2d_hwcn_map.py
index 1d2032d5c405..d67bfdc8952e 100644
--- a/apps/topi_recipe/conv/test_conv2d_hwcn_map.py
+++ b/apps/topi_recipe/conv/test_conv2d_hwcn_map.py
@@ -22,7 +22,7 @@
from tvm import te
from tvm.contrib import nvcc
from tvm import topi
-from tvm.topi.util import get_const_tuple
+from tvm.topi.utils import get_const_tuple
TASK = "conv2d_hwcn_map"
USE_MANUAL_CODE = False
diff --git a/apps/topi_recipe/gemm/android_gemm_square.py b/apps/topi_recipe/gemm/android_gemm_square.py
index 522818842cfa..0e64dcd3844d 100644
--- a/apps/topi_recipe/gemm/android_gemm_square.py
+++ b/apps/topi_recipe/gemm/android_gemm_square.py
@@ -19,7 +19,7 @@
from tvm import te
import os
from tvm import rpc
-from tvm.contrib import util, ndk
+from tvm.contrib import utils, ndk
import numpy as np
# Set to be address of tvm proxy.
@@ -121,7 +121,7 @@ def test_gemm_gpu(N, times, bn, num_block, num_thread):
print(tvm.lower(s, [A, B, C], simple_mode=True))
f = tvm.build(s, [A, B, C], "opencl", target_host=target, name="gemm_gpu")
- temp = util.tempdir()
+ temp = utils.tempdir()
path_dso = temp.relpath("gemm_gpu.so")
f.export_library(path_dso, ndk.create_shared)
diff --git a/apps/wasm-standalone/wasm-graph/Cargo.toml b/apps/wasm-standalone/wasm-graph/Cargo.toml
index 9cdc8f599579..cea491b2f128 100644
--- a/apps/wasm-standalone/wasm-graph/Cargo.toml
+++ b/apps/wasm-standalone/wasm-graph/Cargo.toml
@@ -22,7 +22,7 @@ authors = ["TVM Contributors"]
edition = "2018"
description = "WebAssembly graph to deep learning frameworks using TVM"
readme = "README.md"
-repository = "https://github.com/apache/incubator-tvm"
+repository = "https://github.com/apache/tvm"
license = "Apache-2.0"
keywords = ["wasm", "machine learning", "tvm"]
diff --git a/apps/wasm-standalone/wasm-graph/tools/build_graph_lib.py b/apps/wasm-standalone/wasm-graph/tools/build_graph_lib.py
index cfea02a230d2..42695d28fadb 100644
--- a/apps/wasm-standalone/wasm-graph/tools/build_graph_lib.py
+++ b/apps/wasm-standalone/wasm-graph/tools/build_graph_lib.py
@@ -44,7 +44,7 @@ def build_graph_lib(model_file, opt_level):
# Compile the relay mod
mod, params = _get_mod_and_params(model_file)
- target = "llvm -target=wasm32-unknown-unknown -mattr=+simd128 --system-lib"
+ target = "llvm -mtriple=wasm32-unknown-unknown -mattr=+simd128 --system-lib"
with tvm.transform.PassContext(opt_level=opt_level):
graph_json, lib, params = relay.build(mod, target=target, params=params)
@@ -71,7 +71,7 @@ def build_graph_lib(model_file, opt_level):
"--opt-level",
type=int,
default=0,
- help="level of optimization. 0 is unoptimized and 3 is the highest level",
+ help="level of optimization. 0 is non-optimized and 3 is the highest level",
)
args = parser.parse_args()
diff --git a/apps/wasm-standalone/wasm-runtime/Cargo.toml b/apps/wasm-standalone/wasm-runtime/Cargo.toml
index db00a55c31b5..99f6db54431f 100644
--- a/apps/wasm-standalone/wasm-runtime/Cargo.toml
+++ b/apps/wasm-standalone/wasm-runtime/Cargo.toml
@@ -21,7 +21,7 @@ version = "0.1.0"
authors = ["TVM Contributors"]
edition = "2018"
description = "WebAssembly runtime to deep learning frameworks using wasmtime"
-repository = "https://github.com/apache/incubator-tvm"
+repository = "https://github.com/apache/tvm"
license = "Apache-2.0"
keywords = ["wasm", "machine learning", "wasmtime"]
diff --git a/cmake/config.cmake b/cmake/config.cmake
index 1d465b2fe389..4a010d3ef099 100644
--- a/cmake/config.cmake
+++ b/cmake/config.cmake
@@ -113,15 +113,16 @@ set(USE_MICRO_STANDALONE_RUNTIME OFF)
#
# Possible values:
# - ON: enable llvm with cmake's find search
-# - OFF: disable llvm
+# - OFF: disable llvm; note this will disable CPU codegen,
+# which is needed for most use cases
# - /path/to/llvm-config: enable specific LLVM when multiple llvm-dev is available.
-set(USE_LLVM OFF)
+set(USE_LLVM ON)
#---------------------------------------------
# Contrib libraries
#---------------------------------------------
# Whether to build with BYODT software emulated posit custom datatype
-#
+#
# Possible values:
# - ON: enable BYODT posit, requires setting UNIVERSAL_PATH
# - OFF: disable BYODT posit
@@ -222,6 +223,22 @@ set(USE_ETHOSN OFF)
# otherwise use ETHOSN_HW (OFF) to use the software test infrastructure
set(USE_ETHOSN_HW OFF)
+# Whether to build with TensorRT codegen or runtime
+# Examples are available here: docs/deploy/tensorrt.rst.
+#
+# USE_TENSORRT_CODEGEN - Support for compiling a relay graph where supported operators are
+# offloaded to TensorRT. OFF/ON
+# USE_TENSORRT_RUNTIME - Support for running TensorRT compiled modules, requires presence of
+# the TensorRT library. OFF/ON/"path/to/TensorRT"
+set(USE_TENSORRT_CODEGEN OFF)
+set(USE_TENSORRT_RUNTIME OFF)
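+# For example (hypothetical local path):
+#   set(USE_TENSORRT_RUNTIME /path/to/TensorRT)  # or ON to search default paths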
+
+# Whether to use Vitis-AI codegen
+set(USE_VITIS_AI OFF)
+
+# Build Verilator codegen and runtime; an example is located in 3rdparty/vta-hw/apps/verilator
+set(USE_VERILATOR_HW OFF)
+
# Build ANTLR parser for Relay text format
# Possible values:
# - ON: enable ANTLR by searching default locations (cmake find_program for antlr4 and /usr/local for jar)
@@ -260,7 +277,3 @@ set(USE_HEXAGON_SDK /path/to/sdk)
# Whether to use ONNX codegen
set(USE_TARGET_ONNX OFF)
-
-# Whether to compile the standalone C runtime.
-set(USE_STANDALONE_CRT ON)
-
diff --git a/cmake/modules/ClangFlags.cmake b/cmake/modules/ClangFlags.cmake
index 9a3ac05a2a5b..53d0e3631caf 100644
--- a/cmake/modules/ClangFlags.cmake
+++ b/cmake/modules/ClangFlags.cmake
@@ -21,7 +21,11 @@ if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
EXECUTE_PROCESS(COMMAND ${CMAKE_CXX_COMPILER} --version OUTPUT_VARIABLE clang_full_version)
string (REGEX REPLACE ".*clang version ([0-9]+\\.[0-9]+).*" "\\1" CLANG_VERSION ${clang_full_version})
message(STATUS "CLANG_VERSION ${CLANG_VERSION}")
- if (CLANG_VERSION VERSION_GREATER_EQUAL 10.0)
+ # cmake 3.2 does not support VERSION_GREATER_EQUAL
+ set(CLANG_MINIMUM_VERSION 10.0)
+ if ((CLANG_VERSION VERSION_GREATER ${CLANG_MINIMUM_VERSION})
+ OR
+ (CLANG_VERSION VERSION_EQUAL ${CLANG_MINIMUM_VERSION}))
message(STATUS "Setting enhanced clang warning flags")
# These warnings are only enabled when clang's -Weverything flag is enabled
diff --git a/cmake/modules/LLVM.cmake b/cmake/modules/LLVM.cmake
index 5f8ace17111f..ac870b17faeb 100644
--- a/cmake/modules/LLVM.cmake
+++ b/cmake/modules/LLVM.cmake
@@ -16,7 +16,14 @@
# under the License.
# LLVM rules
-add_definitions(-DDMLC_USE_FOPEN64=0)
+# Due to LLVM debug symbols you can sometimes face linking issues on
+# certain compiler, platform combinations if you don't set NDEBUG.
+#
+# See https://github.com/imageworks/OpenShadingLanguage/issues/1069
+# for more discussion.
+add_definitions(-DDMLC_USE_FOPEN64=0 -DNDEBUG=1)
+# TODO(@jroesch, @tkonolige): if we actually use targets we can do this.
+# target_compile_definitions(tvm PRIVATE NDEBUG=1)
# Test if ${USE_LLVM} is not an explicit boolean false
# It may be a boolean or a string
diff --git a/cmake/modules/RustExt.cmake b/cmake/modules/RustExt.cmake
new file mode 100644
index 000000000000..2922bc48dee2
--- /dev/null
+++ b/cmake/modules/RustExt.cmake
@@ -0,0 +1,43 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
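+# Builds the Rust compiler extension and links it into TVM. A sketch of the
+# expected invocation (values taken from the branches below):
+#   cmake -DUSE_RUST_EXT=STATIC ..   # or DYNAMIC, or OFF to disable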
+if(USE_RUST_EXT)
+ set(RUST_SRC_DIR "${CMAKE_SOURCE_DIR}/rust")
+ set(CARGO_OUT_DIR "${CMAKE_SOURCE_DIR}/rust/target")
+
+ if(USE_RUST_EXT STREQUAL "STATIC")
+ set(COMPILER_EXT_PATH "${CARGO_OUT_DIR}/release/libcompiler_ext.a")
+ elseif(USE_RUST_EXT STREQUAL "DYNAMIC")
+ set(COMPILER_EXT_PATH "${CARGO_OUT_DIR}/release/libcompiler_ext.so")
+ else()
+ message(FATAL_ERROR "invalid setting for USE_RUST_EXT, STATIC, DYNAMIC or OFF")
+ endif()
+
+ add_custom_command(
+ OUTPUT "${COMPILER_EXT_PATH}"
+ COMMAND cargo build --release
+ MAIN_DEPENDENCY "${RUST_SRC_DIR}"
+ WORKING_DIRECTORY "${RUST_SRC_DIR}/compiler-ext")
+
+ add_custom_target(rust_ext ALL DEPENDS "${COMPILER_EXT_PATH}")
+
+ # TODO(@jroesch, @tkonolige): move this to CMake target
+ # target_link_libraries(tvm "${COMPILER_EXT_PATH}" PRIVATE)
+ list(APPEND TVM_LINKER_LIBS ${COMPILER_EXT_PATH})
+
+ add_definitions(-DRUST_COMPILER_EXT=1)
+endif()
diff --git a/cmake/modules/StandaloneCrt.cmake b/cmake/modules/StandaloneCrt.cmake
index 73c85d13e2ef..256ce2a48a6c 100644
--- a/cmake/modules/StandaloneCrt.cmake
+++ b/cmake/modules/StandaloneCrt.cmake
@@ -44,6 +44,7 @@ if(USE_MICRO)
"src/runtime/crt/include *.h -> include"
"src/runtime/crt/common *.c -> src/runtime/crt/common"
"src/runtime/crt/graph_runtime *.c -> src/runtime/crt/graph_runtime"
+ "src/runtime/crt/graph_runtime_module *.c -> src/runtime/crt/graph_runtime_module"
"src/runtime/crt/host crt_config.h -> src/runtime/crt/host"
"src/runtime/crt/utvm_rpc_common *.cc -> src/runtime/crt/utvm_rpc_common"
"src/runtime/crt/utvm_rpc_server *.cc -> src/runtime/crt/utvm_rpc_server"
diff --git a/cmake/modules/VTA.cmake b/cmake/modules/VTA.cmake
index a9fc66507d35..115216680fff 100644
--- a/cmake/modules/VTA.cmake
+++ b/cmake/modules/VTA.cmake
@@ -65,7 +65,7 @@ elseif(PYTHON)
target_compile_definitions(vta_fsim PUBLIC ${__strip_def})
endforeach()
if(APPLE)
- set_target_properties(vta_fsim PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
+ set_property(TARGET vta_fsim APPEND PROPERTY LINK_FLAGS "-undefined dynamic_lookup")
endif(APPLE)
target_compile_definitions(vta_fsim PUBLIC USE_FSIM_TLPP)
endif()
@@ -86,7 +86,7 @@ elseif(PYTHON)
target_compile_definitions(vta_tsim PUBLIC ${__strip_def})
endforeach()
if(APPLE)
- set_target_properties(vta_tsim PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
+ set_property(TARGET vta_tsim APPEND PROPERTY LINK_FLAGS "-undefined dynamic_lookup")
endif(APPLE)
endif()
diff --git a/cmake/modules/contrib/TensorRT.cmake b/cmake/modules/contrib/TensorRT.cmake
index 2615f1fe31e1..24a8241a2229 100644
--- a/cmake/modules/contrib/TensorRT.cmake
+++ b/cmake/modules/contrib/TensorRT.cmake
@@ -15,12 +15,26 @@
# specific language governing permissions and limitations
# under the License.
+# TensorRT Codegen only. This can be enabled independently of USE_TENSORRT_RUNTIME to enable
+# compilation of TensorRT modules without requiring TensorRT to be installed. The compiled modules
+# can only be executed using a TVM built with USE_TENSORRT_RUNTIME=ON.
+
+include (FindPackageHandleStandardArgs)
+
+if(USE_TENSORRT_CODEGEN)
+ message(STATUS "Build with TensorRT codegen")
+ file(GLOB COMPILER_TENSORRT_SRCS src/relay/backend/contrib/tensorrt/*.cc)
+ set_source_files_properties(${COMPILER_TENSORRT_SRCS} PROPERTIES COMPILE_FLAGS "-Wno-deprecated-declarations")
+ file(GLOB RUNTIME_TENSORRT_SRCS src/runtime/contrib/tensorrt/tensorrt_runtime.cc)
+ set_source_files_properties(${RUNTIME_TENSORRT_SRCS} PROPERTIES COMPILE_FLAGS "-Wno-deprecated-declarations")
+ list(APPEND COMPILER_SRCS ${COMPILER_TENSORRT_SRCS})
+ list(APPEND COMPILER_SRCS ${RUNTIME_TENSORRT_SRCS})
+endif()
+
# TensorRT Runtime
-if(USE_TENSORRT)
- # Enable codegen as well
- SET(USE_TENSORRT_CODEGEN ON)
- if(IS_DIRECTORY ${USE_TENSORRT})
- set(TENSORRT_ROOT_DIR ${USE_TENSORRT})
+if(USE_TENSORRT_RUNTIME)
+ if(IS_DIRECTORY ${USE_TENSORRT_RUNTIME})
+ set(TENSORRT_ROOT_DIR ${USE_TENSORRT_RUNTIME})
message(STATUS "Custom TensorRT path: " ${TENSORRT_ROOT_DIR})
endif()
find_path(TENSORRT_INCLUDE_DIR NvInfer.h HINTS ${TENSORRT_ROOT_DIR} PATH_SUFFIXES include)
@@ -33,21 +47,11 @@ if(USE_TENSORRT)
include_directories(${TENSORRT_INCLUDE_DIR})
list(APPEND TVM_RUNTIME_LINKER_LIBS ${TENSORRT_LIB_DIR})
- # Relay TRT runtime sources
- file(GLOB TENSORRT_RELAY_CONTRIB_SRC src/runtime/contrib/tensorrt/*.cc)
- list(APPEND RUNTIME_SRCS ${TENSORRT_RELAY_CONTRIB_SRC})
+ # TRT runtime sources
+ file(GLOB RUNTIME_TENSORRT_SRCS src/runtime/contrib/tensorrt/*.cc)
+ set_source_files_properties(${RUNTIME_TENSORRT_SRCS} PROPERTIES COMPILE_FLAGS "-Wno-deprecated-declarations")
+ list(APPEND RUNTIME_SRCS ${RUNTIME_TENSORRT_SRCS})
# Set defines
add_definitions(-DTVM_GRAPH_RUNTIME_TENSORRT)
endif()
-# TensorRT Codegen only. This can be enabled independently of USE_TENSORRT to
-# enable compilation of TensorRT modules without requiring TensorRT to be
-# installed. The compiled modules will only be able to be executed using a TVM
-# built with USE_TENSORRT=ON.
-if(USE_TENSORRT_CODEGEN)
- message(STATUS "Build with TensorRT codegen")
- # Relay TRT codegen sources
- file(GLOB TENSORRT_RELAY_CONTRIB_SRC src/relay/backend/contrib/tensorrt/*.cc)
- list(APPEND COMPILER_SRCS ${TENSORRT_RELAY_CONTRIB_SRC})
- list(APPEND COMPILER_SRCS src/runtime/contrib/tensorrt/tensorrt_module.cc)
-endif()
diff --git a/cmake/modules/contrib/Verilator.cmake b/cmake/modules/contrib/Verilator.cmake
new file mode 100644
index 000000000000..d3c1a7161182
--- /dev/null
+++ b/cmake/modules/contrib/Verilator.cmake
@@ -0,0 +1,28 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+if(USE_VERILATOR_HW STREQUAL "ON")
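+ # Build the verilator library in vta-hw first so find_library below can locate it.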
+ execute_process(COMMAND make --directory ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/vta-hw/apps/verilator)
+ file(GLOB VERILATOR_RELAY_CONTRIB_SRC src/relay/backend/contrib/verilator/codegen.cc)
+ list(APPEND COMPILER_SRCS ${VERILATOR_RELAY_CONTRIB_SRC})
+ list(APPEND COMPILER_SRCS ${JSON_RELAY_CONTRIB_SRC})
+ find_library(EXTERN_LIBRARY_VERILATOR NAMES verilator PATHS ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/vta-hw/apps/verilator)
+ list(APPEND TVM_RUNTIME_LINKER_LIBS ${EXTERN_LIBRARY_VERILATOR})
+ file(GLOB VERILATOR_CONTRIB_SRC src/runtime/contrib/verilator/verilator_runtime.cc)
+ list(APPEND RUNTIME_SRCS ${VERILATOR_CONTRIB_SRC})
+endif()
+
diff --git a/cmake/modules/contrib/VitisAI.cmake b/cmake/modules/contrib/VitisAI.cmake
new file mode 100644
index 000000000000..083bd6d7adc8
--- /dev/null
+++ b/cmake/modules/contrib/VitisAI.cmake
@@ -0,0 +1,47 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+if(USE_VITIS_AI)
+ set(PYXIR_SHARED_LIB libpyxir.so)
+ find_package(PythonInterp 3.6 REQUIRED)
+ if(NOT PYTHON)
+ find_program(PYTHON NAMES python3 python3.6)
+ endif()
+ execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c"
+ "import pyxir as px; print(px.get_include_dir()); print(px.get_lib_dir());"
+ RESULT_VARIABLE __result
+ OUTPUT_VARIABLE __output
+ OUTPUT_STRIP_TRAILING_WHITESPACE)
+
+ if(__result MATCHES 0)
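+ # The script printed two lines (include dir, then lib dir); escape any
+ # semicolons, then turn the newlines into list separators for CMake.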
+ string(REGEX REPLACE ";" "\\\\;" __values ${__output})
+ string(REGEX REPLACE "\r?\n" ";" __values ${__values})
+ list(GET __values 0 PYXIR_INCLUDE_DIR)
+ list(GET __values 1 PYXIR_LIB_DIR)
+ else()
+ message(FATAL_ERROR "Can't build TVM with Vitis-AI because PyXIR can't be found")
+ endif()
+ message(STATUS "Build with contrib.vitisai")
+ include_directories(${PYXIR_INCLUDE_DIR})
+ file(GLOB VAI_CONTRIB_SRC src/runtime/contrib/vitis_ai/*.cc)
+ file(GLOB COMPILER_VITIS_AI_SRCS
+ CONFIGURE_DEPENDS src/relay/backend/contrib/vitis_ai/*)
+ list(APPEND COMPILER_SRCS ${COMPILER_VITIS_AI_SRCS})
+ link_directories(${PYXIR_LIB_DIR})
+ list(APPEND TVM_RUNTIME_LINKER_LIBS "pyxir")
+ list(APPEND RUNTIME_SRCS ${VAI_CONTRIB_SRC})
+endif(USE_VITIS_AI)
diff --git a/cmake/util/FindCUDA.cmake b/cmake/utils/FindCUDA.cmake
similarity index 93%
rename from cmake/util/FindCUDA.cmake
rename to cmake/utils/FindCUDA.cmake
index f7d9b5ed6d08..c95f8ce722f4 100644
--- a/cmake/util/FindCUDA.cmake
+++ b/cmake/utils/FindCUDA.cmake
@@ -87,15 +87,20 @@ macro(find_cuda use_cuda)
NO_DEFAULT_PATH)
find_library(CUDA_CUDNN_LIBRARY cudnn
${CUDA_TOOLKIT_ROOT_DIR}/lib64
- ${CUDA_TOOLKIT_ROOT_DIR}/lib)
+ ${CUDA_TOOLKIT_ROOT_DIR}/lib
+ NO_DEFAULT_PATH)
+ # fall back to the default search path if cudnn is not found in the non-default locations
+ find_library(CUDA_CUDNN_LIBRARY cudnn)
find_library(CUDA_CUBLAS_LIBRARY cublas
${CUDA_TOOLKIT_ROOT_DIR}/lib64
- ${CUDA_TOOLKIT_ROOT_DIR}/lib)
+ ${CUDA_TOOLKIT_ROOT_DIR}/lib
+ NO_DEFAULT_PATH)
find_library(CUDA_CUBLASLT_LIBRARY
NAMES cublaslt cublasLt
PATHS
${CUDA_TOOLKIT_ROOT_DIR}/lib64
- ${CUDA_TOOLKIT_ROOT_DIR}/lib)
+ ${CUDA_TOOLKIT_ROOT_DIR}/lib
+ NO_DEFAULT_PATH)
endif(MSVC)
message(STATUS "Found CUDA_TOOLKIT_ROOT_DIR=" ${CUDA_TOOLKIT_ROOT_DIR})
message(STATUS "Found CUDA_CUDA_LIBRARY=" ${CUDA_CUDA_LIBRARY})
diff --git a/cmake/util/FindEthosN.cmake b/cmake/utils/FindEthosN.cmake
similarity index 100%
rename from cmake/util/FindEthosN.cmake
rename to cmake/utils/FindEthosN.cmake
diff --git a/cmake/util/FindLLVM.cmake b/cmake/utils/FindLLVM.cmake
similarity index 100%
rename from cmake/util/FindLLVM.cmake
rename to cmake/utils/FindLLVM.cmake
diff --git a/cmake/util/FindOpenCL.cmake b/cmake/utils/FindOpenCL.cmake
similarity index 100%
rename from cmake/util/FindOpenCL.cmake
rename to cmake/utils/FindOpenCL.cmake
diff --git a/cmake/util/FindROCM.cmake b/cmake/utils/FindROCM.cmake
similarity index 100%
rename from cmake/util/FindROCM.cmake
rename to cmake/utils/FindROCM.cmake
diff --git a/cmake/util/FindVulkan.cmake b/cmake/utils/FindVulkan.cmake
similarity index 100%
rename from cmake/util/FindVulkan.cmake
rename to cmake/utils/FindVulkan.cmake
diff --git a/cmake/util/Util.cmake b/cmake/utils/Utils.cmake
similarity index 100%
rename from cmake/util/Util.cmake
rename to cmake/utils/Utils.cmake
diff --git a/conda/Dockerfile.template b/conda/Dockerfile.template
index 1b5dc6fbef5e..342d532bbff5 100644
--- a/conda/Dockerfile.template
+++ b/conda/Dockerfile.template
@@ -17,30 +17,16 @@
FROM nvidia/cuda:{{ cuda_version }}-devel-ubuntu16.04
-RUN apt-get update && apt-get install -y --no-install-recommends \
- bzip2 curl sudo binutils && \
- rm -rf /var/lib/apt/lists/*
+RUN apt-get update --fix-missing && apt-get install -y bzip2 wget sudo binutils git
-RUN curl -fsSL http://developer.download.nvidia.com/compute/redist/cudnn/v{{ cudnn_short_version }}/cudnn-{{ cuda_version }}-linux-x64-v{{ cudnn_version }}.tgz -O && \
+RUN wget -q http://developer.download.nvidia.com/compute/redist/cudnn/v{{ cudnn_short_version }}/cudnn-{{ cuda_version }}-linux-x64-v{{ cudnn_version }}.tgz && \
tar --no-same-owner -xzf cudnn-{{ cuda_version }}-linux-x64-v{{ cudnn_version }}.tgz -C /usr/local && \
rm cudnn-{{ cuda_version }}-linux-x64-v{{ cudnn_version }}.tgz && \
ldconfig
-
-RUN curl -o ~/miniconda.sh -O https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
- chmod +x ~/miniconda.sh && \
- ~/miniconda.sh -b -p /opt/conda && \
- rm ~/miniconda.sh && \
- /opt/conda/bin/conda upgrade --all && \
- /opt/conda/bin/conda install conda-build conda-verify && \
- /opt/conda/bin/conda clean -ya
-
-RUN /opt/conda/bin/conda install --download-only cmake make zlib
-RUN /opt/conda/bin/conda install --download-only -c numba llvmdev=8.0.0
+COPY install/ubuntu_install_conda.sh /install/ubuntu_install_conda.sh
+RUN bash /install/ubuntu_install_conda.sh
ENV PATH /opt/conda/bin:$PATH
ENV LD_LIBRARY_PATH /usr/local/nvidia/lib:/usr/local/nvidia/lib64
ENV CONDA_BLD_PATH /tmp
-
-WORKDIR /workspace
-RUN chmod -R a+w /workspace
diff --git a/conda/build-environment.yaml b/conda/build-environment.yaml
new file mode 100644
index 000000000000..31b39bfafcd0
--- /dev/null
+++ b/conda/build-environment.yaml
@@ -0,0 +1,37 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Build environment that can be used to build tvm.
+name: tvm-build
+
+# The conda channels to lookup the dependencies
+channels:
+ - anaconda
+ - conda-forge
+
+# The packages to install to the environment
+dependencies:
+ - conda-build
+ - git
+ - llvmdev ==10.0.0
+ - numpy
+ - pytest
+ - cython
+ - cmake
+ - bzip2
+ - make
+ - scipy
diff --git a/conda/build_cpu.sh b/conda/build_cpu.sh
index 992b1a369b96..48b93b23dc0f 100755
--- a/conda/build_cpu.sh
+++ b/conda/build_cpu.sh
@@ -26,6 +26,4 @@ mkdir -p /tmp/.conda/pkgs
touch /tmp/.conda/pkgs/urls.txt
touch /tmp/.conda/environments.txt
-
-conda build --output-folder=conda/pkg -c numba conda/tvm-libs
-conda build --output-folder=conda/pkg -m conda/conda_build_config.yaml conda/tvm
+conda build --output-folder=conda/pkg conda/recipe
diff --git a/conda/build_cuda.sh b/conda/build_cuda.sh
index 2c9a20ae66ae..ec4a144852b7 100755
--- a/conda/build_cuda.sh
+++ b/conda/build_cuda.sh
@@ -26,5 +26,4 @@ mkdir -p /tmp/.conda/pkgs
touch /tmp/.conda/pkgs/urls.txt
touch /tmp/.conda/environments.txt
-
-conda build --output-folder=conda/pkg --variants "{cuda: True, cuda_version: ${CUDA_VERSION%.*}}" -c numba conda/tvm-libs
+conda build --output-folder=conda/pkg --variants "{cuda: True, cuda_version: ${CUDA_VERSION%.*}}" conda/recipe
diff --git a/conda/build_win.bat b/conda/build_win.bat
new file mode 100644
index 000000000000..59d0d07340c7
--- /dev/null
+++ b/conda/build_win.bat
@@ -0,0 +1,18 @@
+:: Licensed to the Apache Software Foundation (ASF) under one
+:: or more contributor license agreements. See the NOTICE file
+:: distributed with this work for additional information
+:: regarding copyright ownership. The ASF licenses this file
+:: to you under the Apache License, Version 2.0 (the
+:: "License"); you may not use this file except in compliance
+:: with the License. You may obtain a copy of the License at
+::
+:: http://www.apache.org/licenses/LICENSE-2.0
+::
+:: Unless required by applicable law or agreed to in writing,
+:: software distributed under the License is distributed on an
+:: "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+:: KIND, either express or implied. See the License for the
+:: specific language governing permissions and limitations
+:: under the License.
+
+conda build --output-folder=conda/pkg conda/recipe
diff --git a/conda/recipe/bld.bat b/conda/recipe/bld.bat
new file mode 100644
index 000000000000..9fc0469febc6
--- /dev/null
+++ b/conda/recipe/bld.bat
@@ -0,0 +1,38 @@
+:: Licensed to the Apache Software Foundation (ASF) under one
+:: or more contributor license agreements. See the NOTICE file
+:: distributed with this work for additional information
+:: regarding copyright ownership. The ASF licenses this file
+:: to you under the Apache License, Version 2.0 (the
+:: "License"); you may not use this file except in compliance
+:: with the License. You may obtain a copy of the License at
+::
+:: http://www.apache.org/licenses/LICENSE-2.0
+::
+:: Unless required by applicable law or agreed to in writing,
+:: software distributed under the License is distributed on an
+:: "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+:: KIND, either express or implied. See the License for the
+:: specific language governing permissions and limitations
+:: under the License.
+echo on
+
+rd /s /q build
+mkdir build
+cd build
+
+cmake ^
+ -DCMAKE_PREFIX_PATH=%LIBRARY_PREFIX% ^
+ -DCMAKE_INSTALL_PREFIX:PATH=%LIBRARY_PREFIX% ^
+ -DUSE_LLVM=ON ^
+ -DUSE_RPC=ON ^
+ -DUSE_CPP_RPC=ON ^
+ -DUSE_SORT=ON ^
+ -DUSE_RANDOM=ON ^
+ -DUSE_GRAPH_RUNTIME_DEBUG=ON ^
+ -DINSTALL_DEV=ON ^
+ %SRC_DIR%
+
+cd ..
+:: defer build to install stage to avoid rebuild.
+:: sometimes windows msbuild is not very good at file
+:: caching and install would result in a rebuild
diff --git a/conda/tvm-libs/build.sh b/conda/recipe/build.sh
old mode 100644
new mode 100755
similarity index 63%
rename from conda/tvm-libs/build.sh
rename to conda/recipe/build.sh
index 94919c60e779..c9e76314da31
--- a/conda/tvm-libs/build.sh
+++ b/conda/recipe/build.sh
@@ -6,9 +6,9 @@
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -19,35 +19,41 @@
set -e
set -u
+GPU_OPT=""
+TOOLCHAIN_OPT=""
+
if [ "$target_platform" == "osx-64" ]; then
# macOS 64 bits
- METAL_OPT="-DUSE_METAL=ON"
- TOOLCHAIN_OPT="-DCMAKE_OSX_DEPLOYMENT_TARGET=10.11"
-else
- METAL_OPT=""
- if [ "$target_platform" == "linux-64" ]; then
- # Linux 64 bits
- TOOLCHAIN_OPT="-DCMAKE_TOOLCHAIN_FILE=${RECIPE_DIR}/../cross-linux.cmake"
- else
- # Windows (or 32 bits, which we don't support)
- TOOLCHAIN_OPT=""
- fi
+ GPU_OPT="-DUSE_METAL=ON"
+elif [ "$target_platform" == "linux-64" ]; then
+ TOOLCHAIN_OPT="-DCMAKE_TOOLCHAIN_FILE=${RECIPE_DIR}/cross-linux.cmake"
fi
# When cuda is not set, we default to False
cuda=${cuda:-False}
if [ "$cuda" == "True" ]; then
- CUDA_OPT="-DUSE_CUDA=ON -DUSE_CUBLAS=ON -DUSE_CUDNN=ON"
+ GPU_OPT="-DUSE_CUDA=ON -DUSE_CUBLAS=ON -DUSE_CUDNN=ON"
TOOLCHAIN_OPT=""
-else
- CUDA_OPT=""
fi
+# remove any stale config.cmake left over from previous builds
+rm -f config.cmake
rm -rf build || true
mkdir -p build
cd build
-cmake $METAL_OPT $CUDA_OPT -DUSE_LLVM=$PREFIX/bin/llvm-config -DINSTALL_DEV=ON -DCMAKE_INSTALL_PREFIX="$PREFIX" $TOOLCHAIN_OPT ..
-make -j${CPU_COUNT} VERBOSE=1
-make install
+
+cmake -DCMAKE_INSTALL_PREFIX="${PREFIX}" \
+ -DCMAKE_BUILD_TYPE=Release \
+ -DUSE_RPC=ON \
+ -DUSE_CPP_RPC=OFF \
+ -DUSE_SORT=ON \
+ -DUSE_RANDOM=ON \
+ -DUSE_GRAPH_RUNTIME_DEBUG=ON \
+ -DUSE_LLVM=ON \
+ -DINSTALL_DEV=ON \
+ ${GPU_OPT} ${TOOLCHAIN_OPT} \
+ ${SRC_DIR}
+
+make -j${CPU_COUNT}
cd ..
diff --git a/conda/conda_build_config.yaml b/conda/recipe/conda_build_config.yaml
similarity index 99%
rename from conda/conda_build_config.yaml
rename to conda/recipe/conda_build_config.yaml
index 79d6bfe3c175..938d294da556 100644
--- a/conda/conda_build_config.yaml
+++ b/conda/recipe/conda_build_config.yaml
@@ -16,9 +16,9 @@
# under the License.
python:
- - 3.5
- 3.6
- 3.7
+ - 3.8
cuda:
- False
diff --git a/conda/cross-linux.cmake b/conda/recipe/cross-linux.cmake
similarity index 100%
rename from conda/cross-linux.cmake
rename to conda/recipe/cross-linux.cmake
diff --git a/conda/recipe/install_libtvm.bat b/conda/recipe/install_libtvm.bat
new file mode 100644
index 000000000000..f423c521f84e
--- /dev/null
+++ b/conda/recipe/install_libtvm.bat
@@ -0,0 +1,22 @@
+:: Licensed to the Apache Software Foundation (ASF) under one
+:: or more contributor license agreements. See the NOTICE file
+:: distributed with this work for additional information
+:: regarding copyright ownership. The ASF licenses this file
+:: to you under the Apache License, Version 2.0 (the
+:: "License"); you may not use this file except in compliance
+:: with the License. You may obtain a copy of the License at
+::
+:: http://www.apache.org/licenses/LICENSE-2.0
+::
+:: Unless required by applicable law or agreed to in writing,
+:: software distributed under the License is distributed on an
+:: "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+:: KIND, either express or implied. See the License for the
+:: specific language governing permissions and limitations
+:: under the License.
+
+cmake --build build --config Release --target install
+
+:: Copy files into library bin so that they can be found
+cp %LIBRARY_LIB%\tvm.dll %LIBRARY_BIN%\tvm.dll
+cp %LIBRARY_LIB%\tvm_runtime.dll %LIBRARY_BIN%\tvm_runtime.dll
diff --git a/conda/tvm/build.sh b/conda/recipe/install_libtvm.sh
old mode 100644
new mode 100755
similarity index 88%
rename from conda/tvm/build.sh
rename to conda/recipe/install_libtvm.sh
index 9bdbe0a6f509..b236c7dc2720
--- a/conda/tvm/build.sh
+++ b/conda/recipe/install_libtvm.sh
@@ -19,6 +19,5 @@
set -e
set -u
-cd python
-$PYTHON setup.py install --single-version-externally-managed --record=/tmp/record.txt
-cd ..
+cd build
+make install
diff --git a/conda/recipe/install_tvm_python.bat b/conda/recipe/install_tvm_python.bat
new file mode 100644
index 000000000000..96187468c2b2
--- /dev/null
+++ b/conda/recipe/install_tvm_python.bat
@@ -0,0 +1,20 @@
+:: Licensed to the Apache Software Foundation (ASF) under one
+:: or more contributor license agreements. See the NOTICE file
+:: distributed with this work for additional information
+:: regarding copyright ownership. The ASF licenses this file
+:: to you under the Apache License, Version 2.0 (the
+:: "License"); you may not use this file except in compliance
+:: with the License. You may obtain a copy of the License at
+::
+:: http://www.apache.org/licenses/LICENSE-2.0
+::
+:: Unless required by applicable law or agreed to in writing,
+:: software distributed under the License is distributed on an
+:: "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+:: KIND, either express or implied. See the License for the
+:: specific language governing permissions and limitations
+:: under the License.
+echo on
+
+cd %SRC_DIR%\python
+%PYTHON% setup.py install --single-version-externally-managed --record=%SRC_DIR%\record.txt
diff --git a/conda/recipe/install_tvm_python.sh b/conda/recipe/install_tvm_python.sh
new file mode 100755
index 000000000000..2c721c64a156
--- /dev/null
+++ b/conda/recipe/install_tvm_python.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -e
+set -u
+
+cd ${SRC_DIR}/python
+${PYTHON} setup.py install --single-version-externally-managed --record=/tmp/record.txt
diff --git a/conda/recipe/meta.yaml b/conda/recipe/meta.yaml
new file mode 100644
index 000000000000..0113850a6602
--- /dev/null
+++ b/conda/recipe/meta.yaml
@@ -0,0 +1,94 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+{% set version = '0.8.dev0' %}
+{% set pkg_name = 'tvm' %}
+{% set cuda_tag = cuda_version | replace('.', '') %} # [cuda]
+{% set pkg_name = pkg_name + '-cu' + cuda_tag %} # [cuda]
+{% set build_tag = environ.get('GIT_BUILD_STR', 'unknown') %}
+{% set build_tag = build_tag + '_h' + PKG_HASH + '_' + PKG_BUILDNUM %}
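+# NOTE: the trailing "# [cuda]" markers above are conda-build selectors;
+# those lines only take effect when the cuda variant is being built.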
+
+package:
+ name: {{ pkg_name }}-package
+ version: {{ version }}
+
+source:
+ path: '../..'
+
+build:
+ number: 0
+ include_recipe: False
+ missing_dso_whitelist:
+ - "*libcuda.*" # [linux]
+
+requirements:
+ build:
+ # The anaconda compilers for OS X are old and annoying
+ # so we rely on the platform ones for now
+ - {{ compiler('cxx') }} # [not osx]
+ - cmake
+ - make # [not win]
+ host:
+ - zlib
+ - llvmdev ==10.0.0
+
+outputs:
+ - name: {{ pkg_name }}-libs
+ script: install_libtvm.bat # [win]
+ script: install_libtvm.sh # [not win]
+ string: {{ build_tag }}
+ requirements:
+ build:
+ - {{ compiler('cxx') }}
+ - cmake
+ - git
+ - make # [not win]
+ host:
+ - zlib
+ - llvmdev ==10.0.0
+ - {{ pin_compatible('cudatoolkit', lower_bound=cuda_version, max_pin='x.x') }} # [cuda]
+ - {{ pin_compatible('cudnn', lower_bound='7.6.0', max_pin='x') }} # [cuda]
+ run:
+ - llvmdev ==10.0.0
+ - {{ pin_compatible('cudatoolkit', lower_bound=cuda_version, max_pin='x.x') }} # [cuda]
+ - {{ pin_compatible('cudnn', lower_bound='7.6.0', max_pin='x') }} # [cuda]
+
+ - name: {{ pkg_name }}
+ script: install_tvm_python.sh # [not win]
+ script: install_tvm_python.bat # [win]
+ string: {{ build_tag }}_py{{ PY_VER | replace('.', '')}}
+ # skip pyc byte-compilation to speed up CI
+ skip_compile_pyc:
+ - "*/**/*.py"
+ requirements:
+ host:
+ - python
+ - setuptools
+ - cython
+ - {{ pin_subpackage(pkg_name + '-libs', exact=True) }}
+ run:
+ - python
+ - decorator
+ - psutil
+ - scipy
+ - {{ pin_compatible('numpy') }}
+ - {{ pin_subpackage(pkg_name + '-libs', exact=True) }}
+
+about:
+ home: https://tvm.apache.org
+ license: Apache-2.0
+ summary: An End to End Deep Learning Compiler Stack for CPUs, GPUs and accelerators.
diff --git a/conda/render_cuda.py b/conda/render_cuda_dockerfiles.py
similarity index 98%
rename from conda/render_cuda.py
rename to conda/render_cuda_dockerfiles.py
index efd616946314..d9d32f05fb5e 100644
--- a/conda/render_cuda.py
+++ b/conda/render_cuda_dockerfiles.py
@@ -48,7 +48,7 @@ def render_dockerfile(version):
)
fname = os.path.join(condadir, "../docker/Dockerfile.conda_cuda" + version.replace(".", ""))
with open(fname, "w") as f:
- f.write(txt)
+ f.write(txt + "\n")
return fname
diff --git a/conda/tvm-libs/meta.yaml b/conda/tvm-libs/meta.yaml
deleted file mode 100644
index f151048e445b..000000000000
--- a/conda/tvm-libs/meta.yaml
+++ /dev/null
@@ -1,48 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-{% set version = "0.8.dev0" %}
-
-package:
- name: tvm-libs
- version: {{ version }}
-
-source:
- path: ../..
-
-build:
- number: 0
- string: cuda{{ cuda_version | replace('.', '') }}h{{ PKG_HASH }}_{{ PKG_BUILDNUM }} # [cuda]
-
-requirements:
- build:
- # The anaconda compilers for OS X are old an annoying
- # so we rely on the platform ones for now
- - {{ compiler('cxx') }} # [linux]
- - cmake
- - make
- host:
- - llvmdev ==8.0.0
- - zlib # [linux]
- run:
- - {{ pin_compatible('cudatoolkit', lower_bound=cuda_version, max_pin='x.x') }} # [cuda]
- - {{ pin_compatible('cudnn', lower_bound='7.6.0', max_pin='x') }} # [cuda]
-
-about:
- home: https://github.com/apache/incubator-tvm
- license: Apache2
- summary: a low level domain specific language for compiling tensor computation pipelines
\ No newline at end of file
diff --git a/dmlc_tvm_commit_id.txt b/dmlc_tvm_commit_id.txt
index 3476b5d864e2..7b294e50bf70 100644
--- a/dmlc_tvm_commit_id.txt
+++ b/dmlc_tvm_commit_id.txt
@@ -1 +1 @@
-7d805b54d6adda82636d13bf7c46a2e9a933da5f
\ No newline at end of file
+9554e645922357af1d11679a102f3763b80b740f
\ No newline at end of file
diff --git a/docker/Dockerfile.ci_arm b/docker/Dockerfile.ci_arm
new file mode 100644
index 000000000000..f5b2c2af0fbf
--- /dev/null
+++ b/docker/Dockerfile.ci_arm
@@ -0,0 +1,43 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# CI docker arm env
+# tag: v0.10
+
+FROM ubuntu:18.04
+
+RUN apt-get update --fix-missing
+RUN apt-get install -y ca-certificates gnupg2
+
+COPY install/ubuntu_install_core.sh /install/ubuntu_install_core.sh
+RUN bash /install/ubuntu_install_core.sh
+
+COPY install/ubuntu_install_llvm.sh /install/ubuntu_install_llvm.sh
+RUN bash /install/ubuntu_install_llvm.sh
+
+COPY install/ubuntu1804_install_python.sh /install/ubuntu1804_install_python.sh
+RUN bash /install/ubuntu1804_install_python.sh
+
+COPY install/ubuntu_install_cmake_source.sh /install/ubuntu_install_cmake_source.sh
+RUN bash /install/ubuntu_install_cmake_source.sh
+
+COPY install/ubuntu_install_python_package.sh /install/ubuntu_install_python_package.sh
+RUN bash /install/ubuntu_install_python_package.sh
+
+# AutoTVM deps
+COPY install/ubuntu_install_redis.sh /install/ubuntu_install_redis.sh
+RUN bash /install/ubuntu_install_redis.sh
diff --git a/docker/Dockerfile.ci_cpu b/docker/Dockerfile.ci_cpu
index 4823488a731a..a3805660b2b1 100644
--- a/docker/Dockerfile.ci_cpu
+++ b/docker/Dockerfile.ci_cpu
@@ -36,6 +36,10 @@ RUN bash /install/ubuntu1804_install_llvm.sh
COPY install/ubuntu_install_dnnl.sh /install/ubuntu_install_dnnl.sh
RUN bash /install/ubuntu_install_dnnl.sh
+# Install MxNet for access to the Gluon Model Zoo.
+COPY install/ubuntu_install_mxnet.sh /install/ubuntu_install_mxnet.sh
+RUN bash /install/ubuntu_install_mxnet.sh
+
# Rust env (build early; takes a while)
COPY install/ubuntu_install_rust.sh /install/ubuntu_install_rust.sh
RUN bash /install/ubuntu_install_rust.sh
@@ -60,9 +64,17 @@ ENV PATH $PATH:$CARGO_HOME/bin:/usr/lib/go-1.10/bin
COPY install/ubuntu_install_java.sh /install/ubuntu_install_java.sh
RUN bash /install/ubuntu_install_java.sh
+# BYODT deps
+COPY install/ubuntu_install_universal.sh /install/ubuntu_install_universal.sh
+RUN bash /install/ubuntu_install_universal.sh
+
# Chisel deps for TSIM
-COPY install/ubuntu_install_chisel.sh /install/ubuntu_install_chisel.sh
-RUN bash /install/ubuntu_install_chisel.sh
+COPY install/ubuntu_install_sbt.sh /install/ubuntu_install_sbt.sh
+RUN bash /install/ubuntu_install_sbt.sh
+
+# Verilator deps
+COPY install/ubuntu_install_verilator.sh /install/ubuntu_install_verilator.sh
+RUN bash /install/ubuntu_install_verilator.sh
# TFLite deps
COPY install/ubuntu_install_tflite.sh /install/ubuntu_install_tflite.sh
diff --git a/docker/Dockerfile.ci_gpu b/docker/Dockerfile.ci_gpu
index 1197d8e4c7b6..ac76af6b0a1e 100644
--- a/docker/Dockerfile.ci_gpu
+++ b/docker/Dockerfile.ci_gpu
@@ -83,6 +83,13 @@ RUN bash /install/ubuntu_install_dgl.sh
COPY install/ubuntu_install_vulkan.sh /install/ubuntu_install_vulkan.sh
RUN bash /install/ubuntu_install_vulkan.sh
+# Rust env (build early; takes a while)
+COPY install/ubuntu_install_rust.sh /install/ubuntu_install_rust.sh
+RUN bash /install/ubuntu_install_rust.sh
+ENV RUSTUP_HOME /opt/rust
+ENV CARGO_HOME /opt/rust
+ENV PATH $PATH:$CARGO_HOME/bin
+
# AutoTVM deps
COPY install/ubuntu_install_redis.sh /install/ubuntu_install_redis.sh
RUN bash /install/ubuntu_install_redis.sh
@@ -91,6 +98,10 @@ RUN bash /install/ubuntu_install_redis.sh
COPY install/ubuntu_install_nnpack.sh /install/ubuntu_install_nnpack.sh
RUN bash /install/ubuntu_install_nnpack.sh
+# BYODT deps
+COPY install/ubuntu_install_universal.sh /install/ubuntu_install_universal.sh
+RUN bash /install/ubuntu_install_universal.sh
+
# Environment variables
ENV PATH=/usr/local/nvidia/bin:${PATH}
ENV PATH=/usr/local/cuda/bin:${PATH}
diff --git a/docker/Dockerfile.ci_i386 b/docker/Dockerfile.ci_i386
index a7d8308d4810..2cdf10c4369e 100644
--- a/docker/Dockerfile.ci_i386
+++ b/docker/Dockerfile.ci_i386
@@ -21,6 +21,7 @@
FROM ioft/i386-ubuntu:16.04
RUN apt-get update --fix-missing
+RUN apt-get install -y ca-certificates
COPY install/ubuntu_install_core.sh /install/ubuntu_install_core.sh
RUN bash /install/ubuntu_install_core.sh
@@ -31,6 +32,9 @@ RUN bash /install/ubuntu_install_llvm.sh
COPY install/ubuntu_install_python.sh /install/ubuntu_install_python.sh
RUN bash /install/ubuntu_install_python.sh
+COPY install/ubuntu_install_cmake_source.sh /install/ubuntu_install_cmake_source.sh
+RUN bash /install/ubuntu_install_cmake_source.sh
+
COPY install/ubuntu_install_python_package.sh /install/ubuntu_install_python_package.sh
RUN bash /install/ubuntu_install_python_package.sh
@@ -39,5 +43,9 @@ COPY install/ubuntu_install_redis.sh /install/ubuntu_install_redis.sh
RUN bash /install/ubuntu_install_redis.sh
# Chisel deps for TSIM
-COPY install/ubuntu_install_chisel.sh /install/ubuntu_install_chisel.sh
-RUN bash /install/ubuntu_install_chisel.sh
+COPY install/ubuntu_install_sbt.sh /install/ubuntu_install_sbt.sh
+RUN bash /install/ubuntu_install_sbt.sh
+
+# Verilator deps
+COPY install/ubuntu_install_verilator.sh /install/ubuntu_install_verilator.sh
+RUN bash /install/ubuntu_install_verilator.sh
diff --git a/docker/Dockerfile.conda_cpu b/docker/Dockerfile.conda_cpu
index 4e0c35a26e55..d2779afbdaf3 100644
--- a/docker/Dockerfile.conda_cpu
+++ b/docker/Dockerfile.conda_cpu
@@ -17,25 +17,12 @@
FROM ubuntu:16.04
-RUN apt-get update && apt-get install -y bzip2 curl sudo binutils && rm -rf /var/lib/apt/lists/*
+RUN apt-get update --fix-missing && apt-get install -y bzip2 wget sudo binutils git
-RUN curl -o ~/miniconda.sh -O https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
- chmod +x ~/miniconda.sh && \
- ~/miniconda.sh -b -p /opt/conda && \
- rm ~/miniconda.sh && \
- /opt/conda/bin/conda upgrade --all && \
- /opt/conda/bin/conda install conda-build conda-verify && \
- /opt/conda/bin/conda clean -ya
-
-# Cache some of the packages for the builds
-RUN /opt/conda/bin/conda install --download-only cmake make zlib && \
- /opt/conda/bin/conda install --download-only -c numba llvmdev=8.0.0 && \
- /opt/conda/bin/conda create -n py35 --download-only pytest scipy numpy=1.11 cython decorator python=3.5 && \
- /opt/conda/bin/conda create -n py36 --download-only pytest scipy numpy=1.11 cython decorator python=3.6 && \
- /opt/conda/bin/conda create -n py37 --download-only pytest scipy numpy=1.11 cython decorator python=3.7
+COPY install/ubuntu_install_conda.sh /install/ubuntu_install_conda.sh
+RUN bash /install/ubuntu_install_conda.sh
ENV PATH /opt/conda/bin:$PATH
ENV CONDA_BLD_PATH /tmp
-
-WORKDIR /workspace
-RUN chmod -R a+w /workspace
+ENV CONDA_PKGS_DIRS /workspace/.conda/pkgs
+ENV CONDA_ENVS_DIRS /workspace/.conda/env
diff --git a/docker/Dockerfile.conda_cuda100 b/docker/Dockerfile.conda_cuda100
index d6e1cddbfd37..7705c8548b52 100644
--- a/docker/Dockerfile.conda_cuda100
+++ b/docker/Dockerfile.conda_cuda100
@@ -17,30 +17,16 @@
FROM nvidia/cuda:10.0-devel-ubuntu16.04
-RUN apt-get update && apt-get install -y --no-install-recommends \
- bzip2 curl sudo binutils && \
- rm -rf /var/lib/apt/lists/*
+RUN apt-get update --fix-missing && apt-get install -y bzip2 wget sudo binutils git
-RUN curl -fsSL http://developer.download.nvidia.com/compute/redist/cudnn/v7.6.0/cudnn-10.0-linux-x64-v7.6.0.64.tgz -O && \
+RUN wget -q http://developer.download.nvidia.com/compute/redist/cudnn/v7.6.0/cudnn-10.0-linux-x64-v7.6.0.64.tgz && \
tar --no-same-owner -xzf cudnn-10.0-linux-x64-v7.6.0.64.tgz -C /usr/local && \
rm cudnn-10.0-linux-x64-v7.6.0.64.tgz && \
ldconfig
-
-RUN curl -o ~/miniconda.sh -O https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
- chmod +x ~/miniconda.sh && \
- ~/miniconda.sh -b -p /opt/conda && \
- rm ~/miniconda.sh && \
- /opt/conda/bin/conda upgrade --all && \
- /opt/conda/bin/conda install conda-build conda-verify && \
- /opt/conda/bin/conda clean -ya
-
-RUN /opt/conda/bin/conda install --download-only cmake make zlib
-RUN /opt/conda/bin/conda install --download-only -c numba llvmdev=8.0.0
+COPY install/ubuntu_install_conda.sh /install/ubuntu_install_conda.sh
+RUN bash /install/ubuntu_install_conda.sh
ENV PATH /opt/conda/bin:$PATH
ENV LD_LIBRARY_PATH /usr/local/nvidia/lib:/usr/local/nvidia/lib64
ENV CONDA_BLD_PATH /tmp
-
-WORKDIR /workspace
-RUN chmod -R a+w /workspace
\ No newline at end of file
diff --git a/docker/Dockerfile.conda_cuda90 b/docker/Dockerfile.conda_cuda90
index f55aa1bf2e12..372167438141 100644
--- a/docker/Dockerfile.conda_cuda90
+++ b/docker/Dockerfile.conda_cuda90
@@ -17,30 +17,16 @@
FROM nvidia/cuda:9.0-devel-ubuntu16.04
-RUN apt-get update && apt-get install -y --no-install-recommends \
- bzip2 curl sudo binutils && \
- rm -rf /var/lib/apt/lists/*
+RUN apt-get update --fix-missing && apt-get install -y bzip2 wget sudo binutils git
-RUN curl -fsSL http://developer.download.nvidia.com/compute/redist/cudnn/v7.6.0/cudnn-9.0-linux-x64-v7.6.0.64.tgz -O && \
+RUN wget -q http://developer.download.nvidia.com/compute/redist/cudnn/v7.6.0/cudnn-9.0-linux-x64-v7.6.0.64.tgz && \
tar --no-same-owner -xzf cudnn-9.0-linux-x64-v7.6.0.64.tgz -C /usr/local && \
rm cudnn-9.0-linux-x64-v7.6.0.64.tgz && \
ldconfig
-
-RUN curl -o ~/miniconda.sh -O https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
- chmod +x ~/miniconda.sh && \
- ~/miniconda.sh -b -p /opt/conda && \
- rm ~/miniconda.sh && \
- /opt/conda/bin/conda upgrade --all && \
- /opt/conda/bin/conda install conda-build conda-verify && \
- /opt/conda/bin/conda clean -ya
-
-RUN /opt/conda/bin/conda install --download-only cmake make zlib
-RUN /opt/conda/bin/conda install --download-only -c numba llvmdev=8.0.0
+COPY install/ubuntu_install_conda.sh /install/ubuntu_install_conda.sh
+RUN bash /install/ubuntu_install_conda.sh
ENV PATH /opt/conda/bin:$PATH
ENV LD_LIBRARY_PATH /usr/local/nvidia/lib:/usr/local/nvidia/lib64
ENV CONDA_BLD_PATH /tmp
-
-WORKDIR /workspace
-RUN chmod -R a+w /workspace
\ No newline at end of file
diff --git a/docker/Dockerfile.demo_android b/docker/Dockerfile.demo_android
index cf13daa9734e..039439a937e9 100644
--- a/docker/Dockerfile.demo_android
+++ b/docker/Dockerfile.demo_android
@@ -53,7 +53,7 @@ RUN git clone https://github.com/KhronosGroup/OpenCL-Headers /usr/local/OpenCL-H
# Build TVM
RUN cd /usr && \
- git clone --depth=1 https://github.com/apache/incubator-tvm tvm --recursive && \
+ git clone --depth=1 https://github.com/apache/tvm tvm --recursive && \
cd /usr/tvm && \
mkdir -p build && \
cd build && \
diff --git a/docker/Dockerfile.demo_opencl b/docker/Dockerfile.demo_opencl
index e39ee4128c96..2f534d8b5b5c 100644
--- a/docker/Dockerfile.demo_opencl
+++ b/docker/Dockerfile.demo_opencl
@@ -62,7 +62,7 @@ RUN echo "Cloning TVM source & submodules"
ENV TVM_PAR_DIR="/usr"
RUN mkdir -p TVM_PAR_DIR && \
cd ${TVM_PAR_DIR} && \
- git clone --depth=1 https://github.com/apache/incubator-tvm tvm --recursive
+ git clone --depth=1 https://github.com/apache/tvm tvm --recursive
#RUN git submodule update --init --recursive
diff --git a/docker/README.md b/docker/README.md
index dffaf3a5ba4f..ae972f954668 100644
--- a/docker/README.md
+++ b/docker/README.md
@@ -52,7 +52,7 @@ Then inside the docker container, you can type the following command to start th
jupyter notebook
```
-You can find some un-official prebuilt images in https://hub.docker.com/r/tvmai/ .
+You can find some unofficial prebuilt images at https://hub.docker.com/r/tlcpack/ .
Note that these are convenience images and are not part of the ASF release.
diff --git a/docker/bash.sh b/docker/bash.sh
index d2424f170219..7420e6f9024c 100755
--- a/docker/bash.sh
+++ b/docker/bash.sh
@@ -70,7 +70,7 @@ else
CUDA_ENV=""
fi
-if [[ "${DOCKER_IMAGE_NAME}" == *"gpu"* ]]; then
+if [[ "${DOCKER_IMAGE_NAME}" == *"gpu"* || "${DOCKER_IMAGE_NAME}" == *"cuda"* ]]; then
if ! type "nvidia-docker" 1> /dev/null 2> /dev/null
then
DOCKER_BINARY="docker"
@@ -83,9 +83,9 @@ else
fi
if [[ "${DOCKER_IMAGE_NAME}" == *"ci"* ]]; then
- CI_PY_ENV="-e PYTHONPATH=/workspace/python"
+ CI_ADDON_ENV="-e PYTHONPATH=/workspace/python"
else
- CI_PY_ENV=""
+ CI_ADDON_ENV=""
fi
# If the Vitis-AI docker image is selected, expose the Xilinx FPGA devices and required volumes containing e.g. DSA's and overlays
@@ -143,7 +143,8 @@ ${DOCKER_BINARY} run --rm --pid=host\
-e "CI_BUILD_GROUP=$(id -g -n)" \
-e "CI_BUILD_GID=$(id -g)" \
-e "CI_PYTEST_ADD_OPTIONS=$CI_PYTEST_ADD_OPTIONS" \
- ${CI_PY_ENV} \
+ -e "CI_IMAGE_NAME=${DOCKER_IMAGE_NAME}" \
+ ${CI_ADDON_ENV} \
${CUDA_ENV} \
"${CI_DOCKER_EXTRA_PARAMS[@]}" \
${DOCKER_IMAGE_NAME} \
diff --git a/docker/build.sh b/docker/build.sh
index 43f0a08700a4..bd13937b2571 100755
--- a/docker/build.sh
+++ b/docker/build.sh
@@ -91,7 +91,7 @@ if [ "$#" -lt 1 ] || [ ! -e "${SCRIPT_DIR}/Dockerfile.${CONTAINER_TYPE}" ]; then
fi
# Use nvidia-docker if the container is GPU.
-if [[ "${CONTAINER_TYPE}" == *"gpu"* ]]; then
+if [[ "${CONTAINER_TYPE}" == *"gpu"* || "${CONTAINER_TYPE}" == *"cuda"* ]]; then
if ! type "nvidia-docker" 1> /dev/null 2> /dev/null
then
DOCKER_BINARY="docker"
@@ -164,6 +164,7 @@ ${DOCKER_BINARY} run --rm --pid=host \
-e "CI_BUILD_GROUP=$(id -g -n)" \
-e "CI_BUILD_GID=$(id -g)" \
-e "CI_PYTEST_ADD_OPTIONS=$CI_PYTEST_ADD_OPTIONS" \
+ -e "CI_IMAGE_NAME=${DOCKER_IMAGE_NAME}" \
${CUDA_ENV}\
${CI_DOCKER_EXTRA_PARAMS[@]} \
${DOCKER_IMG_NAME} \
diff --git a/docker/install/install_tvm_cpu.sh b/docker/install/install_tvm_cpu.sh
index b11c9791fb2d..c3a15fa26b6d 100755
--- a/docker/install/install_tvm_cpu.sh
+++ b/docker/install/install_tvm_cpu.sh
@@ -21,7 +21,7 @@ set -u
set -o pipefail
cd /usr
-git clone https://github.com/apache/incubator-tvm tvm --recursive
+git clone https://github.com/apache/tvm tvm --recursive
cd /usr/tvm
# checkout a hash-tag
git checkout 4b13bf668edc7099b38d463e5db94ebc96c80470
diff --git a/docker/install/install_tvm_gpu.sh b/docker/install/install_tvm_gpu.sh
index 2dbf8e17398d..fe2214da8409 100755
--- a/docker/install/install_tvm_gpu.sh
+++ b/docker/install/install_tvm_gpu.sh
@@ -21,7 +21,7 @@ set -u
set -o pipefail
cd /usr
-git clone https://github.com/apache/incubator-tvm tvm --recursive
+git clone https://github.com/apache/tvm tvm --recursive
cd /usr/tvm
# checkout a hash-tag
git checkout 4b13bf668edc7099b38d463e5db94ebc96c80470
diff --git a/docker/install/ubuntu_install_cmake_source.sh b/docker/install/ubuntu_install_cmake_source.sh
new file mode 100644
index 000000000000..f818fba9721b
--- /dev/null
+++ b/docker/install/ubuntu_install_cmake_source.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -e
+set -u
+set -o pipefail
+
+v=3.13
+version=3.13.5
+wget https://cmake.org/files/v${v}/cmake-${version}.tar.gz
+tar xvf cmake-${version}.tar.gz
+cd cmake-${version}
+./bootstrap
+make -j$(nproc)
+make install
+cd ..
+rm -rf cmake-${version} cmake-${version}.tar.gz
diff --git a/docker/install/ubuntu_install_conda.sh b/docker/install/ubuntu_install_conda.sh
new file mode 100755
index 000000000000..6f6019340293
--- /dev/null
+++ b/docker/install/ubuntu_install_conda.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -e
+set -u
+set -o pipefail
+
+cd /tmp && wget -q https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
+chmod +x Miniconda3-latest-Linux-x86_64.sh
+/tmp/Miniconda3-latest-Linux-x86_64.sh -b -p /opt/conda
+rm /tmp/Miniconda3-latest-Linux-x86_64.sh
+/opt/conda/bin/conda upgrade --all
+/opt/conda/bin/conda clean -ya
+/opt/conda/bin/conda install conda-build conda-verify
+chmod -R a+w /opt/conda/
diff --git a/docker/install/ubuntu_install_darknet.sh b/docker/install/ubuntu_install_darknet.sh
index c48724c6065b..37adf4a30270 100755
--- a/docker/install/ubuntu_install_darknet.sh
+++ b/docker/install/ubuntu_install_darknet.sh
@@ -6,9 +6,9 @@
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -23,7 +23,4 @@ set -o pipefail
#install the necessary dependancies, cffi, opencv
wget -q 'https://github.com/siju-samuel/darknet/blob/master/lib/libdarknet.so?raw=true' -O libdarknet.so
debian_version=`cat /etc/debian_version`
-if [ "$debian_version" == "stretch/sid" ]; then
- pip2 install opencv-python cffi
-fi
pip3 install opencv-python cffi
diff --git a/docker/install/ubuntu_install_dgl.sh b/docker/install/ubuntu_install_dgl.sh
old mode 100644
new mode 100755
diff --git a/docker/install/ubuntu_install_emscripten.sh b/docker/install/ubuntu_install_emscripten.sh
index 2e48cccbe2a6..fa44e1c70f1d 100755
--- a/docker/install/ubuntu_install_emscripten.sh
+++ b/docker/install/ubuntu_install_emscripten.sh
@@ -23,5 +23,5 @@ set -o pipefail
cd /
git clone https://github.com/emscripten-core/emsdk.git
cd emsdk
-./emsdk install latest
-./emsdk activate latest
+./emsdk install 2.0.7
+./emsdk activate 2.0.7
diff --git a/docker/install/ubuntu_install_onnx.sh b/docker/install/ubuntu_install_onnx.sh
index 2ad601983fa2..a92a0244d707 100755
--- a/docker/install/ubuntu_install_onnx.sh
+++ b/docker/install/ubuntu_install_onnx.sh
@@ -28,4 +28,4 @@ pip3 install onnxruntime==1.0.0
# not expose that in the wheel!!!
pip3 install future
-pip3 install torch==1.4.0 torchvision==0.5.0
+pip3 install torch==1.7.0 torchvision==0.8.1
diff --git a/docker/install/ubuntu_install_python_package.sh b/docker/install/ubuntu_install_python_package.sh
index 2b8df74dab7b..7989a49a4826 100755
--- a/docker/install/ubuntu_install_python_package.sh
+++ b/docker/install/ubuntu_install_python_package.sh
@@ -21,4 +21,4 @@ set -u
set -o pipefail
# install libraries for python package on ubuntu
-pip3 install six numpy pytest cython decorator scipy tornado typed_ast pytest mypy orderedset attrs requests Pillow packaging
+pip3 install six numpy pytest cython decorator scipy tornado pytest-xdist pytest-profiling mypy orderedset attrs requests Pillow packaging cloudpickle synr
diff --git a/docker/install/ubuntu_install_rust.sh b/docker/install/ubuntu_install_rust.sh
index 310e6507e3f3..5716b11db6c4 100755
--- a/docker/install/ubuntu_install_rust.sh
+++ b/docker/install/ubuntu_install_rust.sh
@@ -26,10 +26,11 @@ export RUSTUP_HOME=/opt/rust
export CARGO_HOME=/opt/rust
# this rustc is one supported by the installed version of rust-sgx-sdk
curl -s -S -L https://sh.rustup.rs -sSf | sh -s -- -y --no-modify-path --default-toolchain stable
-. $CARGO_HOME/env
+export PATH=$CARGO_HOME/bin:$PATH
rustup component add rustfmt
# install wasmtime
+apt-get install -y --no-install-recommends libc6-dev-i386
export WASMTIME_HOME=/opt/wasmtime
curl https://wasmtime.dev/install.sh -sSf | bash
export PATH="${WASMTIME_HOME}/bin:${PATH}"
diff --git a/docker/install/ubuntu_install_chisel.sh b/docker/install/ubuntu_install_sbt.sh
similarity index 80%
rename from docker/install/ubuntu_install_chisel.sh
rename to docker/install/ubuntu_install_sbt.sh
index d6776634ffe0..b02186e3263a 100755
--- a/docker/install/ubuntu_install_chisel.sh
+++ b/docker/install/ubuntu_install_sbt.sh
@@ -22,20 +22,12 @@ set -o pipefail
# The https:// source added below required an apt https transport
# support.
-apt-get update && apt-get install -y apt-transport-https flex bison
+apt-get update && apt-get install -y apt-transport-https
-# Install the necessary dependencies for Chisel
+# Install the necessary dependencies for sbt
echo "deb https://dl.bintray.com/sbt/debian /" | tee -a /etc/apt/sources.list.d/sbt.list
apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 2EE0EA64E40A89B84B2DF73499E82A75642AC823
# Note: The settings in vta/hardware/chisel/project/build.properties
# file determines required sbt version.
apt-get update && apt-get install -y sbt=1.1.1
-
-# Install the Verilator with major version 4.0
-wget https://www.veripool.org/ftp/verilator-4.010.tgz
-tar xf verilator-4.010.tgz
-cd verilator-4.010/
-./configure
-make -j4
-make install
diff --git a/docker/install/ubuntu_install_sphinx.sh b/docker/install/ubuntu_install_sphinx.sh
index 2555164e2292..33757a0d4d57 100755
--- a/docker/install/ubuntu_install_sphinx.sh
+++ b/docker/install/ubuntu_install_sphinx.sh
@@ -20,4 +20,4 @@ set -e
set -u
set -o pipefail
-pip3 install sphinx sphinx-gallery==0.4.0 autodocsumm sphinx_rtd_theme sphinx_autodoc_annotation matplotlib Image commonmark>=0.7.3 docutils>=0.11
+pip3 install sphinx sphinx-gallery==0.4.0 autodocsumm sphinx_rtd_theme sphinx_autodoc_annotation matplotlib Image "commonmark>=0.7.3" "docutils>=0.11"
diff --git a/docker/install/ubuntu_install_tensorflow.sh b/docker/install/ubuntu_install_tensorflow.sh
index 25543909d78b..286a086abd82 100755
--- a/docker/install/ubuntu_install_tensorflow.sh
+++ b/docker/install/ubuntu_install_tensorflow.sh
@@ -20,4 +20,7 @@ set -e
set -u
set -o pipefail
-pip3 install tensorflow==2.1.0 keras==2.3.1 h5py
+# h5py is pinned to a version below 3 due to issues with
+# tensorflow:
+# https://github.com/tensorflow/tensorflow/issues/44467
+pip3 install tensorflow==2.3.1 keras==2.4.3 "h5py<3.0"
diff --git a/docker/install/ubuntu_install_tflite.sh b/docker/install/ubuntu_install_tflite.sh
index 123ff520d725..2dfbb0681a80 100755
--- a/docker/install/ubuntu_install_tflite.sh
+++ b/docker/install/ubuntu_install_tflite.sh
@@ -33,14 +33,14 @@ pip3 install flatbuffers
# Build the TFLite static library, necessary for building with TFLite ON.
# The library is built at:
# tensorflow/tensorflow/lite/tools/make/gen/*/lib/libtensorflow-lite.a.
-git clone https://github.com/tensorflow/tensorflow --branch=r2.1
+git clone https://github.com/tensorflow/tensorflow --branch=r2.3
./tensorflow/tensorflow/lite/tools/make/download_dependencies.sh
./tensorflow/tensorflow/lite/tools/make/build_lib.sh
# Setup tflite from schema
mkdir tflite
cd tflite
-wget -q https://raw.githubusercontent.com/tensorflow/tensorflow/r2.1/tensorflow/lite/schema/schema.fbs
+wget -q https://raw.githubusercontent.com/tensorflow/tensorflow/r2.3/tensorflow/lite/schema/schema.fbs
flatc --python schema.fbs
cat <setup.py
@@ -48,7 +48,7 @@ import setuptools
setuptools.setup(
name="tflite",
- version="2.1.0",
+ version="2.3.1",
author="google",
author_email="google@google.com",
description="TFLite",
diff --git a/docker/install/ubuntu_install_universal.sh b/docker/install/ubuntu_install_universal.sh
new file mode 100644
index 000000000000..a054aafdd5f7
--- /dev/null
+++ b/docker/install/ubuntu_install_universal.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -e
+set -u
+set -o pipefail
+
+git clone https://github.com/stillwater-sc/universal.git /opt/universal
+
+# Use specific versioning tag.
+(cd /opt/universal && git checkout e32899d551b53d758865fabd5fdd69eed35bfb0f)
\ No newline at end of file
diff --git a/docker/install/ubuntu_install_verilator.sh b/docker/install/ubuntu_install_verilator.sh
new file mode 100644
index 000000000000..1c5193c053c1
--- /dev/null
+++ b/docker/install/ubuntu_install_verilator.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -e
+set -u
+set -o pipefail
+
+# Verilator version
+version="4.104"
+
+# Install dependencies
+apt-get update && apt-get install -y autoconf g++ flex bison
+
+# Install Verilator
+wget "https://github.com/verilator/verilator/archive/v$version.tar.gz"
+tar xf "v$version.tar.gz"
+cd "verilator-$version"
+autoconf
+./configure
+make -j4
+make install
diff --git a/docker/install/ubuntu_install_vitis_ai_packages_ci.sh b/docker/install/ubuntu_install_vitis_ai_packages_ci.sh
index d4077bc67b44..c34ed3addce2 100644
--- a/docker/install/ubuntu_install_vitis_ai_packages_ci.sh
+++ b/docker/install/ubuntu_install_vitis_ai_packages_ci.sh
@@ -25,5 +25,5 @@ mkdir "$PYXIR_HOME"
pip3 install progressbar
-git clone --recursive --branch v0.1.2 https://github.com/Xilinx/pyxir.git "${PYXIR_HOME}"
+git clone --recursive --branch v0.1.3 https://github.com/Xilinx/pyxir.git "${PYXIR_HOME}"
cd "${PYXIR_HOME}" && python3 setup.py install
diff --git a/docker/with_the_same_user b/docker/with_the_same_user
index 2bcbb6f49201..459978409be5 100644
--- a/docker/with_the_same_user
+++ b/docker/with_the_same_user
@@ -56,5 +56,6 @@ PATH=${PATH} \
JAVA_HOME=${JAVA_HOME} \
LD_LIBRARY_PATH=${LD_LIBRARY_PATH} \
PYTHONPATH=${PYTHONPATH} \
+CI_IMAGE_NAME=${CI_IMAGE_NAME} \
HOME=${CI_BUILD_HOME} \
"${COMMAND[@]}"
diff --git a/docs/README.txt b/docs/README.txt
index 09c8e9b7e557..e409107b78a6 100644
--- a/docs/README.txt
+++ b/docs/README.txt
@@ -3,7 +3,7 @@ TVM Documentations
This folder contains the source of TVM documents
- A hosted version of doc is at https://tvm.apache.org/docs
-- pip install sphinx>=1.5.5 sphinx-gallery sphinx_rtd_theme matplotlib Image recommonmark "Pillow<7" autodocsumm
+- pip install sphinx>=1.5.5 sphinx-gallery sphinx_rtd_theme matplotlib Image recommonmark "Pillow<7" autodocsumm tlcpack-sphinx-addon
- Build tvm first in the root folder.
- Run the following command
```bash
@@ -51,3 +51,8 @@ You will need a gpu CI environment.
```bash
./tests/scripts/task_python_docs.sh
```
+
+Define the Order of Tutorials
+-----------------------------
+You can define the order of tutorials with `conf.py::subsection_order` and `conf.py::within_subsection_order`.
+By default, the tutorials within one subsection are sorted by filename.
\ No newline at end of file
diff --git a/docs/api/python/contrib.rst b/docs/api/python/contrib.rst
index 8ac4e1ff7d3a..0eb3024c2d08 100644
--- a/docs/api/python/contrib.rst
+++ b/docs/api/python/contrib.rst
@@ -122,9 +122,9 @@ tvm.contrib.tar
:members:
-tvm.contrib.util
-~~~~~~~~~~~~~~~~
-.. automodule:: tvm.contrib.util
+tvm.contrib.utils
+~~~~~~~~~~~~~~~~~
+.. automodule:: tvm.contrib.utils
:members:
diff --git a/docs/conf.py b/docs/conf.py
index 259d9c3fa0e2..a7198bf22355 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -48,7 +48,7 @@
project = "tvm"
author = "Apache Software Foundation"
copyright = "2020, %s" % author
-github_doc_root = "https://github.com/apache/incubator-tvm/tree/main/docs/"
+github_doc_root = "https://github.com/apache/tvm/tree/main/docs/"
os.environ["TVM_BUILD_DOC"] = "1"
# Version information.
@@ -204,6 +204,75 @@
]
)
+# Explicitly define the order within a subsection.
+# The listed files are sorted according to the list.
+# The unlisted files are sorted by filenames.
+# The unlisted files always appear after listed files.
+within_subsection_order = {
+ "get_started": [
+ "relay_quick_start.py",
+ "tensor_expr_get_started.py",
+ "tvmc_command_line_driver.py",
+ "cross_compilation_and_rpc.py",
+ ],
+ "frontend": [
+ "from_pytorch.py",
+ "from_tensorflow.py",
+ "from_mxnet.py",
+ "from_onnx.py",
+ "from_keras.py",
+ "from_tflite.py",
+ "from_coreml.py",
+ "from_darknet.py",
+ "from_caffe2.py",
+ ],
+ "language": [
+ "schedule_primitives.py",
+ "reduciton.py",
+ "intrin_math.py",
+ "scan.py",
+ "extern_op.py",
+ "tensorize.py",
+ "tuple_inputs.py",
+ "tedd.py",
+ ],
+ "optimize": [
+ "opt_gemm.py",
+ "opt_conv_cuda.py",
+ "opt_conv_tensorcore.py",
+ "opt_matmul_auto_tensorcore.py",
+ ],
+ "autotvm": [
+ "tune_simple_template.py",
+ "tune_conv2d_cuda.py",
+ "tune_relay_cuda.py",
+ "tune_relay_x86.py",
+ "tune_relay_arm.py",
+ "tune_relay_mobile_gpu.py",
+ ],
+ "auto_scheduler": ["tune_matmul_x86.py", "tune_conv2d_layer_cuda.py"],
+ "dev": ["low_level_custom_pass.py", "use_pass_infra.py", "bring_your_own_datatypes.py"],
+}
+
+
+class WithinSubsectionOrder:
+ def __init__(self, src_dir):
+ self.src_dir = src_dir.split("/")[-1]
+
+ def __call__(self, filename):
+ # If the order is provided, use the provided order
+ if (
+ self.src_dir in within_subsection_order
+ and filename in within_subsection_order[self.src_dir]
+ ):
+ index = within_subsection_order[self.src_dir].index(filename)
+ assert index < 1e10
+ return "\0%010d" % index
+
+ # Otherwise, sort by filename
+ return filename
+
+
sphinx_gallery_conf = {
"backreferences_dir": "gen_modules/backreferences",
"doc_module": ("tvm", "numpy"),
@@ -213,6 +282,7 @@
"numpy": "https://numpy.org/doc/stable",
},
"examples_dirs": examples_dirs,
+ "within_subsection_order": WithinSubsectionOrder,
"gallery_dirs": gallery_dirs,
"subsection_order": subsection_order,
"filename_pattern": os.environ.get("TVM_TUTORIAL_EXEC_PATTERN", ".py"),
@@ -234,6 +304,57 @@
"tvm.relay": ["tvm.ir", "tvm.tir"],
}
+## Setup header and other configs
+import tlcpack_sphinx_addon
+
+footer_copyright = "© 2020 Apache Software Foundation | All rights reserved"
+footer_note = " ".join(
+ """
+Copyright © 2020 The Apache Software Foundation. Apache TVM, Apache, the Apache feather,
+and the Apache TVM project logo are either trademarks or registered trademarks of
+the Apache Software Foundation.""".split(
+ "\n"
+ )
+).strip()
+
+header_logo = "https://tvm.apache.org/assets/images/logo.svg"
+header_logo_link = "https://tvm.apache.org/"
+
+header_links = [
+ ("Community", "https://tvm.apache.org/community"),
+ ("Download", "https://tvm.apache.org/download"),
+ ("VTA", "https://tvm.apache.org/vta"),
+ ("Blog", "https://tvm.apache.org/blog"),
+ ("Docs", "https://tvm.apache.org/docs"),
+ ("Conference", "https://tvmconf.org"),
+ ("Github", "https://github.com/apache/tvm/"),
+]
+
+header_dropdown = {
+ "name": "ASF",
+ "items": [
+ ("Apache Homepage", "https://apache.org/"),
+ ("License", "https://www.apache.org/licenses/"),
+ ("Sponsorship", "https://www.apache.org/foundation/sponsorship.html"),
+ ("Security", "https://www.apache.org/security/"),
+ ("Thanks", "https://www.apache.org/foundation/thanks.html"),
+ ("Events", "https://www.apache.org/events/current-event"),
+ ],
+}
+
+html_context = {
+ "footer_copyright": footer_copyright,
+ "footer_note": footer_note,
+ "header_links": header_links,
+ "header_dropdown": header_dropdown,
+ "header_logo": header_logo,
+ "header_logo_link": header_logo_link,
+}
+
+# add additional overrides
+templates_path += [tlcpack_sphinx_addon.get_templates_path()]
+html_static_path += [tlcpack_sphinx_addon.get_static_path()]
+
def update_alias_docstring(name, obj, lines):
"""Update the docstring of alias functions.
@@ -282,4 +403,3 @@ def process_docstring(app, what, name, obj, options, lines):
def setup(app):
app.connect("autodoc-process-docstring", process_docstring)
- app.add_css_file("css/tvm_theme.css")
diff --git a/docs/contribute/community.rst b/docs/contribute/community.rst
index fd6df0f991bd..8867202a674c 100644
--- a/docs/contribute/community.rst
+++ b/docs/contribute/community.rst
@@ -20,7 +20,7 @@
TVM Community Guideline
=======================
-TVM adopts the Apache style model and governs by merit. We believe that it is important to create an inclusive community where everyone can use, contribute to, and influence the direction of the project. See `CONTRIBUTORS.md `_ for the current list of contributors.
+TVM adopts the Apache style model and governs by merit. We believe that it is important to create an inclusive community where everyone can use, contribute to, and influence the direction of the project. See `CONTRIBUTORS.md `_ for the current list of contributors.
diff --git a/docs/contribute/document.rst b/docs/contribute/document.rst
index 1bfab1e1c061..3652a2891b37 100644
--- a/docs/contribute/document.rst
+++ b/docs/contribute/document.rst
@@ -68,7 +68,7 @@ Be careful to leave blank lines between sections of your documents.
In the above case, there has to be a blank line before `Parameters`, `Returns` and `Examples`
in order for the doc to be built correctly. To add a new function to the doc,
we need to add the `sphinx.autodoc `_
-rules to the `docs/api/python `_).
+rules to the `docs/api/python `_).
You can refer to the existing files under this folder on how to add the functions.
@@ -96,7 +96,7 @@ to add comments about code logics to improve readability.
Write Tutorials
---------------
We use the `sphinx-gallery `_ to build python tutorials.
-You can find the source code under `tutorials `_ quite self explanatory.
+You can find the source code under `tutorials `_; it is quite self-explanatory.
One thing that worth noting is that the comment blocks are written in reStructuredText instead of markdown so be aware of the syntax.
The tutorial code will run on our build server to generate the document page.
diff --git a/docs/contribute/error_handling.rst b/docs/contribute/error_handling.rst
index 8f71ee61aeb6..d31b401ea654 100644
--- a/docs/contribute/error_handling.rst
+++ b/docs/contribute/error_handling.rst
@@ -37,14 +37,14 @@ raise an error of the corresponding type.
Note that you do not have to add a new type
:py:class:`tvm.error.TVMError` will be raised by default when
there is no error type prefix in the message.
-This mechanism works for both ``LOG(FATAL)`` and ``CHECK`` macros.
+This mechanism works for both ``LOG(FATAL)`` and ``ICHECK`` macros.
The following code gives an example on how to do so.
.. code:: c
// src/api_test.cc
void ErrorTest(int x, int y) {
- CHECK_EQ(x, y) << "ValueError: expect x and y to be equal."
+    ICHECK_EQ(x, y) << "ValueError: expect x and y to be equal.";
if (x == 1) {
LOG(FATAL) << "InternalError: cannot reach here";
}
diff --git a/docs/contribute/release_process.rst b/docs/contribute/release_process.rst
index 0f1e5151f5a9..f330a7ddd3e6 100644
--- a/docs/contribute/release_process.rst
+++ b/docs/contribute/release_process.rst
@@ -17,8 +17,8 @@
.. _release_process:
-Apache TVM (incubating) Release Process
-=======================================
+Apache TVM Release Process
+==========================
The release manager role in TVM means you are responsible for a few different things:
@@ -64,13 +64,13 @@ The last step is to update the KEYS file with your code signing key https://www.
.. code-block:: bash
# the --depth=files will avoid checkout existing folders
- svn co --depth=files "https://dist.apache.org/repos/dist/dev/incubator/tvm" svn-tvm
+ svn co --depth=files "https://dist.apache.org/repos/dist/dev/tvm" svn-tvm
cd svn-tvm
# edit KEYS file
svn ci --username $ASF_USERNAME --password "$ASF_PASSWORD" -m "Update KEYS"
# update downloads.apache.org
- svn rm --username $ASF_USERNAME --password "$ASF_PASSWORD" https://dist.apache.org/repos/dist/release/incubator/tvm/KEYS -m "Update KEYS"
- svn cp --username $ASF_USERNAME --password "$ASF_PASSWORD" https://dist.apache.org/repos/dist/dev/incubator/tvm/KEYS https://dist.apache.org/repos/dist/release/incubator/tvm/ -m "Update KEYS"
+ svn rm --username $ASF_USERNAME --password "$ASF_PASSWORD" https://dist.apache.org/repos/dist/release/tvm/KEYS -m "Update KEYS"
+ svn cp --username $ASF_USERNAME --password "$ASF_PASSWORD" https://dist.apache.org/repos/dist/dev/tvm/KEYS https://dist.apache.org/repos/dist/release/tvm/ -m "Update KEYS"
Cut a Release Candidate
@@ -80,8 +80,8 @@ To cut a release candidate, one needs to first cut a branch using selected versi
.. code-block:: bash
- git clone https://github.com/apache/incubator-tvm.git
- cd incubator-tvm/
+ git clone https://github.com/apache/tvm.git
+ cd tvm/
git branch v0.6.0
git push --set-upstream origin v0.6.0
@@ -107,8 +107,8 @@ Create source code artifacts,
.. code-block:: bash
- git clone git@github.com:apache/incubator-tvm.git apache-tvm-src-v0.6.0.rc0-incubating
- cd apache-tvm-src-v0.6.0.rc0-incubating
+ git clone git@github.com:apache/tvm.git apache-tvm-src-v0.6.0.rc0
+ cd apache-tvm-src-v0.6.0.rc0
git checkout v0.6
git submodule update --init --recursive
git checkout v0.6.0.rc0
@@ -116,7 +116,7 @@ Create source code artifacts,
find . -name ".git*" -print0 | xargs -0 rm -rf
cd ..
brew install gnu-tar
- gtar -czvf apache-tvm-src-v0.6.0.rc0-incubating.tar.gz apache-tvm-src-v0.6.0.rc0-incubating
+ gtar -czvf apache-tvm-src-v0.6.0.rc0.tar.gz apache-tvm-src-v0.6.0.rc0
Use your GPG key to sign the created artifact. First make sure your GPG is set to use the correct private key,
@@ -129,8 +129,8 @@ Create GPG signature as well as the hash of the file,
.. code-block:: bash
- gpg --armor --output apache-tvm-src-v0.6.0.rc0-incubating.tar.gz.asc --detach-sig apache-tvm-src-v0.6.0.rc0-incubating.tar.gz
- shasum -a 512 apache-tvm-src-v0.6.0.rc0-incubating.tar.gz > apache-tvm-src-v0.6.0.rc0-incubating.tar.gz.sha512
+ gpg --armor --output apache-tvm-src-v0.6.0.rc0.tar.gz.asc --detach-sig apache-tvm-src-v0.6.0.rc0.tar.gz
+ shasum -a 512 apache-tvm-src-v0.6.0.rc0.tar.gz > apache-tvm-src-v0.6.0.rc0.tar.gz.sha512
Upload the Release Candidate
@@ -143,7 +143,7 @@ The release manager also needs to upload the artifacts to ASF SVN,
.. code-block:: bash
# the --depth=files will avoid checkout existing folders
- svn co --depth=files "https://dist.apache.org/repos/dist/dev/incubator/tvm" svn-tvm
+ svn co --depth=files "https://dist.apache.org/repos/dist/dev/tvm" svn-tvm
cd svn-tvm
mkdir tvm-v0.6.0-rc0
# copy files into it
@@ -154,9 +154,7 @@ The release manager also needs to upload the artifacts to ASF SVN,
Call a Vote on the Release Candidate
------------------------------------
-As an incubator project, it requires voting on both dev@ and general@.
-
-The first voting takes place on the Apache TVM (incubator) developers list (dev@tvm.apache.org). To get more attention, one can create a github issue start with "[VOTE]" instead, it will be mirrored to dev@ automatically. Look at past voting threads to see how this proceeds. The email should follow this format.
+The first voting takes place on the Apache TVM developers list (dev@tvm.apache.org). To get more attention, one can create a github issue starting with "[VOTE]" instead; it will be mirrored to dev@ automatically. Look at past voting threads to see how this proceeds. The email should follow this format.
- Provide the link to the draft of the release notes in the email
- Provide the link to the release candidate artifacts
@@ -164,14 +162,9 @@ The first voting takes place on the Apache TVM (incubator) developers list (dev@
For the dev@ vote, there must be at least 3 binding +1 votes and more +1 votes than -1 votes. Once the vote is done, you should also send out a summary email with the totals, with a subject that looks something like [VOTE][RESULT] ....
-The voting then moves onto the general@incubator.apache.org. Anyone can contribute a vote, but only "Incubator PMC" (IPMC) votes are binding.
-To pass, there must be 3 binding +1 votes and more +1 votes than -1 votes.
-
In ASF, votes are open "at least" 72hrs (3 days). If you don't get enough number of binding votes within that time, you cannot close the voting deadline. You need to extend it.
-Same as the one on dev@, send out a summary email to general@ once the vote passes.
-
-If either voting fails, the community needs to modified the release accordingly, create a new release candidate and re-run the voting process.
+If the voting fails, the community needs to modify the release accordingly, create a new release candidate, and re-run the voting process.
Post the Release
@@ -182,12 +175,12 @@ After the vote passes, to upload the binaries to Apache mirrors, you move the bi
.. code-block:: bash
export SVN_EDITOR=vim
- svn mkdir https://dist.apache.org/repos/dist/release/incubator/tvm
- svn mv https://dist.apache.org/repos/dist/dev/incubator/tvm/tvm-v0.6.0-rc2 https://dist.apache.org/repos/dist/release/incubator/tvm/tvm-v0.6.0
+ svn mkdir https://dist.apache.org/repos/dist/release/tvm
+ svn mv https://dist.apache.org/repos/dist/dev/tvm/tvm-v0.6.0-rc2 https://dist.apache.org/repos/dist/release/tvm/tvm-v0.6.0
# If you've added your signing key to the KEYS file, also update the release copy.
- svn co --depth=files "https://dist.apache.org/repos/dist/release/incubator/tvm" svn-tvm
- curl "https://dist.apache.org/repos/dist/dev/incubator/tvm/KEYS" > svn-tvm/KEYS
+ svn co --depth=files "https://dist.apache.org/repos/dist/release/tvm" svn-tvm
+ curl "https://dist.apache.org/repos/dist/dev/tvm/KEYS" > svn-tvm/KEYS
(cd svn-tvm && svn ci --username $ASF_USERNAME --password "$ASF_PASSWORD" -m"Update KEYS")
Remember to create a new release TAG (v0.6.0 in this case) on Github and remove the pre-release candidate TAG.
@@ -200,10 +193,10 @@ Remember to create a new release TAG (v0.6.0 in this case) on Github and remove
Update the TVM Website
----------------------
-The website repository is located at `https://github.com/apache/incubator-tvm-site `_. Modify the download page to include the release artifacts as well as the GPG signature and SHA hash.
+The website repository is located at `https://github.com/apache/tvm-site `_. Modify the download page to include the release artifacts as well as the GPG signature and SHA hash.
Post the Announcement
---------------------
-Send out an announcement email to general@incubator.apache.org, announce@apache.org, and dev@tvm.apache.org. The announcement should include the link to release note and download page.
+Send out an announcement email to announce@apache.org, and dev@tvm.apache.org. The announcement should include the link to release note and download page.
diff --git a/docs/deploy/android.rst b/docs/deploy/android.rst
index e28eef383164..8c8fcfb49679 100644
--- a/docs/deploy/android.rst
+++ b/docs/deploy/android.rst
@@ -38,5 +38,5 @@ deploy_lib.so, deploy_graph.json, deploy_param.params will go to android target.
TVM Runtime for Android Target
------------------------------
-Refer `here `_ to build CPU/OpenCL version flavor TVM runtime for android target.
-From android java TVM API to load model & execute can be referred at this `java `_ sample source.
+Refer `here `_ to build CPU/OpenCL version flavor TVM runtime for android target.
+From android java TVM API to load model & execute can be referred at this `java `_ sample source.
diff --git a/docs/deploy/arm_compute_lib.rst b/docs/deploy/arm_compute_lib.rst
index 5dd00764bcbc..a2eaa5fb5662 100644
--- a/docs/deploy/arm_compute_lib.rst
+++ b/docs/deploy/arm_compute_lib.rst
@@ -36,7 +36,7 @@ determine the architecture by looking online.
We recommend two different ways to build and install ACL:
-* Use the script located at `docker/install/ubuntu_install_arm_compute_library.sh`. You can use this
+* Use the script located at `docker/install/ubuntu_install_arm_compute_lib.sh`. You can use this
script for building ACL from source natively or for cross-compiling the library on an x86 machine.
You may need to change the architecture of the device you wish to compile for by altering the
`target_arch` variable. Binaries will be built from source and installed to the location denoted by
diff --git a/docs/deploy/cpp_deploy.rst b/docs/deploy/cpp_deploy.rst
index f3de69db2d1c..44df1e55b58e 100644
--- a/docs/deploy/cpp_deploy.rst
+++ b/docs/deploy/cpp_deploy.rst
@@ -19,7 +19,7 @@
Deploy TVM Module using C++ API
===============================
-We provide an example on how to deploy TVM modules in `apps/howto_deploy `_
+We provide an example on how to deploy TVM modules in `apps/howto_deploy `_
To run the example, you can use the following command
@@ -38,17 +38,17 @@ TVM provides a minimum runtime, which costs around 300K to 600K depending on how
In most cases, we can use ``libtvm_runtime.so`` that comes with the build.
If somehow you find it is hard to build ``libtvm_runtime``, checkout
-`tvm_runtime_pack.cc `_.
+`tvm_runtime_pack.cc `_.
It is an example all in one file that gives you TVM runtime.
You can compile this file using your build system and include this into your project.
-You can also checkout `apps `_ for example applications build with TVM on iOS, Android and others.
+You can also checkout `apps `_ for example applications build with TVM on iOS, Android and others.
Dynamic Library vs. System Module
---------------------------------
TVM provides two ways to use the compiled library.
-You can checkout `prepare_test_libs.py `_
-on how to generate the library and `cpp_deploy.cc `_ on how to use them.
+You can checkout `prepare_test_libs.py `_
+on how to generate the library and `cpp_deploy.cc `_ on how to use them.
- Store library as a shared library and dynamically load the library into your project.
- Bundle the compiled library into your project in system module mode.
diff --git a/docs/deploy/index.rst b/docs/deploy/index.rst
index b38a7f561ab3..2b37f734c3c3 100644
--- a/docs/deploy/index.rst
+++ b/docs/deploy/index.rst
@@ -38,7 +38,7 @@ on a Linux based embedded system such as Raspberry Pi:
.. code:: bash
- git clone --recursive https://github.com/apache/incubator-tvm tvm
+ git clone --recursive https://github.com/apache/tvm tvm
cd tvm
mkdir build
cp cmake/config.cmake build
@@ -69,3 +69,5 @@ target device without relying on RPC. see the following resources on how to do s
integrate
hls
arm_compute_lib
+ tensorrt
+ vitis_ai
diff --git a/docs/deploy/tensorrt.rst b/docs/deploy/tensorrt.rst
new file mode 100644
index 000000000000..27f11e9b5377
--- /dev/null
+++ b/docs/deploy/tensorrt.rst
@@ -0,0 +1,297 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+
+Relay TensorRT Integration
+==========================
+**Author**: `Trevor Morris `_
+
+Introduction
+------------
+
+NVIDIA TensorRT is a library for optimized deep learning inference. This integration will offload as
+many operators as possible from Relay to TensorRT, providing a performance boost on NVIDIA GPUs
+without the need to tune schedules.
+
+This guide will demonstrate how to install TensorRT and build TVM with TensorRT BYOC and runtime
+enabled. It will also provide example code to compile and run a ResNet-18 model using TensorRT, and
+show how to configure the compilation and runtime settings. Finally, we document the supported operators
+and how to extend the integration to support other operators.
+
+Installing TensorRT
+-------------------
+
+In order to download TensorRT, you will need to create an NVIDIA Developer program account. Please
+see NVIDIA's documentation for more info:
+https://docs.nvidia.com/deeplearning/tensorrt/install-guide/index.html. If you have a Jetson device
+such as a TX1, TX2, Xavier, or Nano, TensorRT will already be installed on the device via the
+JetPack SDK.
+
+There are two methods to install TensorRT:
+
+* System install via deb or rpm package.
+* Tar file installation.
+
+With the tar file installation method, you must provide the path of the extracted tar archive to
+USE_TENSORRT_RUNTIME=/path/to/TensorRT. With the system install method,
+USE_TENSORRT_RUNTIME=ON will automatically locate your installation.
+
+Building TVM with TensorRT support
+----------------------------------
+
+There are two separate build flags for TensorRT integration in TVM. These flags also enable
+cross-compilation: USE_TENSORRT_CODEGEN=ON will allow you to build a module with TensorRT support on
+a host machine, while USE_TENSORRT_RUNTIME=ON will enable the TVM runtime on an edge device to
+execute the TensorRT module. You should enable both if you want to compile and also execute models
+with the same TVM build.
+
+* USE_TENSORRT_CODEGEN=ON/OFF - This flag will enable compiling a TensorRT module, which does not require any
+ TensorRT library.
+* USE_TENSORRT_RUNTIME=ON/OFF/path-to-TensorRT - This flag will enable the TensorRT runtime module.
+ This will build TVM against the installed TensorRT library.
+
+Example setting in config.cmake file:
+
+.. code:: cmake
+
+ set(USE_TENSORRT_CODEGEN ON)
+ set(USE_TENSORRT_RUNTIME /home/ubuntu/TensorRT-7.0.0.11)
+
+
+Build and Deploy ResNet-18 with TensorRT
+----------------------------------------
+
+Create a Relay graph from a MXNet ResNet-18 model.
+
+.. code:: python
+
+ import tvm
+ from tvm import relay
+ import mxnet
+ from mxnet.gluon.model_zoo.vision import get_model
+
+ dtype = "float32"
+ input_shape = (1, 3, 224, 224)
+ block = get_model('resnet18_v1', pretrained=True)
+ mod, params = relay.frontend.from_mxnet(block, shape={'data': input_shape}, dtype=dtype)
+
+
+Annotate and partition the graph for TensorRT. All ops which are supported by the TensorRT
+integration will be marked and offloaded to TensorRT. The rest of the ops will go through the
+regular TVM CUDA compilation and code generation.
+
+.. code:: python
+
+ from tvm.relay.op.contrib.tensorrt import partition_for_tensorrt
+ mod, config = partition_for_tensorrt(mod, params)
+
+
+Build the Relay graph, using the new module and config returned by partition_for_tensorrt. The
+target must always be a cuda target. ``partition_for_tensorrt`` will automatically fill out the
+required values in the config, so there is no need to modify it - just pass it along to the
+PassContext so the values can be read during compilation.
+
+.. code:: python
+
+ target = "cuda"
+ with tvm.transform.PassContext(opt_level=3, config={'relay.ext.tensorrt.options': config}):
+ lib = relay.build(mod, target=target, params=params)
+
+
+Export the module.
+
+.. code:: python
+
+ lib.export_library('compiled.so')
+
+
+Load the module and run inference on the target machine, which must use a TVM build with
+``USE_TENSORRT_RUNTIME`` enabled. The first run will take longer because the TensorRT engine will
+have to be built.
+
+.. code:: python
+
+    import numpy as np
+
+    ctx = tvm.gpu(0)
+ loaded_lib = tvm.runtime.load_module('compiled.so')
+ gen_module = tvm.contrib.graph_runtime.GraphModule(loaded_lib['default'](ctx))
+ input_data = np.random.uniform(0, 1, input_shape).astype(dtype)
+ gen_module.run(data=input_data)
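+
+The output can then be fetched from the graph runtime module. A minimal sketch
+(``get_output`` is the standard graph runtime accessor; index ``0`` assumes a
+single-output model such as this one):
+
+.. code:: python
+
+    # Fetch the first output as a NumPy array; for an ImageNet classifier
+    # such as ResNet-18 this holds the (1, 1000) class scores.
+    tvm_output = gen_module.get_output(0).asnumpy()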
+
+
+Partitioning and Compilation Settings
+-------------------------------------
+
+There are some options which can be configured in ``partition_for_tensorrt``; a usage sketch follows the list.
+
+* ``version`` - TensorRT version to target as tuple of (major, minor, patch). If TVM is compiled
+ with USE_TENSORRT_RUNTIME=ON, the linked TensorRT version will be used instead. The version
+ will affect which ops can be partitioned to TensorRT.
+* ``use_implicit_batch`` - Use TensorRT implicit batch mode (default true). Setting to false will
+  enable explicit batch mode, which widens the set of supported operators to include those which modify the
+ batch dimension, but may reduce performance for some models.
+* ``remove_no_mac_subgraphs`` - A heuristic to improve performance. Removes subgraphs which have
+ been partitioned for TensorRT if they do not have any multiply-accumulate operations. The removed
+ subgraphs will go through TVM's standard compilation instead.
+* ``max_workspace_size`` - How many bytes of workspace size to allow each subgraph to use for
+  TensorRT engine creation. See TensorRT documentation for more info. Can be overridden at runtime.
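+
+As a usage sketch, all four options can be passed as keyword arguments (the
+names mirror the list above; check the signature in
+``tvm.relay.op.contrib.tensorrt`` before relying on them):
+
+.. code:: python
+
+    mod, config = partition_for_tensorrt(
+        mod,
+        params,
+        version=(7, 0, 0),             # target TensorRT version when not linked
+        use_implicit_batch=False,      # use explicit batch mode instead
+        remove_no_mac_subgraphs=True,  # drop MAC-free TensorRT subgraphs
+        max_workspace_size=1 << 30,    # 1 GiB of scratch space per subgraph
+    )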
+
+
+Runtime Settings
+----------------
+
+There are some additional options which can be configured at runtime using environment variables.
+
+* Automatic FP16 Conversion - Environment variable ``TVM_TENSORRT_USE_FP16=1`` can be set to
+ automatically convert the TensorRT components of your model to 16-bit floating point precision.
+ This can greatly increase performance, but may cause some slight loss in the model accuracy.
+* Caching TensorRT Engines - During the first inference, the runtime will invoke the TensorRT API
+ to build an engine. This can be time consuming, so you can set ``TVM_TENSORRT_CACHE_DIR`` to
+  point to a directory where these built engines will be saved on disk. The next time you load the model
+ and give it the same directory, the runtime will load the already built engines to avoid the long
+ warmup time. A unique directory is required for each model.
+* Maximum Workspace Size - TensorRT has a parameter to configure the maximum amount of scratch space
+  that each layer in the model can use. It is generally best to use the highest value which does not
+  cause you to run out of memory. You can use ``TVM_TENSORRT_MAX_WORKSPACE_SIZE`` to override this by
+  specifying the workspace size in bytes you would like to use.
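+
+For example, the variables can be set from Python before the module is loaded
+(a sketch; the cache directory path is hypothetical):
+
+.. code:: python
+
+    import os
+
+    # These are read by the TensorRT runtime when an engine is built or loaded.
+    os.environ["TVM_TENSORRT_USE_FP16"] = "1"
+    os.environ["TVM_TENSORRT_CACHE_DIR"] = "/tmp/tvm_trt_cache"  # hypothetical path
+    os.environ["TVM_TENSORRT_MAX_WORKSPACE_SIZE"] = str(1 << 30)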
+
+
+Operator support
+----------------
++------------------------+------------------------------------+
+| Relay Node | Remarks |
++========================+====================================+
+| nn.relu | |
++------------------------+------------------------------------+
+| sigmoid | |
++------------------------+------------------------------------+
+| tanh | |
++------------------------+------------------------------------+
+| nn.batch_norm | |
++------------------------+------------------------------------+
+| nn.softmax | |
++------------------------+------------------------------------+
+| nn.conv2d | |
++------------------------+------------------------------------+
+| nn.dense | |
++------------------------+------------------------------------+
+| nn.bias_add | |
++------------------------+------------------------------------+
+| add | |
++------------------------+------------------------------------+
+| subtract | |
++------------------------+------------------------------------+
+| multiply | |
++------------------------+------------------------------------+
+| divide | |
++------------------------+------------------------------------+
+| power | |
++------------------------+------------------------------------+
+| maximum | |
++------------------------+------------------------------------+
+| minimum | |
++------------------------+------------------------------------+
+| nn.max_pool2d | |
++------------------------+------------------------------------+
+| nn.avg_pool2d | |
++------------------------+------------------------------------+
+| nn.global_max_pool2d | |
++------------------------+------------------------------------+
+| nn.global_avg_pool2d | |
++------------------------+------------------------------------+
+| exp | |
++------------------------+------------------------------------+
+| log | |
++------------------------+------------------------------------+
+| sqrt | |
++------------------------+------------------------------------+
+| abs | |
++------------------------+------------------------------------+
+| negative | |
++------------------------+------------------------------------+
+| nn.batch_flatten | |
++------------------------+------------------------------------+
+| expand_dims | |
++------------------------+------------------------------------+
+| squeeze | |
++------------------------+------------------------------------+
+| concatenate | |
++------------------------+------------------------------------+
+| nn.conv2d_transpose | |
++------------------------+------------------------------------+
+| transpose | |
++------------------------+------------------------------------+
+| layout_transform | |
++------------------------+------------------------------------+
+| reshape | |
++------------------------+------------------------------------+
+| nn.pad | |
++------------------------+------------------------------------+
+| sum | |
++------------------------+------------------------------------+
+| prod | |
++------------------------+------------------------------------+
+| max | |
++------------------------+------------------------------------+
+| min | |
++------------------------+------------------------------------+
+| mean | |
++------------------------+------------------------------------+
+| nn.adaptive_max_pool2d | |
++------------------------+------------------------------------+
+| nn.adaptive_avg_pool2d | |
++------------------------+------------------------------------+
+| clip | Requires TensorRT 5.1.5 or greater |
++------------------------+------------------------------------+
+| nn.leaky_relu | Requires TensorRT 5.1.5 or greater |
++------------------------+------------------------------------+
+| sin | Requires TensorRT 5.1.5 or greater |
++------------------------+------------------------------------+
+| cos | Requires TensorRT 5.1.5 or greater |
++------------------------+------------------------------------+
+| atan | Requires TensorRT 5.1.5 or greater |
++------------------------+------------------------------------+
+| ceil | Requires TensorRT 5.1.5 or greater |
++------------------------+------------------------------------+
+| floor | Requires TensorRT 5.1.5 or greater |
++------------------------+------------------------------------+
+| strided_slice | Requires TensorRT 5.1.5 or greater |
++------------------------+------------------------------------+
+| nn.conv3d | Requires TensorRT 6.0.1 or greater |
++------------------------+------------------------------------+
+| nn.max_pool3d | Requires TensorRT 6.0.1 or greater |
++------------------------+------------------------------------+
+| nn.avg_pool3d | Requires TensorRT 6.0.1 or greater |
++------------------------+------------------------------------+
+| nn.conv3d_transpose | Requires TensorRT 6.0.1 or greater |
++------------------------+------------------------------------+
+
+
+Adding a new operator
+---------------------
+To add support for a new operator, we need to make changes to a series of files:
+
+* `src/runtime/contrib/tensorrt/tensorrt_ops.cc` Create a new op converter class which
+ implements the ``TensorRTOpConverter`` interface. You must implement the constructor to specify how
+ many inputs there are and whether they are tensors or weights. You must also implement the
+ ``Convert`` method to perform the conversion. This is done by using the inputs, attributes, and
+ network from params to add the new TensorRT layers and push the layer outputs. You can use the
+ existing converters as an example. Finally, register your new op converter in the
+ ``GetOpConverters()`` map.
+* `python/relay/op/contrib/tensorrt.py` This file contains the annotation rules for TensorRT,
+ which determine which operators and attributes are supported. You must register an
+ annotation function for the Relay operator that checks whether the attributes are supported
+ by your converter and returns true or false (see the sketch after this list).
+* `tests/python/contrib/test_tensorrt.py` Add unit tests for the given operator.
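+
+For illustration only, a new annotation rule in `python/relay/op/contrib/tensorrt.py`
+could follow the sketch below. The operator name, attribute check, and registration
+helper are assumptions for the example rather than the actual integration code:
+
+.. code:: python
+
+ import tvm.ir
+
+ @tvm.ir.register_op_attr("nn.softmax", "target.tensorrt")
+ def softmax_annotate_fn(expr):
+     """Hypothetical rule: offload softmax only for axis values the converter handles."""
+     # Reject attribute values the TensorRT converter cannot convert.
+     if int(expr.attrs.axis) not in (1, -1):
+         return False
+     return True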
diff --git a/docs/deploy/vitis_ai.rst b/docs/deploy/vitis_ai.rst
new file mode 100755
index 000000000000..df29f16f9d8d
--- /dev/null
+++ b/docs/deploy/vitis_ai.rst
@@ -0,0 +1,652 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+
+
+Vitis-AI Integration
+====================
+
+`Vitis-AI `__ is Xilinx's
+development stack for hardware-accelerated AI inference on Xilinx
+platforms, including both edge devices and Alveo cards. It consists of
+optimized IP, tools, libraries, models, and example designs. It is
+designed with high efficiency and ease of use in mind, unleashing the
+full potential of AI acceleration on Xilinx FPGA and ACAP.
+
+The current Vitis-AI BYOC flow inside TVM enables acceleration of neural
+network model inference on edge and cloud. The identifiers for the
+supported edge and cloud Deep Learning Processor Units (DPUs) are
+DPUCZDX8G and DPUCADX8G, respectively. DPUCZDX8G and DPUCADX8G are hardware
+accelerators for convolutional neural networks (CNNs) on top of the
+Xilinx `Zynq Ultrascale+
+MPSoc `__
+and
+`Alveo `__
+(U200/U250) platforms, respectively. For more information about the DPU identifiers
+see the section on `DPU naming information <#dpu-naming-information>`__.
+
+On this page you will find information on how to
+`build <#build-instructions>`__ TVM with Vitis-AI and on how to `get
+started <#getting-started>`__ with an example.
+
+DPU naming information
+----------------------
+
++---------------------------------+-----------------+-------------------------------------------------------------------------+------------------------------------------------------------+---------------------------------------------------+--------------------------------------------------------------------------+
+| DPU | Application | HW Platform | Quantization Method | Quantization Bitwidth | Design Target |
++=================================+=================+=========================================================================+============================================================+===================================================+==========================================================================+
+| Deep Learning Processing Unit | C: CNN R: RNN | AD: Alveo DDR AH: Alveo HBM VD: Versal DDR with AIE & PL ZD: Zynq DDR | X: DECENT I: Integer threshold F: Float threshold R: RNN | 4: 4-bit 8: 8-bit 16: 16-bit M: Mixed Precision | G: General purpose H: High throughput L: Low latency C: Cost optimized |
++---------------------------------+-----------------+-------------------------------------------------------------------------+------------------------------------------------------------+---------------------------------------------------+--------------------------------------------------------------------------+
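+
+As a worked reading of this table, the edge identifier DPUCZDX8G decodes as:
+DPU, C (CNN application), ZD (Zynq DDR hardware platform), X (DECENT
+quantization method), 8 (8-bit quantization) and G (general purpose design
+target).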
+
+Build instructions
+------------------
+
+This section lists the instructions for building TVM with Vitis-AI for
+both `cloud <#cloud-dpucadx8g>`__ and `edge <#edge-dpuczdx8g>`__.
+
+Cloud (DPUCADX8G)
+~~~~~~~~~~~~~~~~~
+
+For Vitis-AI acceleration in the cloud, TVM has to be built on top of
+the Xilinx Alveo platform.
+
+System requirements
+^^^^^^^^^^^^^^^^^^^
+
+The following table lists system requirements for running docker
+containers as well as Alveo cards.
+
++-----------------------------------------------------+----------------------------------------------------------+
+| **Component** | **Requirement** |
++=====================================================+==========================================================+
+| Motherboard | PCI Express 3.0-compliant with one dual-width x16 slot |
++-----------------------------------------------------+----------------------------------------------------------+
+| System Power Supply | 225W |
++-----------------------------------------------------+----------------------------------------------------------+
+| Operating System | Ubuntu 16.04, 18.04 |
++-----------------------------------------------------+----------------------------------------------------------+
+| | CentOS 7.4, 7.5 |
++-----------------------------------------------------+----------------------------------------------------------+
+| | RHEL 7.4, 7.5 |
++-----------------------------------------------------+----------------------------------------------------------+
+| CPU | Intel i3/i5/i7/i9/Xeon 64-bit CPU |
++-----------------------------------------------------+----------------------------------------------------------+
+| GPU (Optional to accelerate quantization) | NVIDIA GPU with a compute capability > 3.0 |
++-----------------------------------------------------+----------------------------------------------------------+
+| CUDA Driver (Optional to accelerate quantization) | nvidia-410 |
++-----------------------------------------------------+----------------------------------------------------------+
+| FPGA | Xilinx Alveo U200 or U250 |
++-----------------------------------------------------+----------------------------------------------------------+
+| Docker Version | 19.03.1 |
++-----------------------------------------------------+----------------------------------------------------------+
+
+Hardware setup and docker build
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+1. Clone the Vitis AI repository:
+
+ .. code:: bash
+
+ git clone --recurse-submodules https://github.com/Xilinx/Vitis-AI
+
+2. Install Docker and add your user to the docker group. Docker
+ installation and post-install instructions can be found on the
+ following pages of Docker's website:
+
+ - https://docs.docker.com/install/linux/docker-ce/ubuntu/
+ - https://docs.docker.com/install/linux/docker-ce/centos/
+ - https://docs.docker.com/install/linux/linux-postinstall/
+
+3. Download the latest Vitis AI Docker with the following command. This container runs on CPU.
+
+ .. code:: bash
+
+ docker pull xilinx/vitis-ai:latest
+
+ To accelerate the quantization, you can optionally use the Vitis-AI GPU docker image. Use the following commands to build the Vitis-AI GPU docker container:
+
+ .. code:: bash
+
+ cd Vitis-AI/docker
+ ./docker_build_gpu.sh
+
+4. Set up Vitis AI to target Alveo cards. To target Alveo cards with
+ Vitis AI for machine learning workloads, you must install the
+ following software components:
+
+ - Xilinx Runtime (XRT)
+ - Alveo Deployment Shells (DSAs)
+ - Xilinx Resource Manager (XRM) (xbutler)
+ - Xilinx Overlaybins (Accelerators to Dynamically Load - binary
+ programming files)
+
+ While it is possible to install all of these software components
+ individually, a script has been provided to automatically install
+ them at once. To do so:
+
+ - Run the following commands:
+
+ .. code:: bash
+
+ cd Vitis-AI/alveo/packages
+ sudo su
+ ./install.sh
+
+ - Power cycle the system.
+
+5. Clone the TVM and PyXIR repositories:
+
+ .. code:: bash
+
+ git clone --recursive https://github.com/apache/tvm.git
+ git clone --recursive https://github.com/Xilinx/pyxir.git
+
+6. Build and start the tvm runtime Vitis-AI Docker Container.
+
+ .. code:: bash
+
+ ./tvm/docker/build.sh demo_vitis_ai bash
+ ./tvm/docker/bash.sh tvm.demo_vitis_ai
+
+ #Setup inside container
+ source /opt/xilinx/xrt/setup.sh
+ . $VAI_ROOT/conda/etc/profile.d/conda.sh
+ conda activate vitis-ai-tensorflow
+
+7. Install PyXIR
+
+ .. code:: bash
+
+ cd pyxir
+ python3 setup.py install --use_vai_rt_dpucadx8g --user
+
+
+8. Build TVM inside the container with Vitis-AI
+
+ .. code:: bash
+
+ cd tvm
+ mkdir build
+ cp cmake/config.cmake build
+ cd build
+ echo set\(USE_LLVM ON\) >> config.cmake
+ echo set\(USE_VITIS_AI ON\) >> config.cmake
+ cmake ..
+ make -j$(nproc)
+
+9. Install TVM
+
+ .. code:: bash
+
+ cd tvm/python
+ pip3 install -e . --user
+
+Edge (DPUCZDX8G)
+~~~~~~~~~~~~~~~~
+
+
+For edge deployment we make use of two systems referred to as host and
+edge. The `host <#host-requirements>`__ system is responsible for
+quantization and compilation of the neural network model in an initial
+offline step. Afterwards, the model will be deployed on the
+`edge <#edge-requirements>`__ system.
+
+Host requirements
+^^^^^^^^^^^^^^^^^
+
+The following table lists system requirements for running the TVM -
+Vitis-AI docker container.
+
++-----------------------------------------------------+----------------------------------------------+
+| **Component** | **Requirement** |
++=====================================================+==============================================+
+| Operating System | Ubuntu 16.04, 18.04 |
++-----------------------------------------------------+----------------------------------------------+
+| | CentOS 7.4, 7.5 |
++-----------------------------------------------------+----------------------------------------------+
+| | RHEL 7.4, 7.5 |
++-----------------------------------------------------+----------------------------------------------+
+| CPU | Intel i3/i5/i7/i9/Xeon 64-bit CPU |
++-----------------------------------------------------+----------------------------------------------+
+| GPU (Optional to accelerate quantization) | NVIDIA GPU with a compute capability > 3.0 |
++-----------------------------------------------------+----------------------------------------------+
+| CUDA Driver (Optional to accelerate quantization) | nvidia-410 |
++-----------------------------------------------------+----------------------------------------------+
+| FPGA | Not necessary on host |
++-----------------------------------------------------+----------------------------------------------+
+| Docker Version | 19.03.1 |
++-----------------------------------------------------+----------------------------------------------+
+
+Host setup and docker build
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+1. Clone the TVM repo:
+
+ .. code:: bash
+
+ git clone --recursive https://github.com/apache/tvm.git
+
+2. Build and start the TVM runtime Vitis-AI docker container.
+
+ .. code:: bash
+
+ cd tvm
+ ./docker/build.sh demo_vitis_ai bash
+ ./docker/bash.sh tvm.demo_vitis_ai
+
+ #Setup inside container
+ . $VAI_ROOT/conda/etc/profile.d/conda.sh
+ conda activate vitis-ai-tensorflow
+
+3. Install PyXIR
+
+ .. code:: bash
+
+ git clone --recursive https://github.com/Xilinx/pyxir.git
+ cd pyxir
+ python3 setup.py install --user
+
+
+4. Build TVM inside the container with Vitis-AI.
+
+ .. code:: bash
+
+ cd tvm
+ mkdir build
+ cp cmake/config.cmake build
+ cd build
+ echo set\(USE_LLVM ON\) >> config.cmake
+ echo set\(USE_VITIS_AI ON\) >> config.cmake
+ cmake ..
+ make -j$(nproc)
+
+5. Install TVM
+
+ .. code:: bash
+
+ cd tvm/python
+ pip3 install -e . --user
+
+Edge requirements
+^^^^^^^^^^^^^^^^^
+
+The DPUCZDX8G can be deployed on the `Zynq Ultrascale+
+MPSoc `__
+platform. The following development boards can be used out-of-the-box:
+
++--------------------+----------------------+-----------------------------------------------------------------------+
+| **Target board** | **TVM identifier** | **Info** |
++====================+======================+=======================================================================+
+| Ultra96 | DPUCZDX8G-ultra96 | https://www.xilinx.com/products/boards-and-kits/1-vad4rl.html |
++--------------------+----------------------+-----------------------------------------------------------------------+
+| ZCU104 | DPUCZDX8G-zcu104 | https://www.xilinx.com/products/boards-and-kits/zcu104.html |
++--------------------+----------------------+-----------------------------------------------------------------------+
+| ZCU102 | DPUCZDX8G-zcu102 | https://www.xilinx.com/products/boards-and-kits/ek-u1-zcu102-g.html |
++--------------------+----------------------+-----------------------------------------------------------------------+
+
+Edge hardware setup
+^^^^^^^^^^^^^^^^^^^
+.. note::
+
+ This section provides instructions for setting up with the `Pynq `__ platform, but
+ PetaLinux-based flows are also supported.
+
+1. Download the Pynq v2.5 image for your target (use Z1 or Z2 for
+ Ultra96 target depending on board version). Link to image:
+ https://github.com/Xilinx/PYNQ/releases/tag/v2.5
+2. Follow Pynq instructions for setting up the board: `pynq
+ setup `__
+3. After connecting to the board, make sure to run as root. Execute
+ ``su``
+4. Set up DPU on Pynq by following the steps here: `DPU Pynq
+ setup `__
+5. Run the following command to download the DPU bitstream:
+
+ .. code:: bash
+
+ python3 -c 'from pynq_dpu import DpuOverlay ; overlay = DpuOverlay("dpu.bit")'
+
+6. Check whether the DPU kernel is alive:
+
+ .. code:: bash
+
+ dexplorer -w
+
+Edge TVM setup
+^^^^^^^^^^^^^^
+
+.. note::
+
+ When working on PetaLinux instead of Pynq, the following steps might take more manual work (e.g. building
+ hdf5 from source). Also, TVM has a scipy dependency which you then might have to build from source or
+ circumvent. We don't depend on scipy in our flow.
+
+Building TVM depends on the Xilinx
+`PyXIR `__ package. PyXIR acts as an
+interface between TVM and Vitis-AI tools.
+
+1. First install PyXIR's h5py and pydot dependencies:
+
+ .. code:: bash
+
+ apt-get install libhdf5-dev
+ pip3 install pydot h5py
+
+2. Install PyXIR
+
+ .. code:: bash
+
+ git clone --recursive https://github.com/Xilinx/pyxir.git
+ cd pyxir
+ sudo python3 setup.py install --use_vai_rt_dpuczdx8g
+
+3. Build TVM with Vitis-AI
+
+ .. code:: bash
+
+ git clone --recursive https://github.com/apache/tvm
+ cd tvm
+ mkdir build
+ cp cmake/config.cmake build
+ cd build
+ echo set\(USE_VITIS_AI ON\) >> config.cmake
+ cmake ..
+ make
+
+4. Install TVM
+
+ .. code:: bash
+
+ cd tvm/python
+ pip3 install -e . --user
+
+5. Check whether the setup was successful in the Python shell:
+
+ .. code:: bash
+
+ python3 -c 'import pyxir; import tvm'
+
+
+Getting started
+---------------
+
+This section shows how to use TVM with Vitis-AI. For this it's important
+to understand that neural network models are quantized for Vitis-AI
+execution in fixed point arithmetic. The approach we take here is to
+quantize on-the-fly using the first N inputs as explained in the next
+section.
+
+On-the-fly quantization
+~~~~~~~~~~~~~~~~~~~~~~~
+
+Usually, to be able to accelerate inference of neural network models
+with Vitis-AI DPU accelerators, those models need to be quantized upfront.
+In the TVM - Vitis-AI flow, we make use of on-the-fly quantization to remove
+this additional preprocessing step. In this flow, one doesn't need to
+quantize their model upfront but can make use of the typical inference
+execution calls (module.run) to quantize the model on-the-fly using the
+first N inputs that are provided (see more information below). This will
+set up and calibrate the Vitis-AI DPU and from that point onwards
+inference will be accelerated for all subsequent inputs. Note that the edge
+flow deviates slightly from the flow described here in that inference won't
+be accelerated after the first N inputs; instead, the model will have been
+quantized and compiled and can be moved to the edge device for
+deployment. Please check out the `edge <#Edge%20usage>`__ usage
+instructions below for more information.
+
+Config/Settings
+~~~~~~~~~~~~~~~
+
+A couple of environment variables can be used to customize the Vitis-AI
+Byoc flow.
+
++----------------------------+----------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| **Environment Variable** | **Default if unset** | **Explanation** |
++============================+========================================+============================================================================================================================================================================================================================================================================================================================================+
+| PX\_QUANT\_SIZE | 128 | The number of inputs that will be used for quantization (necessary for Vitis-AI acceleration) |
++----------------------------+----------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| PX\_BUILD\_DIR | Use the on-the-fly quantization flow | Loads the quantization and compilation information from the provided build directory and immediately starts Vitis-AI hardware acceleration. This configuration can be used if the model has been executed before using on-the-fly quantization during which the quantization and compilation information was cached in a build directory. |
++----------------------------+----------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
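+
+For example, to calibrate with 64 inputs instead of the default 128, or to reuse
+a previously cached build directory (the path below is a placeholder), set the
+variables before running the TVM module:
+
+.. code:: bash
+
+ export PX_QUANT_SIZE=64
+ export PX_BUILD_DIR=/path/to/cached/build_dir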
+
+Cloud usage
+~~~~~~~~~~~
+
+This section shows how to accelerate a convolutional neural network
+model in TVM with Vitis-AI on the cloud.
+
+To be able to target the Vitis-AI cloud DPUCADX8G target, we first have
+to import the target in PyXIR. The PyXIR package is the interface
+used by TVM to integrate with the Vitis-AI stack. Additionally, import
+the typical TVM and Relay modules and the Vitis-AI contrib module inside
+TVM.
+
+.. code:: python
+
+ import pyxir
+ import pyxir.contrib.target.DPUCADX8G
+
+ import tvm
+ import tvm.relay as relay
+ from tvm.contrib.target import vitis_ai
+ from tvm.contrib import util, graph_runtime
+ from tvm.relay.build_module import bind_params_by_name
+ from tvm.relay.op.contrib.vitis_ai import annotation
+
+After importing a convolutional neural network model using the usual
+Relay APIs, annotate the Relay expression for the given Vitis-AI DPU
+target and partition the graph.
+
+.. code:: python
+
+ mod["main"] = bind_params_by_name(mod["main"], params)
+ mod = annotation(mod, params, target)
+ mod = relay.transform.MergeCompilerRegions()(mod)
+ mod = relay.transform.PartitionGraph()(mod)
+
+Now, we can build the TVM runtime library for executing the model. The
+TVM target is 'llvm' as the operations that can't be handled by the DPU
+are executed on the CPU. The Vitis-AI target is DPUCADX8G, as we are
+targeting the cloud DPU; this target is passed as a config to the TVM
+build call.
+
+.. code:: python
+
+ tvm_target = 'llvm'
+ target = 'DPUCADX8G'
+
+ with tvm.transform.PassContext(opt_level=3, config={'relay.ext.vitis_ai.options.target': target}):
+     lib = relay.build(mod, tvm_target, params=params)
+
+As one more step before we can accelerate a model with Vitis-AI in TVM
+we have to quantize and compile the model for execution on the DPU. We
+make use of on-the-fly quantization for this. Using this method one
+doesn’t need to quantize their model upfront and can make use of the
+typical inference execution calls (module.run) to calibrate the model
+on-the-fly using the first N inputs that are provided. After the first N
+iterations, computations will be accelerated on the DPU. So now we will
+feed N inputs to the TVM runtime module. Note that these first N inputs
+will take a substantial amount of time.
+
+.. code:: python
+
+ module = graph_runtime.GraphModule(lib["default"](tvm.cpu()))
+
+ # First N (default = 128) inputs are used for quantization calibration and will
+ # be executed on the CPU
+ # This config can be changed by setting the 'PX_QUANT_SIZE' (e.g. export PX_QUANT_SIZE=64)
+ for i in range(128):
+     module.set_input(input_name, inputs[i])
+     module.run()
+
+Afterwards, inference will be accelerated on the DPU.
+
+.. code:: python
+
+ module.set_input(name, data)
+ module.run()
+
+To save and load the built module, one can use the typical TVM APIs:
+
+.. code:: python
+
+ lib_path = "deploy_lib.so"
+ lib.export_library(lib_path)
+
+Load the module from compiled files and run inference:
+
+.. code:: python
+
+ # load the module into memory
+ loaded_lib = tvm.runtime.load_module(lib_path)
+
+ module = graph_runtime.GraphModule(loaded_lib["default"](tvm.cpu()))
+ module.set_input(name, data)
+ module.run()
+
+Edge usage
+~~~~~~~~~~
+
+This section shows how to accelerate a convolutional neural network
+model in TVM with Vitis-AI at the edge. The first couple of steps will
+have to be run on the host machine and take care of quantization and
+compilation for deployment at the edge.
+
+Host steps
+^^^^^^^^^^
+
+To be able to target the Vitis-AI edge DPUCZDX8G target, we first have
+to import the target in PyXIR. The PyXIR package is the interface
+used by TVM to integrate with the Vitis-AI stack. Additionally, import
+the typical TVM and Relay modules and the Vitis-AI contrib module inside
+TVM.
+
+.. code:: python
+
+ import pyxir
+ import pyxir.contrib.target.DPUCZDX8G
+
+ import tvm
+ import tvm.relay as relay
+ from tvm.contrib.target import vitis_ai
+ from tvm.contrib import util, graph_runtime
+ from tvm.relay.build_module import bind_params_by_name
+ from tvm.relay.op.contrib.vitis_ai import annotation
+
+After importing a convolutional neural network model using the usual
+Relay APIs, annotate the Relay expression for the given Vitis-AI DPU
+target and partition the graph.
+
+.. code:: python
+
+ mod["main"] = bind_params_by_name(mod["main"], params)
+ mod = annotation(mod, params, target)
+ mod = relay.transform.MergeCompilerRegions()(mod)
+ mod = relay.transform.PartitionGraph()(mod)
+
+Now, we can build the TVM runtime library for executing the model. The
+TVM target is 'llvm' as the operations that can't be handled by the DPU
+are executed on the CPU. At this point that means the CPU on the host machine.
+The Vitis-AI target is DPUCZDX8G-zcu104 as we are targeting the edge DPU
+on the ZCU104 board and this target is passed as a config to the TVM
+build call. Note that different identifiers can be passed for different
+targets, see `edge targets info <#edge-requirements>`__. Additionally, we
+provide the 'export_runtime_module' config that points to a file to which we
+can export the Vitis-AI runtime module. We have to do this because we will
+first be compiling and quantizing the model on the host machine before building
+the model for edge deployment. As you will see later on, the exported runtime
+module will be passed to the edge build so that the Vitis-AI runtime module
+can be included.
+
+.. code:: python
+
+ from tvm.contrib import util
+
+ temp = util.tempdir()
+
+ tvm_target = 'llvm'
+ target='DPUCZDX8G-zcu104'
+ export_rt_mod_file = temp.relpath("vitis_ai.rtmod")
+
+ with tvm.transform.PassContext(opt_level=3,
+                                config={'relay.ext.vitis_ai.options.target': target,
+                                        'relay.ext.vitis_ai.options.export_runtime_module': export_rt_mod_file}):
+     lib = relay.build(mod, tvm_target, params=params)
+
+We will quantize and compile the model for execution on the DPU using on-the-fly
+quantization on the host machine. This makes use of TVM inference calls
+(module.run) to quantize the model on the host with the first N inputs.
+
+.. code:: python
+
+ module = graph_runtime.GraphModule(lib["default"](tvm.cpu()))
+
+ # First N (default = 128) inputs are used for quantization calibration and will
+ # be executed on the CPU
+ # This config can be changed by setting the 'PX_QUANT_SIZE' (e.g. export PX_QUANT_SIZE=64)
+ for i in range(128):
+     module.set_input(input_name, inputs[i])
+     module.run()
+
+Save the TVM lib module so that the Vitis-AI runtime module will also be exported
+(to the 'export_runtime_module' path we previously passed as a config).
+
+.. code:: python
+
+ from tvm.contrib import util
+
+ temp = util.tempdir()
+ lib.export_library(temp.relpath("tvm_lib.so"))
+
+After quantizing and compiling the model for Vitis-AI acceleration using the
+first N inputs we can build the model for execution on the ARM edge device.
+Here we pass the previously exported Vitis-AI runtime module so it can be included
+in the TVM build.
+
+.. code:: python
+
+ # Export lib for aarch64 target
+ from tvm.contrib import cc
+
+ tvm_target = tvm.target.arm_cpu('ultra96')
+ lib_kwargs = {
+     'fcompile': cc.create_shared,
+     'cc': "/usr/aarch64-linux-gnu/bin/ld"
+ }
+
+ with tvm.transform.PassContext(opt_level=3,
+                                config={'relay.ext.vitis_ai.options.load_runtime_module': export_rt_mod_file}):
+     lib_arm = relay.build(mod, tvm_target, params=params)
+
+ lib_arm.export_library('tvm_dpu_arm.so', **lib_kwargs)
+
+Now, move the TVM build files (tvm\_dpu\_arm.json, tvm\_dpu\_arm.so,
+tvm\_dpu\_arm.params) to the edge device. For information on setting
+up the edge device check out the `edge setup <#edge-dpuczdx8g>`__
+section.
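+
+For example, assuming the board is reachable over the network (the address and
+destination path below are placeholders):
+
+.. code:: bash
+
+ scp tvm_dpu_arm.* root@<board-ip>:/home/root/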
+
+Edge steps
+^^^^^^^^^^
+
+After setting up TVM with Vitis-AI on the edge device, you can now load
+the TVM runtime module into memory and feed inputs for inference.
+
+.. code:: python
+
+ ctx = tvm.cpu()
+
+ # load the module into memory
+ lib = tvm.runtime.load_module("tvm_dpu_arm.so")
+
+ module = graph_runtime.GraphModule(lib["default"](ctx))
+ module.set_input(name, data)
+ module.run()
diff --git a/docs/dev/convert_layout.rst b/docs/dev/convert_layout.rst
index 07ebc2048dd3..53038e9605e8 100644
--- a/docs/dev/convert_layout.rst
+++ b/docs/dev/convert_layout.rst
@@ -157,7 +157,7 @@ First example is for layout agnostic operators. These operators do not have any
Layout ret;
if (new_in_layouts.defined()) {
- CHECK_GE(new_in_layouts.size(), 1);
+ ICHECK_GE(new_in_layouts.size(), 1);
ret = new_in_layouts[0];
} else {
for (size_t i = 0; i < old_in_layouts.size(); ++i) {
@@ -227,6 +227,7 @@ Second example is for a lightly-layout sensitive operator - batch normalization.
********
4. Usage
********
+.. _convert-layout-usage:
ConvertLayout pass is extremely easy to use. The pass is not a part of default relay.build pipeline. The intended usage is to call it between the framework-to-relay parser and relay.build module call.
@@ -264,5 +265,5 @@ The ordering of the layouts is defined by the implementation of `register_conver
Current implementation has support for almost all the operators commonly used in image classification models. However, if one encounters too many data layout transforms in the graph, it is highly likely that there is an operator whose layouts need special handling as described in Section 3. Some pull requests that can help in such a situation are
-- Layout inference for `Batch Norm `_ - Batch normalization falls into the category of lightly-sensitive operator. The PR shows how to handle the layout inference for batch norm.
-- Python Callback for `Convolution `_- For highly-sensitive operators, one might have to do python callback as well. The PR shows how to define a python callback function for Convolution operator.
+- Layout inference for `Batch Norm `_ - Batch normalization falls into the category of lightly-sensitive operator. The PR shows how to handle the layout inference for batch norm.
+- Python Callback for `Convolution `_- For highly-sensitive operators, one might have to do python callback as well. The PR shows how to define a python callback function for Convolution operator.
diff --git a/docs/dev/frontend/tensorflow.rst b/docs/dev/frontend/tensorflow.rst
index b234ed7b0466..dde7179d90db 100644
--- a/docs/dev/frontend/tensorflow.rst
+++ b/docs/dev/frontend/tensorflow.rst
@@ -57,7 +57,7 @@ Export
TensorFlow frontend expects a frozen protobuf (.pb) or saved model as input. It currently does not support checkpoint (.ckpt). The graphdef needed by the TensorFlow frontend can be extracted from the active session, or by using the `TFParser`_ helper class.
-.. _TFParser: https://github.com/apache/incubator-tvm/blob/main/python/tvm/relay/frontend/tensorflow_parser.py
+.. _TFParser: https://github.com/apache/tvm/blob/main/python/tvm/relay/frontend/tensorflow_parser.py
The model should be exported with a number of transformations to prepare the model for inference. It is also important to set ```add_shapes=True```, as this will embed the output shapes of each node into the graph. Here is one function to export a model as a protobuf given a session:
@@ -101,7 +101,7 @@ Import the Model
Explicit Shape:
~~~~~~~~~~~~~~~
-To ensure shapes can be known throughout the entire graph, pass the ```shape``` argument to ```from_tensorflow```. This dictionary maps input names to input shapes. Please refer to these `test cases `_ for examples.
+To ensure shapes can be known throughout the entire graph, pass the ```shape``` argument to ```from_tensorflow```. This dictionary maps input names to input shapes. Please refer to these `test cases `_ for examples.
Data Layout
~~~~~~~~~~~
diff --git a/docs/dev/inferbound.rst b/docs/dev/inferbound.rst
index 7d0127a6c039..010d0d42d37e 100644
--- a/docs/dev/inferbound.rst
+++ b/docs/dev/inferbound.rst
@@ -22,7 +22,7 @@ InferBound Pass
*******************************************
-The InferBound pass is run after normalize, and before ScheduleOps `build_module.py `_. The main job of InferBound is to create the bounds map, which specifies a Range for each IterVar in the program. These bounds are then passed to ScheduleOps, where they are used to set the extents of For loops, see `MakeLoopNest `_, and to set the sizes of allocated buffers (`BuildRealize `_), among other uses.
+The InferBound pass is run after normalize, and before ScheduleOps `build_module.py `_. The main job of InferBound is to create the bounds map, which specifies a Range for each IterVar in the program. These bounds are then passed to ScheduleOps, where they are used to set the extents of For loops, see `MakeLoopNest `_, and to set the sizes of allocated buffers (`BuildRealize `_), among other uses.
The output of InferBound is a map from IterVar to Range:
@@ -53,9 +53,9 @@ Therefore, let's review the Range and IterVar classes:
};
}
-Note that IterVarNode also contains a Range ``dom``. This ``dom`` may or may not have a meaningful value, depending on when the IterVar was created. For example, when ``tvm.compute`` is called, an `IterVar is created `_ for each axis and reduce axis, with dom's equal to the shape supplied in the call to ``tvm.compute``.
+Note that IterVarNode also contains a Range ``dom``. This ``dom`` may or may not have a meaningful value, depending on when the IterVar was created. For example, when ``tvm.compute`` is called, an `IterVar is created `_ for each axis and reduce axis, with dom's equal to the shape supplied in the call to ``tvm.compute``.
-On the other hand, when ``tvm.split`` is called, `IterVars are created `_ for the inner and outer axes, but these IterVars are not given a meaningful ``dom`` value.
+On the other hand, when ``tvm.split`` is called, `IterVars are created `_ for the inner and outer axes, but these IterVars are not given a meaningful ``dom`` value.
In any case, the ``dom`` member of an IterVar is never modified during InferBound. However, keep in mind that the ``dom`` member of an IterVar is sometimes used as default value for the Ranges InferBound computes.
@@ -117,7 +117,7 @@ Tensors haven't been mentioned yet, but in the context of TVM, a Tensor represen
int value_index;
};
-In the Operation class declaration above, we can see that each operation also has a list of InputTensors. Thus the stages of the schedule form a DAG, where each stage is a node in the graph. There is an edge in the graph from Stage A to Stage B, if the operation of Stage B has an input tensor whose source operation is the op of Stage A. Put simply, there is an edge from A to B, if B consumes a tensor produced by A. See the diagram below. This graph is created at the beginning of InferBound, by a call to `CreateReadGraph `_.
+In the Operation class declaration above, we can see that each operation also has a list of InputTensors. Thus the stages of the schedule form a DAG, where each stage is a node in the graph. There is an edge in the graph from Stage A to Stage B, if the operation of Stage B has an input tensor whose source operation is the op of Stage A. Put simply, there is an edge from A to B, if B consumes a tensor produced by A. See the diagram below. This graph is created at the beginning of InferBound, by a call to `CreateReadGraph `_.
.. image:: https://raw.githubusercontent.com/tvmai/tvmai.github.io/main/images/docs/inferbound/stage_graph.png
:align: center
diff --git a/docs/dev/introduction_to_module_serialization.rst b/docs/dev/introduction_to_module_serialization.rst
index 5451b84c9b8c..6b2f2addaf9a 100644
--- a/docs/dev/introduction_to_module_serialization.rst
+++ b/docs/dev/introduction_to_module_serialization.rst
@@ -32,7 +32,7 @@ Let us build one ResNet-18 workload for GPU as an example first.
from tvm import relay
from tvm.relay import testing
- from tvm.contrib import util
+ from tvm.contrib import utils
import tvm
# Resnet18 workload
@@ -43,7 +43,7 @@ Let us build one ResNet-18 workload for GPU as an example first.
_, resnet18_lib, _ = relay.build_module.build(resnet18_mod, "cuda", params=resnet18_params)
# create one tempory directory
- temp = util.tempdir()
+ temp = utils.tempdir()
# path lib
file_name = "deploy.so"
diff --git a/docs/dev/pass_infra.rst b/docs/dev/pass_infra.rst
index 1427608a4574..3680cb886952 100644
--- a/docs/dev/pass_infra.rst
+++ b/docs/dev/pass_infra.rst
@@ -276,12 +276,12 @@ order that they were appended to the pass list.
const PassContext& pass_ctx) const {
Module mod = module;
for (const Pass& pass : passes) {
- CHECK(pass.defined()) << "Found undefined pass for optimization.";
+ ICHECK(pass.defined()) << "Found undefined pass for optimization.";
const PassInfo& pass_info = pass->Info();
if (!PassEnabled(pass_info)) continue;
for (const auto& it : pass_info->required) {
const auto* name = it.as();
- CHECK(name);
+ ICHECK(name);
mod = GetPass(name->value)(mod, pass_ctx);
}
mod = pass(mod, pass_ctx);
@@ -306,7 +306,7 @@ pass is registered with an API endpoint as we will show later.
using tvm::runtime::Registry;
std::string fpass_name = "relay._transform." + pass_name;
const auto* f = Registry::Get(fpass_name);
- CHECK(f != nullptr) << "Cannot find " << fpass_name
+ ICHECK(f != nullptr) << "Cannot find " << fpass_name
<< "to create the pass " << pass_name;
return (*f)();
}
@@ -528,22 +528,22 @@ optimization pipeline and debug Relay and tir passes, please refer to the
.. _Sequential: https://pytorch.org/docs/stable/nn.html?highlight=sequential#torch.nn.Sequential
-.. _Block: https://mxnet.incubator.apache.org/api/python/docs/api/gluon/block.html#gluon-block
+.. _Block: https://mxnet.apache.org/api/python/docs/api/gluon/block.html#gluon-block
-.. _include/tvm/ir/transform.h: https://github.com/apache/incubator-tvm/blob/main/include/tvm/ir/transform.h
+.. _include/tvm/ir/transform.h: https://github.com/apache/tvm/blob/main/include/tvm/ir/transform.h
-.. _src/relay/ir/transform.cc: https://github.com/apache/incubator-tvm/blob/main/src/relay/ir/transform.cc
+.. _src/relay/ir/transform.cc: https://github.com/apache/tvm/blob/main/src/relay/ir/transform.cc
-.. _src/ir/transform.cc: https://github.com/apache/incubator-tvm/blob/main/src/ir/transform.cc
+.. _src/ir/transform.cc: https://github.com/apache/tvm/blob/main/src/ir/transform.cc
-.. _src/relay/pass/fold_constant.cc: https://github.com/apache/incubator-tvm/blob/main/src/relay/pass/fold_constant.cc
+.. _src/relay/pass/fold_constant.cc: https://github.com/apache/tvm/blob/main/src/relay/pass/fold_constant.cc
-.. _python/tvm/relay/transform.py: https://github.com/apache/incubator-tvm/blob/main/python/tvm/relay/transform.py
+.. _python/tvm/relay/transform.py: https://github.com/apache/tvm/blob/main/python/tvm/relay/transform.py
-.. _include/tvm/relay/transform.h: https://github.com/apache/incubator-tvm/blob/main/include/tvm/relay/transform.h
+.. _include/tvm/relay/transform.h: https://github.com/apache/tvm/blob/main/include/tvm/relay/transform.h
-.. _python/tvm/ir/transform.py: https://github.com/apache/incubator-tvm/blob/main/python/tvm/ir/transform.py
+.. _python/tvm/ir/transform.py: https://github.com/apache/tvm/blob/main/python/tvm/ir/transform.py
-.. _src/tir/transforms/unroll_loop.cc: https://github.com/apache/incubator-tvm/blob/main/src/tir/transforms/unroll_loop.cc
+.. _src/tir/transforms/unroll_loop.cc: https://github.com/apache/tvm/blob/main/src/tir/transforms/unroll_loop.cc
-.. _use pass infra: https://github.com/apache/incubator-tvm/blob/main/tutorials/dev/use_pass_infra.py
+.. _use pass infra: https://github.com/apache/tvm/blob/main/tutorials/dev/use_pass_infra.py
diff --git a/docs/dev/relay_add_op.rst b/docs/dev/relay_add_op.rst
index 7dca251dd532..0697939be162 100644
--- a/docs/dev/relay_add_op.rst
+++ b/docs/dev/relay_add_op.rst
@@ -231,7 +231,7 @@ Adding a Gradient in C++
Adding a gradient in C++ is similar to adding one in Python, but the
interface for registering is slightly different.
-First, make sure ``src/relay/pass/pattern_util.h`` is included. It provides
+First, make sure ``src/relay/pass/pattern_utils.h`` is included. It provides
helper functions for creating nodes in the Relay AST. Then, define the
gradient in a similar fashion as in the Python example:
diff --git a/docs/dev/relay_add_pass.rst b/docs/dev/relay_add_pass.rst
index 02c0ba2808ad..0661df0ae35a 100644
--- a/docs/dev/relay_add_pass.rst
+++ b/docs/dev/relay_add_pass.rst
@@ -399,8 +399,8 @@ information about the pass manager interface can be found in :ref:`pass-infra`.
Relay's standard passes are listed in `include/tvm/relay/transform.h`_ and implemented
in `src/relay/pass/`_.
-.. _include/tvm/relay/transform.h: https://github.com/apache/incubator-tvm/blob/main/include/tvm/relay/transform.h
+.. _include/tvm/relay/transform.h: https://github.com/apache/tvm/blob/main/include/tvm/relay/transform.h
-.. _src/relay/pass/: https://github.com/apache/incubator-tvm/tree/main/src/relay/pass
+.. _src/relay/pass/: https://github.com/apache/tvm/tree/main/src/relay/pass
-.. _src/relay/transforms/fold_constant.cc: https://github.com/apache/incubator-tvm/blob/main/src/relay/transforms/fold_constant.cc
+.. _src/relay/transforms/fold_constant.cc: https://github.com/apache/tvm/blob/main/src/relay/transforms/fold_constant.cc
diff --git a/docs/dev/relay_bring_your_own_codegen.rst b/docs/dev/relay_bring_your_own_codegen.rst
index f4ee58a6902b..3fcd3365c82f 100644
--- a/docs/dev/relay_bring_your_own_codegen.rst
+++ b/docs/dev/relay_bring_your_own_codegen.rst
@@ -137,7 +137,7 @@ Here we highlight the notes marked in the above code:
* **Note 3** is a TVM runtime compatible wrapper function. It accepts a list of input tensors and one output tensor (the last argument), casts them to the right data type, and invokes the subgraph function described in Note 2. In addition, ``TVM_DLL_EXPORT_TYPED_FUNC`` is a TVM macro that generates another function ``gcc_0`` with unified the function arguments by packing all tensors to ``TVMArgs``. As a result, the TVM runtime can directly invoke ``gcc_0`` to execute the subgraph without additional efforts. With the above code generated, TVM is able to compile it along with the rest parts of the graph and export a single library for deployment.
-In the rest of this section, we will implement a codegen step-by-step to generate the above code. Your own codegen has to be located at ``src/relay/backend/contrib//``. In our example, we name our codegen "codegen_c" and put it under `/src/relay/backend/contrib/codegen_c/ `_. Feel free to check this file for a complete implementation.
+In the rest of this section, we will implement a codegen step-by-step to generate the above code. Your own codegen has to be located at ``src/relay/backend/contrib//``. In our example, we name our codegen "codegen_c" and put it under `/src/relay/backend/contrib/codegen_c/ `_. Feel free to check this file for a complete implementation.
Specifically, we are going to implement two classes in this file and here is their relationship:
@@ -296,7 +296,7 @@ As mentioned in the previous step, in addition to the subgraph input and output
// This example only supports single output.
auto type_node = call->checked_type().as();
- CHECK(type_node != nullptr && runtime::TypeMatch(type_node->dtype, kDLFloat, 32))
+ ICHECK(type_node != nullptr && runtime::TypeMatch(type_node->dtype, kDLFloat, 32))
<< "Only support single output tensor with float type";
// Generate a unique buffer name.
@@ -410,7 +410,7 @@ Implement GenCFunc
.. code-block:: c++
void GenCFunc(const Function& func) {
- CHECK(func.defined()) << "Input error: expect a Relay function.";
+ ICHECK(func.defined()) << "Input error: expect a Relay function.";
// Record the external symbol for runtime lookup.
auto sid = GetExtSymbol(func);
@@ -474,7 +474,7 @@ This function creates a runtime module for the external library. In this example
// Create a CSourceModule
const auto* pf = runtime::Registry::Get("module.csource_module_create");
- CHECK(pf != nullptr) << "Cannot find csource module to create the external runtime module";
+ ICHECK(pf != nullptr) << "Cannot find csource module to create the external runtime module";
return (*pf)(code_stream_.str(), "cc");
}
@@ -556,7 +556,7 @@ In this section, our goal is to implement the following customized TVM runtime m
ExampleJsonCodeGen codegen(ref);
std::string code = codegen.gen(); // Note 1
const auto* pf = runtime::Registry::Get("module.examplejson_module_create"); // Note 2
- CHECK(pf != nullptr) << "Cannot find ExampleJson module to create the external runtime module";
+ ICHECK(pf != nullptr) << "Cannot find ExampleJson module to create the external runtime module";
return (*pf)(code);
}
TVM_REGISTER_GLOBAL("relay.ext.examplejsoncompiler").set_body_typed(ExampleJsonCompiler);
@@ -785,7 +785,7 @@ After the construction, we should have the above class variables ready. We then
// Copy input tensors to corresponding data entries.
for (auto i = 0; i < args.size(); ++i) {
- CHECK(args[i].type_code() == kNDArrayContainer || args[i].type_code() == kArrayHandle)
+ ICHECK(args[i].type_code() == kNDArrayContainer || args[i].type_code() == kArrayHandle)
<< "Expect NDArray or DLTensor as inputs\n";
if (args[i].type_code() == kArrayHandle) {
DLTensor* arg = args[i];
@@ -800,7 +800,7 @@ After the construction, we should have the above class variables ready. We then
for (const auto& it : this->graph_[this->curr_subgraph_]) {
this->Run(it.id, it.inputs, it.output);
}
- CHECK_GT(graph_.count(this->curr_subgraph_), 0U);
+ ICHECK_GT(graph_.count(this->curr_subgraph_), 0U);
// Copy the output from a data entry back to TVM runtime argument.
auto out_idx = graph_[this->curr_subgraph_].back().output;
diff --git a/docs/dev/runtime.rst b/docs/dev/runtime.rst
index 91b19eee3230..c77b693f0749 100644
--- a/docs/dev/runtime.rst
+++ b/docs/dev/runtime.rst
@@ -45,7 +45,7 @@ PackedFunc
`PackedFunc`_ is a simple but elegant solution
we find to solve the challenges listed. The following code block provides an example in C++
-.. _PackedFunc: https://github.com/apache/incubator-tvm/blob/main/include/tvm/runtime/packed_func.h
+.. _PackedFunc: https://github.com/apache/tvm/blob/main/include/tvm/runtime/packed_func.h
.. code:: c
@@ -131,9 +131,9 @@ which allows us to embed the PackedFunc into any languages. Besides python, so f
`java`_ and `javascript`_.
This philosophy of embedded API is very like Lua, except that we don't have a new language but use C++.
-.. _minimum C API: https://github.com/apache/incubator-tvm/blob/main/include/tvm/runtime/c_runtime_api.h
-.. _java: https://github.com/apache/incubator-tvm/tree/main/jvm
-.. _javascript: https://github.com/apache/incubator-tvm/tree/main/web
+.. _minimum C API: https://github.com/apache/tvm/blob/main/include/tvm/runtime/c_runtime_api.h
+.. _java: https://github.com/apache/tvm/tree/main/jvm
+.. _javascript: https://github.com/apache/tvm/tree/main/web
One fun fact about PackedFunc is that we use it for both compiler and deployment stack.
@@ -141,7 +141,7 @@ One fun fact about PackedFunc is that we use it for both compiler and deployment
- All TVM's compiler pass functions are exposed to frontend as PackedFunc, see `here`_
- The compiled module also returns the compiled function as PackedFunc
-.. _here: https://github.com/apache/incubator-tvm/tree/main/src/api
+.. _here: https://github.com/apache/tvm/tree/main/src/api
To keep the runtime minimum, we isolated the IR Object support from the deployment runtime. The resulting runtime takes around 200K - 600K depending on how many runtime driver modules (e.g., CUDA) get included.
@@ -162,7 +162,7 @@ TVM defines the compiled object as `Module`_.
The user can get the compiled function from Module as PackedFunc.
The generated compiled code can dynamically get function from Module in runtime. It caches the function handle in the first call and reuses in subsequent calls. We use this to link device code and callback into any PackedFunc(e.g., python) from generated code.
-.. _Module: https://github.com/apache/incubator-tvm/blob/main/include/tvm/runtime/module.h
+.. _Module: https://github.com/apache/tvm/blob/main/include/tvm/runtime/module.h
The ModuleNode is an abstract class that can be implemented by each type of device.
So far we support modules for CUDA, Metal, OpenCL and loading dynamic shared libraries. This abstraction makes introduction
@@ -198,7 +198,7 @@ All the language object in the compiler stack is a subclass of ``Object``. Each
the type of object. We choose string instead of int as type key so new ``Object`` class can be added in the decentralized fashion without
adding the code back to the central repo. To ease the speed of dispatching, we allocate an integer type_index at runtime for each type_key.
-.. _Object: https://github.com/apache/incubator-tvm/blob/main/include/tvm/runtime/object.h
+.. _Object: https://github.com/apache/tvm/blob/main/include/tvm/runtime/object.h
Since usually one ``Object`` could be referenced in multiple places in the language, we use a shared_ptr to keep
track of reference. We use ``ObjectRef`` class to represent a reference to the ``Object``.
@@ -279,17 +279,17 @@ Each argument in PackedFunc contains a union value `TVMValue`_
and a type code. This design allows the dynamically typed language to convert to the corresponding type directly, and statically typed language to
do runtime type checking during conversion.
-.. _TVMValue: https://github.com/apache/incubator-tvm/blob/main/include/tvm/runtime/c_runtime_api.h#L122
+.. _TVMValue: https://github.com/apache/tvm/blob/main/include/tvm/runtime/c_runtime_api.h#L122
The relevant files are
- `packed_func.h`_ for C++ API
- `c_runtime_api.cc`_ for C API and how to provide callback.
-.. _packed_func.h: https://github.com/apache/incubator-tvm/blob/main/include/tvm/runtime/packed_func.h
-.. _c_runtime_api.cc: https://github.com/apache/incubator-tvm/blob/main/src/runtime/c_runtime_api.cc#L262
+.. _packed_func.h: https://github.com/apache/tvm/blob/main/include/tvm/runtime/packed_func.h
+.. _c_runtime_api.cc: https://github.com/apache/tvm/blob/main/src/runtime/c_runtime_api.cc#L262
To support extension types, we used a registry system to register type related information, like support of any
in C++, see `Extension types`_ for more details.
-.. _Extension types: https://github.com/apache/incubator-tvm/tree/main/apps/extension
+.. _Extension types: https://github.com/apache/tvm/tree/main/apps/extension
diff --git a/docs/dev/virtual_machine.rst b/docs/dev/virtual_machine.rst
index 0986328811dc..9081d50b92ef 100644
--- a/docs/dev/virtual_machine.rst
+++ b/docs/dev/virtual_machine.rst
@@ -278,11 +278,11 @@ to represent tensor, tuple/list, and closure data, respectively. More details
for each of them can be found at `include/tvm/runtime/ndarray.h`_,
`include/tvm/runtime/vm/vm.h`_, and `include/tvm/runtime/container.h`_, respectively.
-.. _include/tvm/runtime/ndarray.h: https://github.com/apache/incubator-tvm/blob/main/include/tvm/runtime/ndarray.h
+.. _include/tvm/runtime/ndarray.h: https://github.com/apache/tvm/blob/main/include/tvm/runtime/ndarray.h
-.. _include/tvm/runtime/vm/vm.h: https://github.com/apache/incubator-tvm/blob/main/include/tvm/runtime/vm/vm.h
+.. _include/tvm/runtime/vm/vm.h: https://github.com/apache/tvm/blob/main/include/tvm/runtime/vm/vm.h
-.. _include/tvm/runtime/container.h: https://github.com/apache/incubator-tvm/blob/main/include/tvm/runtime/container.h
+.. _include/tvm/runtime/container.h: https://github.com/apache/tvm/blob/main/include/tvm/runtime/container.h
Stack and State
~~~~~~~~~~~~~~~
@@ -326,7 +326,7 @@ The functions contain metadata about the function as well as its compiled byteco
object then can be loaded and run by a ``tvm::relay::vm::VirtualMachine`` object. For full definitions of the
data structures, please see `include/tvm/runtime/vm/executable.h`_ and `include/tvm/runtime/vm/vm.h`_.
-.. _include/tvm/runtime/vm/executable.h: https://github.com/apache/incubator-tvm/blob/main/include/tvm/runtime/vm/executable.h
+.. _include/tvm/runtime/vm/executable.h: https://github.com/apache/tvm/blob/main/include/tvm/runtime/vm/executable.h
Optimizations
~~~~~~~~~~~~~
@@ -343,11 +343,11 @@ Optimizations marked with `TODO` are not implemented yet.
- Tail Call Optimization (TODO)
- Liveness Analysis (TODO)
-.. _src/relay/vm/lambda_lift.cc: https://github.com/apache/incubator-tvm/blob/main/src/relay/backend/vm/lambda_lift.cc
+.. _src/relay/vm/lambda_lift.cc: https://github.com/apache/tvm/blob/main/src/relay/backend/vm/lambda_lift.cc
-.. _src/relay/vm/inline_primitives.cc: https://github.com/apache/incubator-tvm/blob/main/src/relay/backend/vm/inline_primitives.cc
+.. _src/relay/vm/inline_primitives.cc: https://github.com/apache/tvm/blob/main/src/relay/backend/vm/inline_primitives.cc
-.. _src/relay/backend/vm/compiler.cc: https://github.com/apache/incubator-tvm/blob/main/src/relay/backend/vm/compiler.cc
+.. _src/relay/backend/vm/compiler.cc: https://github.com/apache/tvm/blob/main/src/relay/backend/vm/compiler.cc
Serialization
~~~~~~~~~~~~~
@@ -386,7 +386,7 @@ load the serialized kernel binary and executable related binary code, which will
instantiate a VM object. Please refer to the `test_vm_serialization.py`_ file for more
examples.
-.. _test_vm_serialization.py: https://github.com/apache/incubator-tvm/blob/main/tests/python/relay/test_vm_serialization.py
+.. _test_vm_serialization.py: https://github.com/apache/tvm/blob/main/tests/python/relay/test_vm_serialization.py
Unresolved Questions
~~~~~~~~~~~~~~~~~~~~
diff --git a/docs/index.rst b/docs/index.rst
index 18b2da7fc387..f407fa2d4f29 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -25,7 +25,7 @@ Get Started
-----------
- Follow the :doc:`instructions ` to install TVM.
-- Checkout the :doc:`Tutorials `.
+- Checkout the :doc:`tutorials `.
For Developers
--------------
diff --git a/docs/install/docker.rst b/docs/install/docker.rst
index 243e438b6d0c..768cad2057f8 100644
--- a/docs/install/docker.rst
+++ b/docs/install/docker.rst
@@ -28,7 +28,7 @@ Get a tvm source distribution or clone the github repo to get the auxiliary scri
.. code:: bash
- git clone --recursive https://github.com/apache/incubator-tvm tvm
+ git clone --recursive https://github.com/apache/tvm tvm
We can then use the following command to launch a docker image.
@@ -67,7 +67,7 @@ with ``localhost`` when pasting it into browser.
Docker Source
-------------
-Check out `The docker source `_ if you are interested in
+Check out `The docker source `_ if you are interested in
building your own docker images.
diff --git a/docs/install/from_source.rst b/docs/install/from_source.rst
index 2bb6e551b1a0..3cf0a78f244f 100644
--- a/docs/install/from_source.rst
+++ b/docs/install/from_source.rst
@@ -34,7 +34,7 @@ It is important to clone the submodules along, with ``--recursive`` option.
.. code:: bash
- git clone --recursive https://github.com/apache/incubator-tvm tvm
+ git clone --recursive https://github.com/apache/tvm tvm
For windows users who use github tools, you can open the git shell, and type the following command.
@@ -90,7 +90,7 @@ The configuration of TVM can be modified by `config.cmake`.
you want to build for (OpenCL, RCOM, METAL, VULKAN, ...).
- To help with debugging, ensure the embedded graph runtime and debugging functions are enabled with ``set(USE_GRAPH_RUNTIME ON)`` and ``set(USE_GRAPH_RUNTIME_DEBUG ON)``
-- TVM optionally depends on LLVM. LLVM is required for CPU codegen that needs LLVM.
+- TVM requires LLVM for CPU codegen. We highly recommend building with LLVM support on.
- LLVM 4.0 or higher is needed for build with LLVM. Note that version of LLVM from default apt may lower than 4.0.
- Since LLVM takes long time to build from source, you can download pre-built version of LLVM from
@@ -102,7 +102,7 @@ The configuration of TVM can be modified by `config.cmake`.
- You can also use `LLVM Nightly Ubuntu Build `_
- Note that apt-package append ``llvm-config`` with version number.
- For example, set ``set(LLVM_CONFIG llvm-config-4.0)`` if you installed 4.0 package
+ For example, set ``set(USE_LLVM llvm-config-10)`` if you installed LLVM 10 package
- We can then build tvm and related libraries.
@@ -122,27 +122,58 @@ The configuration of TVM can be modified by `config.cmake`.
If everything goes well, we can go to :ref:`python-package-installation`
+.. _build-with-conda:
+
+Building with a Conda Environment
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Conda is a very handy way to obtain the dependencies needed for running TVM.
+First, follow `conda's installation guide `_
+to install miniconda or anaconda if you do not yet have conda on your system. Then run the following commands to create and activate the build environment:
+
+.. code:: bash
+
+ # Create a conda environment with the dependencies specified by the yaml
+ conda env create --file conda/build-environment.yaml
+ # Activate the created environment
+ conda activate tvm-build
+
+The above command will install all necessary build dependencies such as cmake and LLVM. You can then run the standard build process in the last section.
+
+If you want to use the compiled binary outside the conda environment,
+you can set LLVM to static linking mode ``set(USE_LLVM "llvm-config --link-static")``.
+In this way, the resulting library won't depend on the dynamic LLVM libraries in the conda environment.
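+
+For example, the setting can be appended to your build's ``config.cmake`` (a
+sketch following the echo pattern used elsewhere in this guide; adjust the path
+to your build directory):
+
+.. code:: bash
+
+ echo 'set(USE_LLVM "llvm-config --link-static")' >> build/config.cmake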
+
+The above instructions show how to use conda to provide the necessary build dependencies to build libtvm.
+If you are already using conda as your package manager and wish to directly build and install tvm as a conda package, you can follow the instructions below:
+
+.. code:: bash
+
+ conda build --output-folder=conda/pkg conda/recipe
+ # Run conda/build_cuda.sh to build with cuda enabled
+ conda install tvm -c ./conda/pkg
+
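+A quick smoke test of the installed conda package (a sketch; it only assumes
+the ``tvm`` package from the local channel is importable) is:
+
+.. code:: bash
+
+    python -c "import tvm; print(tvm.__file__)"
+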
Building on Windows
~~~~~~~~~~~~~~~~~~~
-
-TVM support build via MSVC using cmake. The minimum required VS version is **Visual Studio Community 2015 Update 3**.
-In order to generate the VS solution file using cmake, make sure you have a recent version of cmake added to your path and then from the TVM directory:
+TVM supports building via MSVC using cmake. You will need to obtain a Visual Studio compiler.
+The minimum required VS version is **Visual Studio Community 2015 Update 3**.
+We recommend following :ref:`build-with-conda` to obtain the necessary dependencies and
+an activated tvm-build environment. Then you can run the following command to build:
.. code:: bash
- mkdir build
- cd build
- cmake -G "Visual Studio 14 2015 Win64" -DCMAKE_BUILD_TYPE=Release -DCMAKE_CONFIGURATION_TYPES="Release" ..
+ mkdir build
+ cd build
+ cmake -A x64 -Thost=x64 ..
+ cd ..
-Starting with Visual Studio 2019 the architecture is specified differently so use this command
+The above command generates the solution file under the build directory.
+You can then run the following command to build:
.. code:: bash
- cmake -G "Visual Studio 16 2019" -A x64 -DCMAKE_BUILD_TYPE=Release -DCMAKE_CONFIGURATION_TYPES="Release" ..
+ cmake --build build --config Release -- /m
-This will generate the VS project using the MSVC 64 bit generator.
-Open the .sln file in the build directory and build with Visual Studio.
-In order to build with LLVM in windows, you will need to build LLVM from source.
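+
+If the build succeeds, a quick sanity check (a sketch; the file names assume
+the default CMake targets) is to list the produced libraries:
+
+.. code:: bash
+
+    ls build/Release   # expect tvm.dll and tvm_runtime.dll
+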
Building ROCm support
~~~~~~~~~~~~~~~~~~~~~
diff --git a/docs/install/nnpack.rst b/docs/install/nnpack.rst
index 10497ba05654..2afd95a5ef3f 100644
--- a/docs/install/nnpack.rst
+++ b/docs/install/nnpack.rst
@@ -105,7 +105,7 @@ Build TVM with NNPACK support
.. code:: bash
-   git clone --recursive https://github.com/apache/incubator-tvm tvm
+   git clone --recursive https://github.com/apache/tvm tvm
- Set `set(USE_NNPACK ON)` in config.cmake.
- Set `NNPACK_PATH` to the $(YOUR_NNPACK_INSTALL_PATH)
diff --git a/docs/langref/relay_adt.rst b/docs/langref/relay_adt.rst
index a53c7515c62a..dab2e3e70678 100644
--- a/docs/langref/relay_adt.rst
+++ b/docs/langref/relay_adt.rst
@@ -387,7 +387,7 @@ The following left fold flattens a list of lists (using concatenation):
Note that these iteration constructs can be implemented directly in Relay's
source language and more can easily be defined (and for more data types, like trees),
rather than being constructs built into the language (e.g.,
-`"foreach" in MXNet `__).
+`"foreach" in MXNet `__).
ADTs and their extensibility allow for a broad range of iterations and data structures to be expressed
in Relay and supported by the type system without having to modify the language implementation.
diff --git a/docs/langref/relay_pattern.rst b/docs/langref/relay_pattern.rst
index 17282e142b2a..8b34b7619840 100644
--- a/docs/langref/relay_pattern.rst
+++ b/docs/langref/relay_pattern.rst
@@ -35,7 +35,7 @@ There are quite a few properties of operators that are worth matching. Below we
demonstrate how to write patterns. It is recommended to check `tests/python/relay/test_dataflow_pattern.py`_
for more use cases.
-.. _tests/python/relay/test_dataflow_pattern.py: https://github.com/apache/incubator-tvm/blob/main/tests/python/relay/test_dataflow_pattern.py
+.. _tests/python/relay/test_dataflow_pattern.py: https://github.com/apache/tvm/blob/main/tests/python/relay/test_dataflow_pattern.py
.. note::
diff --git a/docs/vta/dev/hardware.rst b/docs/vta/dev/hardware.rst
index c8d543330728..1e3c0acdb185 100644
--- a/docs/vta/dev/hardware.rst
+++ b/docs/vta/dev/hardware.rst
@@ -36,7 +36,7 @@ In addition the design adopts decoupled access-execute to hide memory access lat
To a broader extent, VTA can serve as a template deep learning accelerator design for full stack optimization, exposing a generic tensor computation interface to the compiler stack.
-.. image:: https://raw.githubusercontent.com/uwsaml/web-data/main/vta/blogpost/vta_overview.png
+.. image:: https://raw.githubusercontent.com/uwsampl/web-data/main/vta/blogpost/vta_overview.png
:align: center
:width: 80%
@@ -175,7 +175,7 @@ Finally, the ``STORE`` instructions are executed by the store module exclusively
The fields of each instruction are described in the figure below.
The meaning of each field will be further explained in the :ref:`vta-uarch` section.
-.. image:: https://raw.githubusercontent.com/uwsaml/web-data/main/vta/developer/vta_instructions.png
+.. image:: https://raw.githubusercontent.com/uwsampl/web-data/main/vta/developer/vta_instructions.png
:align: center
:width: 100%
@@ -191,7 +191,7 @@ VTA relies on dependence FIFO queues between hardware modules to synchronize the
The figure below shows how a given hardware module can execute concurrently from its producer and consumer modules in a dataflow fashion through the use of dependence FIFO queues, and single-reader/single-writer SRAM buffers.
Each module is connected to its consumer and producer via read-after-write (RAW) and write-after-read (WAR) dependence queues.
-.. image:: https://raw.githubusercontent.com/uwsaml/web-data/main/vta/developer/dataflow.png
+.. image:: https://raw.githubusercontent.com/uwsampl/web-data/main/vta/developer/dataflow.png
:align: center
:width: 100%
@@ -258,7 +258,7 @@ There are two types of compute micro-ops: ALU and GEMM operations.
To minimize the footprint of micro-op kernels, while avoiding the need for control-flow instructions such as conditional jumps, the compute module executes micro-op sequences inside a two-level nested loop that computes the location of each tensor register location via an affine function.
This compression approach helps reduce the micro-kernel instruction footprint, and applies to both matrix multiplication and 2D convolution, commonly found in neural network operators.
-.. image:: https://raw.githubusercontent.com/uwsaml/web-data/main/vta/developer/gemm_core.png
+.. image:: https://raw.githubusercontent.com/uwsampl/web-data/main/vta/developer/gemm_core.png
:align: center
:width: 100%
@@ -269,7 +269,7 @@ This tensorization intrinsic is defined by the dimensions of the input, weight a
Each data type can have a different integer precision: typically both weight and input types are low-precision (8-bits or less), while the accumulator tensor has a wider type to prevent overflows (32-bits).
In order to keep the GEMM core busy, each of the input buffer, weight buffer, and register file have to expose sufficient read/write bandwidth.
-.. image:: https://raw.githubusercontent.com/uwsaml/web-data/main/vta/developer/alu_core.png
+.. image:: https://raw.githubusercontent.com/uwsampl/web-data/main/vta/developer/alu_core.png
:align: center
:width: 100%
@@ -289,7 +289,7 @@ The micro-code in the context of tensor ALU computation only takes care of speci
Load and Store Modules
~~~~~~~~~~~~~~~~~~~~~~
-.. image:: https://raw.githubusercontent.com/uwsaml/web-data/main/vta/developer/2d_dma.png
+.. image:: https://raw.githubusercontent.com/uwsampl/web-data/main/vta/developer/2d_dma.png
:align: center
:width: 100%
diff --git a/docs/vta/dev/index.rst b/docs/vta/dev/index.rst
index d95f6e23d90d..2b715740ed29 100644
--- a/docs/vta/dev/index.rst
+++ b/docs/vta/dev/index.rst
@@ -20,7 +20,7 @@ VTA Design and Developer Guide
This developer guide details the complete VTA-TVM hardware-software stack.
-.. image:: https://raw.githubusercontent.com/uwsaml/web-data/main/vta/blogpost/vta_stack.png
+.. image:: https://raw.githubusercontent.com/uwsampl/web-data/main/vta/blogpost/vta_stack.png
:align: center
:width: 60%
diff --git a/docs/vta/install.rst b/docs/vta/install.rst
index 4cd1ee93a6e6..2248975b61b1 100644
--- a/docs/vta/install.rst
+++ b/docs/vta/install.rst
@@ -135,7 +135,7 @@ Because the direct board-to-computer connection prevents the board from directly
    mkdir <mountpoint>
    sshfs xilinx@192.168.2.99:/home/xilinx <mountpoint>
    cd <mountpoint>
-   git clone --recursive https://github.com/apache/incubator-tvm tvm
+   git clone --recursive https://github.com/apache/tvm tvm
    # When finished, you can leave the mountpoint and unmount the directory
    cd ~
    sudo umount <mountpoint>
@@ -202,7 +202,7 @@ This time again, we will run the 2D convolution testbench.
Beforehand, we need to program the Pynq board FPGA with a VTA bitstream, and build the VTA runtime via RPC.
The following ``test_program_rpc.py`` script will perform two operations:
-* FPGA programming, by downloading a pre-compiled bitstream from a `VTA bitstream repository <https://github.com/uwsaml/vta-distro>`_ that matches the default ``vta_config.json`` configuration set by the host, and sending it over to the Pynq via RPC to program the Pynq's FPGA.
+* FPGA programming, by downloading a pre-compiled bitstream from a `VTA bitstream repository <https://github.com/uwsampl/vta-distro>`_ that matches the default ``vta_config.json`` configuration set by the host, and sending it over to the Pynq via RPC to program the Pynq's FPGA.
* Runtime building on the Pynq, which needs to be run every time the ``vta_config.json`` configuration is modified. This ensures that the VTA software runtime that generates the accelerator's executable via just-in-time (JIT) compilation matches the specifications of the VTA design that is programmed on the FPGA. The build process takes about 30 seconds to complete so be patient!
.. code:: bash
@@ -466,7 +466,7 @@ This would add quartus binary path into your ``PATH`` environment variable, so y
Chisel-based Custom VTA Bitstream Compilation for DE10-Nano
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-Similar to the HLS-based design, high-level hardware parameters in Chisel-based design are listed in the VTA configuration file `Configs.scala <https://github.com/apache/incubator-tvm/blob/main/3rdparty/vta-hw/hardware/chisel/src/main/scala/core/Configs.scala>`_, and they can be customized by the user.
+Similar to the HLS-based design, high-level hardware parameters in Chisel-based design are listed in the VTA configuration file `Configs.scala <https://github.com/apache/tvm/blob/main/3rdparty/vta-hw/hardware/chisel/src/main/scala/core/Configs.scala>`_, and they can be customized by the user.
For Intel FPGA, bitstream generation is driven by a top-level ``Makefile`` under ``/3rdparty/vta-hw/hardware/intel``.
diff --git a/golang/sample/deploy.py b/golang/sample/deploy.py
index a0553cfe0211..98820195511c 100644
--- a/golang/sample/deploy.py
+++ b/golang/sample/deploy.py
@@ -51,7 +51,7 @@
# Save Compiled Module
# --------------------
from tvm.contrib import cc
-from tvm.contrib import util
+from tvm.contrib import utils
fadd.save("deploy.o")
cc.create_shared("deploy.so", ["deploy.o"])
diff --git a/golang/src/tvm_runtime_pack.cc b/golang/src/tvm_runtime_pack.cc
index 644249fa75c9..7dd6dd5e94c5 100644
--- a/golang/src/tvm_runtime_pack.cc
+++ b/golang/src/tvm_runtime_pack.cc
@@ -23,7 +23,7 @@
*/
#include "src/runtime/c_runtime_api.cc"
#include "src/runtime/cpu_device_api.cc"
-#include "src/runtime/file_util.cc"
+#include "src/runtime/file_utils.cc"
#include "src/runtime/library_module.cc"
#include "src/runtime/module.cc"
#include "src/runtime/ndarray.cc"
diff --git a/golang/src/util.go b/golang/src/utils.go
similarity index 98%
rename from golang/src/util.go
rename to golang/src/utils.go
index d3846d1db452..2da4138a1e66 100644
--- a/golang/src/util.go
+++ b/golang/src/utils.go
@@ -19,7 +19,7 @@
/*!
* \brief gotvm package source for common utilities
- * \file util.go
+ * \file utils.go
*/
package gotvm
diff --git a/include/tvm/arith/analyzer.h b/include/tvm/arith/analyzer.h
index a9a0bed6712a..cd20bdcf4d1a 100644
--- a/include/tvm/arith/analyzer.h
+++ b/include/tvm/arith/analyzer.h
@@ -320,10 +320,10 @@ class CanonicalSimplifier {
* arith::Analyzer analyzer;
* {
 * With<ConstraintContext> scope(&analyzer, x % 3 == 0);
- * CHECK_EQ(analyzer.modular_set(x)->coeff, 3);
+ * ICHECK_EQ(analyzer.modular_set(x)->coeff, 3);
* }
* // constraint no longer in effect.
- * CHECK_NE(analyzer.modular_set(x)->coeff, 3);
+ * ICHECK_NE(analyzer.modular_set(x)->coeff, 3);
*
* \endcode
*/
diff --git a/include/tvm/arith/iter_affine_map.h b/include/tvm/arith/iter_affine_map.h
new file mode 100644
index 000000000000..e2e081d2be89
--- /dev/null
+++ b/include/tvm/arith/iter_affine_map.h
@@ -0,0 +1,285 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file tvm/arith/iter_affine_map.h
+ * \brief Iterator quasi-affine mapping patterns.
+ *
+ * This file defines a collection of mapping patterns
+ * that map a collection of independent iterators to another
+ * collection of independent iterators.
+ *
+ * There are two main kinds of mapping patterns:
+ *
+ * - Fuse: fuse a collection of iterators into a single one
+ *
+ * domain(x0) = [0, 4), domain(x1) = [0, 3), domain(x2) = [0, 2)
+ * fuse(x0, x1, x2): y = x2 * 12 + x1 * 4 + x0
+ * domain(y) = [0, 24)
+ *
+ * - Split: split an iterator into multiple ones
+ *
+ * domain(x) = [0, 24)
+ * split(x, 3, 12): [y0, y1, y2] = [x % 3, (x % 12) / 3, x / 12]
+ * domain(y0) = [0, 3), domain(y1) = [0, 4), domain(y2) = [0, 2)
+ *
+ * We use the name "(quasi)affine" to be consistent with
+ * the terminology used in polyhedral compilation.
+ * Notably, fuse is an affine transformation,
+ * while split corresponds to additional floordiv/mod operations
+ * that can appear in quasi-affine transformations.
+ */
+#ifndef TVM_ARITH_ITER_AFFINE_MAP_H_
+#define TVM_ARITH_ITER_AFFINE_MAP_H_
+
+#include <tvm/arith/analyzer.h>
+#include <tvm/ir/expr.h>
+#include <tvm/tir/var.h>
+
+namespace tvm {
+namespace arith {
+
+/*!
+ * \brief Base class of all iter map expressions.
+ *
+ * An IterMapExpr is a special expression to store
+ * the result of IterMapDetection.
+ * It should not appear in a legal TIR PrimFunc.
+ */
+class IterMapExprNode : public PrimExprNode {
+ public:
+ // overrides
+ void VisitAttrs(tvm::AttrVisitor* v) {}
+
+ static constexpr const char* _type_key = "arith.IterMapExpr";
+ static constexpr const uint32_t _type_child_slots = 3;
+ TVM_DECLARE_BASE_OBJECT_INFO(IterMapExprNode, PrimExprNode);
+};
+
+/*!
+ * \brief Managed reference to IterMapExprNode.
+ * \sa IterMapExprNode
+ */
+class IterMapExpr : public PrimExpr {
+ public:
+ TVM_DEFINE_OBJECT_REF_METHODS(IterMapExpr, PrimExpr, IterMapExprNode);
+};
+
+/*!
+ * \brief Mark the source as an iterator in [0, extent).
+ *
+ * IterMark is used to mark source expression as a valid
+ * iterator to make future analysis easy.
+ */
+class IterMarkNode : public Object {
+ public:
+ /*!
+ * \brief The source expression, can either be
+ * a IterSumExpr or a Var.
+ */
+ PrimExpr source;
+ /*!
+ * \brief The extent of the iteration.
+ */
+ PrimExpr extent;
+
+ // overrides
+ void VisitAttrs(tvm::AttrVisitor* v) {
+ v->Visit("source", &source);
+ v->Visit("extent", &extent);
+ }
+
+ bool SEqualReduce(const IterMarkNode* other, SEqualReducer equal) const {
+ equal->MarkGraphNode();
+ return equal(source, other->source) && equal(extent, other->extent);
+ }
+
+ void SHashReduce(SHashReducer hash_reduce) const {
+ hash_reduce->MarkGraphNode();
+ hash_reduce(source);
+ hash_reduce(extent);
+ }
+
+ static constexpr const bool _type_has_method_sequal_reduce = true;
+ static constexpr const bool _type_has_method_shash_reduce = true;
+ static constexpr const char* _type_key = "arith.IterMark";
+ TVM_DECLARE_FINAL_OBJECT_INFO(IterMarkNode, Object);
+};
+
+/*!
+ * \brief Managed reference to IterMarkNode.
+ * \sa IterMarkNode
+ */
+class IterMark : public ObjectRef {
+ public:
+ /*!
+ * \brief constructor.
+ * \param source The source expression.
+ * \param extent The extent of the iterator.
+ */
+ TVM_DLL IterMark(PrimExpr source, PrimExpr extent);
+
+ TVM_DEFINE_OBJECT_REF_METHODS(IterMark, ObjectRef, IterMarkNode);
+};
+
+/*!
+ * \brief Split of an iterator.
+ *
+ * result = floormod(floordiv(source, lower_factor), extent) * scale
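+ *
+ * For example, with domain(source) = [0, 24), lower_factor = 3, extent = 4
+ * and scale = 1, result = floormod(floordiv(x, 3), 4), i.e. the middle
+ * component y1 of split(x, 3, 12) in the file header above.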
+ */
+class IterSplitExprNode : public IterMapExprNode {
+ public:
+ /*! \brief The source marked iterator. */
+ IterMark source;
+ /*! \brief The lower factor to split the source. */
+ PrimExpr lower_factor;
+ /*! \brief The extent of the split. */
+ PrimExpr extent;
+ /*! \brief Additional scale. */
+ PrimExpr scale;
+
+ // overrides
+ void VisitAttrs(tvm::AttrVisitor* v) {
+ v->Visit("source", &source);
+ v->Visit("lower_factor", &lower_factor);
+ v->Visit("extent", &extent);
+ v->Visit("scale", &scale);
+ }
+
+ bool SEqualReduce(const IterSplitExprNode* other, SEqualReducer equal) const {
+ return equal(source, other->source) && equal(lower_factor, other->lower_factor) &&
+ equal(extent, other->extent) && equal(scale, other->scale);
+ }
+
+ void SHashReduce(SHashReducer hash_reduce) const {
+ hash_reduce(source);
+ hash_reduce(lower_factor);
+ hash_reduce(extent);
+ hash_reduce(scale);
+ }
+
+ static constexpr const char* _type_key = "arith.IterSplitExpr";
+ TVM_DECLARE_FINAL_OBJECT_INFO(IterSplitExprNode, IterMapExprNode);
+};
+
+/*!
+ * \brief Managed reference to IterSplitExprNode.
+ * \sa IterSplitExprNode
+ */
+class IterSplitExpr : public IterMapExpr {
+ public:
+ /*!
+ * \brief constructor from just source.
+ * \param source The source expression.
+ */
+ TVM_DLL explicit IterSplitExpr(IterMark source);
+ /*!
+ * \brief constructor from source and additional scale.
+ * \param source The source expression.
+ * \param scale The additional scaling factor.
+ */
+ TVM_DLL explicit IterSplitExpr(IterMark source, PrimExpr scale);
+ /*!
+ * \brief constructor
+ * \param source The source expression.
+ * \param lower_factor The lower factor to split the source.
+ * \param extent The extent of the split.
+ * \param scale The additional scaling factor.
+ */
+ TVM_DLL explicit IterSplitExpr(IterMark source, PrimExpr lower_factor, PrimExpr extent,
+ PrimExpr scale);
+
+ TVM_DEFINE_OBJECT_REF_METHODS(IterSplitExpr, IterMapExpr, IterSplitExprNode);
+ TVM_DEFINE_OBJECT_REF_COW_METHOD(IterSplitExprNode);
+};
+
+/*!
+ * \brief Fuse multiple iterators by summing them with scaling.
+ *
+ * result = sum(args) + base
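+ *
+ * For example, fuse(x0, x1, x2) from the file header, y = x2 * 12 + x1 * 4 + x0,
+ * is represented as an IterSumExpr whose args are the three iterators scaled
+ * by 1, 4 and 12 respectively, with base = 0.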
+ */
+class IterSumExprNode : public IterMapExprNode {
+ public:
+ /*! \brief The args to the sum. */
+  Array<IterSplitExpr> args;
+ /*! \brief The base offset. */
+ PrimExpr base;
+
+ // overrides
+ void VisitAttrs(tvm::AttrVisitor* v) {
+ v->Visit("args", &args);
+ v->Visit("base", &base);
+ }
+
+ bool SEqualReduce(const IterSumExprNode* other, SEqualReducer equal) const {
+ return equal(args, other->args) && equal(base, other->base);
+ }
+
+ void SHashReduce(SHashReducer hash_reduce) const {
+ hash_reduce(args);
+ hash_reduce(base);
+ }
+
+ static constexpr const char* _type_key = "arith.IterSumExpr";
+ TVM_DECLARE_FINAL_OBJECT_INFO(IterSumExprNode, IterMapExprNode);
+};
+
+/*!
+ * \brief Managed reference to IterSumExprNode.
+ * \sa IterSumExprNode
+ */
+class IterSumExpr : public IterMapExpr {
+ public:
+ /*!
+ * \brief constructor.
+ * \param args The args to the sum.
+ * \param base The base offset.
+ */
+  TVM_DLL IterSumExpr(Array<IterSplitExpr> args, PrimExpr base);
+
+ TVM_DEFINE_OBJECT_REF_METHODS(IterSumExpr, IterMapExpr, IterSumExprNode);
+ TVM_DEFINE_OBJECT_REF_COW_METHOD(IterSumExprNode);
+};
+
+/*!
+ * \brief Detect if indices can be written as
+ *
+ * [y_0 + c_0, y_1 + c_1, ..., y_n + c_n]
+ *
+ * Here y = some-quasi-affine-iter-map(input_iters)
+ * and c are symbolic constants.
+ *
+ * We also require y_i and y_j to be independent for i != j.
+ *
+ * For returned value rv, the following is always true:
+ * - rv[i]->args.size() <= 1: only one iterator per element.
+ *
+ * \param indices The indices to detect pattern for.
+ * \param input_iters Map from variable to iterator's range.
+ * \param analyzer Analyzer used to get context information.
+ *
+ * \return The detected pattern if a match exists,
+ * otherwise return an empty array.
+ */
+Array<IterSumExpr> DetectIterMap(const Array<PrimExpr>& indices, const Map<Var, Range>& input_iters,
+                                 arith::Analyzer* analyzer);
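+
+// Illustrative usage sketch (hypothetical names; not part of this header):
+//
+//   arith::Analyzer analyzer;
+//   // x iterates over [0, 24); indices = {floormod(x, 3), floordiv(x, 12)}
+//   Map<Var, Range> input_iters = {{x, Range::FromMinExtent(0, 24)}};
+//   Array<IterSumExpr> rv = DetectIterMap(indices, input_iters, &analyzer);
+//   // rv is empty when no quasi-affine pattern is matched.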
+
+} // namespace arith
+} // namespace tvm
+#endif // TVM_ARITH_ITER_AFFINE_MAP_H_
diff --git a/include/tvm/auto_scheduler/compute_dag.h b/include/tvm/auto_scheduler/compute_dag.h
index 553008a7fcbf..b9306c64b0b5 100755
--- a/include/tvm/auto_scheduler/compute_dag.h
+++ b/include/tvm/auto_scheduler/compute_dag.h
@@ -194,23 +194,48 @@ class ComputeDAGNode : public Object {
TVM_DECLARE_FINAL_OBJECT_INFO(ComputeDAGNode, Object);
};
+/*!
+ * \brief Options for applying layout rewrite.
+ * This is an optimization to rewrite the layout of input tensors according to the schedule we get.
+ */
+enum class LayoutRewriteOption : int {
+ /*! \brief Do not perform layout rewrite. */
+ NoRewrite = 0,
+ /*! \brief Insert layout transformation stages for input placeholders in the compute DAG */
+ InsertTransformStage = 1,
+ /*!
+ * \brief Do not insert layout transformation stages and assume the input placeholders
+ * are pre-transformed.
+ * \note The lowered function with this option does not accept the original input shapes,
+ * so this option must be used together with the `AutoSchedulerLayoutRewrite` pass in Relay.
+ */
+ RewriteForPreTransformed = 2,
+};
+
/*!
* \brief Managed reference to ComputeDAGNode.
* \sa ComputeDAGNode
*/
class ComputeDAG : public ObjectRef {
public:
- /*! \brief The constructor.
+ /*! \brief Construct a DAG from a list of output tensors.
* \param tensors `te::Tensor`s for a compute declaration.
*/
  TVM_DLL explicit ComputeDAG(Array<te::Tensor> tensors);
+ /*! \brief Construct a DAG based on a schedule.
+ * \param sch `te::Schedule`s for a compute declaration.
+ */
+ TVM_DLL explicit ComputeDAG(const te::Schedule& sch);
+
/*!
* \brief Rewrite the layout of placeholder specified by attr `layout_free_placeholders`
* according to the loop nest derived with `transform_steps`.
* \param transform_steps Transform steps of a state.
+ * \param layout_rewrite Different options in layout rewrite.
+ * \return The updated ComputeDAG after layout rewrite.
*/
-  void RewriteLayout(const Array<Step>& transform_steps);
+  ComputeDAG RewriteLayout(Array<Step>* transform_steps, LayoutRewriteOption layout_rewrite) const;
/*!
* \brief Apply the history transform steps to get a TVM schedule.
@@ -220,14 +245,14 @@ class ComputeDAG : public ObjectRef {
* \param stage_to_axes The map that stores all axes for one stage.
* Pass a valid pointer if this information needs to be used outside this function.
* \param layout_rewrite Rewrite the layout of placeholders specified by
- * attr `layout_free_placeholders`
+ * attr `layout_free_placeholders`.
 * \return A `te.schedule` and an Array of `te.Tensor` to be used in `tvm.lower`
* or `tvm.build`.
*/
-  std::pair<te::Schedule, Array<te::Tensor>> ApplySteps(const Array<Step>& transform_steps,
-                                                        Array<te::Stage>* stages = nullptr,
-                                                        StageToAxesMap* stage_to_axes = nullptr,
-                                                        bool layout_rewrite = false) const;
+  std::pair<te::Schedule, Array<te::Tensor>> ApplySteps(
+      const Array<Step>& transform_steps, Array<te::Stage>* stages = nullptr,
+      StageToAxesMap* stage_to_axes = nullptr,
+      LayoutRewriteOption layout_rewrite = LayoutRewriteOption::NoRewrite) const;
/*!
* \brief Print transform steps as equivalent python schedule API.
diff --git a/include/tvm/auto_scheduler/measure.h b/include/tvm/auto_scheduler/measure.h
index 349f4f8c7d51..e8c01e84f289 100755
--- a/include/tvm/auto_scheduler/measure.h
+++ b/include/tvm/auto_scheduler/measure.h
@@ -43,6 +43,7 @@
#include <string>
#include <unordered_map>
+#include <unordered_set>
#include <vector>
namespace tvm {
@@ -423,7 +424,7 @@ class RPCRunner : public ProgramRunner {
/*!
* \brief Measurer that measures the time costs of tvm programs
- * This class combines ProgramBuilder and ProgramRunner and provides a simpler API */
+ * This class combines ProgramBuilder and ProgramRunner, and provides a simpler API */
class ProgramMeasurerNode : public Object {
public:
/*! \brief Measured programs counter. */
@@ -436,6 +437,8 @@ class ProgramMeasurerNode : public Object {
std::unordered_map<std::string, State> best_state;
/*! \brief Workload key to best state's count index map. */
std::unordered_map<std::string, int> best_ct;
+  /*! \brief The set of workloads that have at least one valid schedule */
+  std::unordered_set<std::string> has_valid;
/*! \brief The ProgramBuilder to build each program. */
ProgramBuilder builder;
/*! \brief The ProgramRunner to measure each program. */
@@ -444,7 +447,7 @@ class ProgramMeasurerNode : public Object {
Optional<Array<MeasureCallback>> callbacks;
/*! \brief Verbosity level. 0 for silent, 1 to output information during program measuring. */
int verbose;
- /*! \brief The number of max continuous error. */
+  /*! \brief The maximum number of allowed continuous errors before forcibly stopping the tuning */
int max_continuous_error;
/*! \brief Reset book keeping variables */
@@ -454,13 +457,12 @@ class ProgramMeasurerNode : public Object {
* \brief Do measurement.
* \param task The current SearchTask.
* \param policy The current SearchPolicy.
- * \param inputs The MeasureInputs.
- * \param results A pointer to a MeasureResult Array, this is used as output.
+ * \param inputs The inputs of measurement.
* \param batch_size Number of programs to be measured in one batch.
+   * \return The results of the measurement.
*/
-  void Measure(const SearchTask& task, const SearchPolicy& policy,
-               const Array<MeasureInput>& inputs, Array<MeasureResult>* results,
-               int batch_size = -1);
+  Array<MeasureResult> Measure(const SearchTask& task, const SearchPolicy& policy,
+                               const Array<MeasureInput>& inputs, int batch_size = -1);
/*!
* \brief Do measurement silently.
* This API will not print the measure results to screen.
@@ -486,12 +488,13 @@ class ProgramMeasurer : public ObjectRef {
public:
/*!
* \brief The constructor.
- * \param builder The ProgramBuilder to build each program.
- * \param runner The ProgramRunner to measure each program.
- * \param callbacks MeasureCallback to be called after each measure batch.
+ * \param builder The ProgramBuilder to build programs.
+ * \param runner The ProgramRunner to measure programs.
+ * \param callbacks MeasureCallback to be called after each measurement batch.
* \param verbose Verbosity level. 0 for silent, 1 to output information during program
* measuring.
- * \param max_continuous_error The number of allowed maximum continuous error.
+   * \param max_continuous_error The maximum number of allowed continuous errors before
+   *        forcibly stopping the tuning.
*/
  ProgramMeasurer(ProgramBuilder builder, ProgramRunner runner,
                  Optional<Array<MeasureCallback>> callbacks, int verbose,
diff --git a/include/tvm/auto_scheduler/search_policy.h b/include/tvm/auto_scheduler/search_policy.h
index ddb0dd284875..e433799b7fa5 100755
--- a/include/tvm/auto_scheduler/search_policy.h
+++ b/include/tvm/auto_scheduler/search_policy.h
@@ -22,26 +22,6 @@
* \brief The base class of search policies, including the abstract definition of search policy and
* other supporting data structures.
*
- * The basic schedule search process for the auto-scheduler is design to be:
- * `Program sampling` -> `Performance Tuning`.
- *
- * In `Program sampling`, we use some predefined precise or heuristic rules to generate several
- * initial schedules. Based on these initial starting points, we perform `Performance Tuning` which
- * uses cost model based evolutionary search to select schedules with the best performance.
- *
- * Candidate schedules are measured against the specific hardware target.
- *
- * We intend to introduce different level of automation on the schedule generation process:
- * - Level 0(the default level): For all kinds of ops/subgraphs, the search policy should be able
- * to generate schedule automatically.
- * - Level 1: For some complicated ops/subgraphs(e.g. conv2d windograd), the default search space
- * of level 0 may be too large to find a high performance schedule efficiently. We provide some
- * op attributes to help reduce the total search space, see `SearchPolicyKey` below for more
- * information.
- * - Level 2: For some further special ops/subgraphs, users may more likely to write their own
- * template(just like AutoTVM). Search policy should be able to provide a flexible approach as
- * well.
- *
* \note How to add a new search policy.
* In design, there's no need for users to implement their own search policy, our formal search
* policy(will be brought later) should be enough to cover most use cases. Meanwhile, a custom rule
@@ -62,11 +42,13 @@
#ifndef TVM_AUTO_SCHEDULER_SEARCH_POLICY_H_
#define TVM_AUTO_SCHEDULER_SEARCH_POLICY_H_
+#include <tvm/auto_scheduler/measure.h>
#include <tvm/auto_scheduler/search_task.h>
#include <tvm/node/node.h>
#include <string>
#include <unordered_set>
+#include <utility>
#include <vector>
namespace tvm {
@@ -171,6 +153,15 @@ class SearchPolicyNode : public Object {
virtual State Search(int num_measure_trials, int early_stopping, int num_measures_per_round,
ProgramMeasurer measurer) = 0;
+ /*!
+ * \brief Continue the search by doing an additional search round.
+ * \param num_measure The number of measurements
+ * \param measurer The measurer to measure programs
+ * \return The measurement records for measurements in this search round
+ */
+  virtual std::pair<Array<MeasureInput>, Array<MeasureResult>> ContinueSearchOneRound(
+      int num_measure, ProgramMeasurer measurer) = 0;
+
/*!
* \brief Preload measured states from a log file to resume the state of the search policy.
* \param log_file The name of the record log file.
diff --git a/include/tvm/auto_scheduler/search_task.h b/include/tvm/auto_scheduler/search_task.h
index 85154b5e406b..6d85835d2e4b 100755
--- a/include/tvm/auto_scheduler/search_task.h
+++ b/include/tvm/auto_scheduler/search_task.h
@@ -44,17 +44,16 @@ class HardwareParamsNode : public Object {
int cache_line_bytes;
// GPU related parameters got from device query API
-
- /*! \brief The max shared memory per block. */
- int max_shared_memory_per_block{INT32_MAX};
- /*! \brief The max register memory per block. */
- int max_registers_per_block{INT32_MAX};
- /*! \brief The max threads per block. */
- int max_threads_per_block{INT32_MAX};
+ /*! \brief The max shared memory per block in bytes. */
+ int max_shared_memory_per_block;
+  /*! \brief The max number of registers per block. */
+ int max_registers_per_block;
+ /*! \brief The max number of threads per block. */
+ int max_threads_per_block;
/*! \brief The max vthread extent. */
- int max_vthread_extent{INT32_MAX};
+ int max_vthread_extent;
/*! \brief The thread numbers of a warp. */
- int warp_size{INT32_MAX};
+ int warp_size;
void VisitAttrs(tvm::AttrVisitor* v) {
v->Visit("num_cores", &num_cores);
@@ -90,8 +89,15 @@ class HardwareParams : public ObjectRef {
* \param num_cores The number of cores.
* \param vector_unit_bytes The width of vector units in bytes.
* \param cache_line_bytes The size of cache line in bytes.
+ * \param max_shared_memory_per_block The max amount of shared memory per block for GPU.
+ * \param max_registers_per_block The max number of registers per block for GPU.
+ * \param max_threads_per_block The max number of threads per block for GPU.
+ * \param max_vthread_extent The max extent of vthread for GPU.
+   * \param warp_size The warp size for GPU.
*/
- HardwareParams(int num_cores, int vector_unit_bytes, int cache_line_bytes);
+ HardwareParams(int num_cores, int vector_unit_bytes, int cache_line_bytes,
+ int max_shared_memory_per_block, int max_registers_per_block,
+ int max_threads_per_block, int max_vthread_extent, int warp_size);
TVM_DEFINE_OBJECT_REF_METHODS(HardwareParams, ObjectRef, HardwareParamsNode);
TVM_DEFINE_OBJECT_REF_COW_METHOD(HardwareParamsNode);
diff --git a/include/tvm/auto_scheduler/transform_step.h b/include/tvm/auto_scheduler/transform_step.h
index 7be3554c7c5d..4cc1551e76fc 100755
--- a/include/tvm/auto_scheduler/transform_step.h
+++ b/include/tvm/auto_scheduler/transform_step.h
@@ -182,7 +182,23 @@ class StepNode : public Object {
*/
class Step : public ObjectRef {
public:
- TVM_DEFINE_MUTABLE_OBJECT_REF_METHODS(Step, ObjectRef, StepNode);
+ /*!
+ * \brief CopyOnWrite function for Step.
+ * This works almost the same as a normal ObjectRef.CopyOnWrite(), but can dispatch to different
+ * steps.
+   * \return A base StepNode pointer, which needs to be cast to its real StepNode type before doing any
+ * modifications.
+ * \code
+ *
+ * SplitStep ref;
+ * StepNode* mutable_ref = ref.CopyOnWrite();
+   * dynamic_cast<SplitStepNode*>(mutable_ref)->... = ...;
+ *
+ * \endcode
+ */
+ StepNode* CopyOnWrite();
+
+ TVM_DEFINE_OBJECT_REF_METHODS(Step, ObjectRef, StepNode);
};
// Forward declaration
@@ -267,7 +283,7 @@ class AnnotationStepNode : public StepNode {
static constexpr const char* record_prefix_str = "AN";
static constexpr const char* _type_key = "auto_scheduler.AnnotationStep";
- TVM_DECLARE_FINAL_OBJECT_INFO(AnnotationStepNode, Object);
+ TVM_DECLARE_FINAL_OBJECT_INFO(AnnotationStepNode, StepNode);
};
/*!
@@ -330,7 +346,7 @@ class FuseStepNode : public StepNode {
static constexpr const char* record_prefix_str = "FU";
static constexpr const char* _type_key = "auto_scheduler.FuseStep";
- TVM_DECLARE_FINAL_OBJECT_INFO(FuseStepNode, Object);
+ TVM_DECLARE_FINAL_OBJECT_INFO(FuseStepNode, StepNode);
};
/*!
@@ -390,7 +406,7 @@ class PragmaStepNode : public StepNode {
static constexpr const char* record_prefix_str = "PR";
static constexpr const char* _type_key = "auto_scheduler.PragmaStep";
- TVM_DECLARE_FINAL_OBJECT_INFO(PragmaStepNode, Object);
+ TVM_DECLARE_FINAL_OBJECT_INFO(PragmaStepNode, StepNode);
};
/*!
@@ -452,7 +468,7 @@ class ReorderStepNode : public StepNode {
static constexpr const char* record_prefix_str = "RE";
static constexpr const char* _type_key = "auto_scheduler.ReorderStep";
- TVM_DECLARE_FINAL_OBJECT_INFO(ReorderStepNode, Object);
+ TVM_DECLARE_FINAL_OBJECT_INFO(ReorderStepNode, StepNode);
};
/*!
@@ -527,7 +543,7 @@ class SplitStepNode : public StepNode {
static constexpr const char* record_prefix_str = "SP";
static constexpr const char* _type_key = "auto_scheduler.SplitStep";
- TVM_DECLARE_FINAL_OBJECT_INFO(SplitStepNode, Object);
+ TVM_DECLARE_FINAL_OBJECT_INFO(SplitStepNode, StepNode);
};
/*!
@@ -607,7 +623,7 @@ class FollowSplitStepNode : public StepNode {
static constexpr const char* record_prefix_str = "FSP";
static constexpr const char* _type_key = "auto_scheduler.FollowSplitStep";
- TVM_DECLARE_FINAL_OBJECT_INFO(FollowSplitStepNode, Object);
+ TVM_DECLARE_FINAL_OBJECT_INFO(FollowSplitStepNode, StepNode);
};
/*!
@@ -688,7 +704,7 @@ class FollowFusedSplitStepNode : public StepNode {
static constexpr const char* record_prefix_str = "FFSP";
static constexpr const char* _type_key = "auto_scheduler.FollowFusedSplitStep";
- TVM_DECLARE_FINAL_OBJECT_INFO(FollowFusedSplitStepNode, Object);
+ TVM_DECLARE_FINAL_OBJECT_INFO(FollowFusedSplitStepNode, StepNode);
};
/*!
@@ -754,7 +770,7 @@ class StorageAlignStepNode : public StepNode {
static constexpr const char* record_prefix_str = "SA";
static constexpr const char* _type_key = "auto_scheduler.StorageAlignStep";
- TVM_DECLARE_FINAL_OBJECT_INFO(StorageAlignStepNode, Object);
+ TVM_DECLARE_FINAL_OBJECT_INFO(StorageAlignStepNode, StepNode);
};
/*!
@@ -822,7 +838,7 @@ class ComputeAtStepNode : public StepNode {
static constexpr const char* record_prefix_str = "CA";
static constexpr const char* _type_key = "auto_scheduler.ComputeAtStep";
- TVM_DECLARE_FINAL_OBJECT_INFO(ComputeAtStepNode, Object);
+ TVM_DECLARE_FINAL_OBJECT_INFO(ComputeAtStepNode, StepNode);
};
/*!
@@ -879,7 +895,7 @@ class ComputeInlineStepNode : public StepNode {
static constexpr const char* record_prefix_str = "CI";
static constexpr const char* _type_key = "auto_scheduler.ComputeInlineStep";
- TVM_DECLARE_FINAL_OBJECT_INFO(ComputeInlineStepNode, Object);
+ TVM_DECLARE_FINAL_OBJECT_INFO(ComputeInlineStepNode, StepNode);
};
/*!
@@ -938,7 +954,7 @@ class ComputeRootStepNode : public StepNode {
static constexpr const char* record_prefix_str = "CR";
static constexpr const char* _type_key = "auto_scheduler.ComputeRootStep";
- TVM_DECLARE_FINAL_OBJECT_INFO(ComputeRootStepNode, Object);
+ TVM_DECLARE_FINAL_OBJECT_INFO(ComputeRootStepNode, StepNode);
};
/*!
@@ -1010,7 +1026,7 @@ class CacheReadStepNode : public StepNode {
static constexpr const char* record_prefix_str = "CHR";
static constexpr const char* _type_key = "auto_scheduler.CacheReadStep";
- TVM_DECLARE_FINAL_OBJECT_INFO(CacheReadStepNode, Object);
+ TVM_DECLARE_FINAL_OBJECT_INFO(CacheReadStepNode, StepNode);
};
/*!
@@ -1081,7 +1097,7 @@ class CacheWriteStepNode : public StepNode {
static constexpr const char* record_prefix_str = "CHW";
static constexpr const char* _type_key = "auto_scheduler.CacheWriteStep";
- TVM_DECLARE_FINAL_OBJECT_INFO(CacheWriteStepNode, Object);
+ TVM_DECLARE_FINAL_OBJECT_INFO(CacheWriteStepNode, StepNode);
};
/*!
@@ -1148,7 +1164,7 @@ class RfactorStepNode : public StepNode {
static constexpr const char* record_prefix_str = "RF";
static constexpr const char* _type_key = "auto_scheduler.RfactorStep";
- TVM_DECLARE_FINAL_OBJECT_INFO(RfactorStepNode, Object);
+ TVM_DECLARE_FINAL_OBJECT_INFO(RfactorStepNode, StepNode);
};
/*!
diff --git a/include/tvm/ir/attrs.h b/include/tvm/ir/attrs.h
index e92baf12b05f..13bfd715cdfb 100644
--- a/include/tvm/ir/attrs.h
+++ b/include/tvm/ir/attrs.h
@@ -413,6 +413,12 @@ inline void SetIntValue(T* ptr, const TVMArgValue& val) {
}
}
+// Workaround for GCC8.1 / GCC8.2
+template <>
+inline void SetValue<DataType>(DataType* ptr, const TVMArgValue& val) {
+ *ptr = val.operator DataType();
+}
+
template <>
inline void SetValue<std::string>(std::string* ptr, const TVMArgValue& val) {
if (String::CanConvertFrom(val)) {
@@ -428,7 +434,7 @@ inline void SetValue<double>(double* ptr, const TVMArgValue& val) {
*ptr = val.operator double();
} else {
ObjectRef expr = val;
- CHECK(expr.defined());
+ ICHECK(expr.defined());
if (const IntImmNode* op = expr.as<IntImmNode>()) {
  *ptr = static_cast<double>(op->value);
} else if (const FloatImmNode* op = expr.as<FloatImmNode>()) {
@@ -664,7 +670,7 @@ class AttrsNode : public BaseAttrsNode {
}
void InitByPackedArgs(const runtime::TVMArgs& args, bool allow_unknown) final {
- CHECK_EQ(args.size() % 2, 0);
+ ICHECK_EQ(args.size() % 2, 0);
const int kLinearSearchBound = 16;
int hit_count = 0;
// applies two strategies to lookup
@@ -672,7 +678,7 @@ class AttrsNode : public BaseAttrsNode {
// linear search.
auto ffind = [&args](const char* key, runtime::TVMArgValue* val) {
for (int i = 0; i < args.size(); i += 2) {
- CHECK_EQ(args.type_codes[i], kTVMStr);
+ ICHECK_EQ(args.type_codes[i], kTVMStr);
if (!std::strcmp(key, args.values[i].v_str)) {
*val = args[i + 1];
return true;
@@ -687,7 +693,7 @@ class AttrsNode : public BaseAttrsNode {
// construct a map then do lookup.
std::unordered_map<std::string, runtime::TVMArgValue> kwargs;
for (int i = 0; i < args.size(); i += 2) {
- CHECK_EQ(args.type_codes[i], kTVMStr);
+ ICHECK_EQ(args.type_codes[i], kTVMStr);
kwargs[args[i].operator std::string()] = args[i + 1];
}
auto ffind = [&kwargs](const char* key, runtime::TVMArgValue* val) {
diff --git a/include/tvm/ir/diagnostic.h b/include/tvm/ir/diagnostic.h
index 6b9807487bae..2053a295a3b8 100644
--- a/include/tvm/ir/diagnostic.h
+++ b/include/tvm/ir/diagnostic.h
@@ -21,68 +21,22 @@
* \file diagnostic.h
* \brief A new diagnostic interface for TVM error reporting.
*
- * A prototype of the new diagnostic reporting interface for TVM.
- *
- * Eventually we hope to promote this file to the top-level and
- * replace the existing errors.h.
*/
#ifndef TVM_IR_DIAGNOSTIC_H_
#define TVM_IR_DIAGNOSTIC_H_
#include
-#include
#include
-#include
-#include
-#include
-#include
+#include
#include
-#include
-#include
namespace tvm {
using tvm::parser::SourceMap;
using tvm::runtime::TypedPackedFunc;
-extern const char* kTVM_INTERNAL_ERROR_MESSAGE;
-
-#define ICHECK_INDENT " "
-
-#define ICHECK_BINARY_OP(name, op, x, y) \
- if (dmlc::LogCheckError _check_err = dmlc::LogCheck##name(x, y)) \
- dmlc::LogMessageFatal(__FILE__, __LINE__).stream() \
- << kTVM_INTERNAL_ERROR_MESSAGE << std::endl \
- << ICHECK_INDENT << "Check failed: " << #x " " #op " " #y << *(_check_err.str) << ": "
-
-#define ICHECK(x) \
- if (!(x)) \
- dmlc::LogMessageFatal(__FILE__, __LINE__).stream() \
- << kTVM_INTERNAL_ERROR_MESSAGE << ICHECK_INDENT << "Check failed: " #x << " == false: "
-
-#define ICHECK_LT(x, y) ICHECK_BINARY_OP(_LT, <, x, y)
-#define ICHECK_GT(x, y) ICHECK_BINARY_OP(_GT, >, x, y)
-#define ICHECK_LE(x, y) ICHECK_BINARY_OP(_LE, <=, x, y)
-#define ICHECK_GE(x, y) ICHECK_BINARY_OP(_GE, >=, x, y)
-#define ICHECK_EQ(x, y) ICHECK_BINARY_OP(_EQ, ==, x, y)
-#define ICHECK_NE(x, y) ICHECK_BINARY_OP(_NE, !=, x, y)
-#define ICHECK_NOTNULL(x) \
- ((x) == nullptr ? dmlc::LogMessageFatal(__FILE__, __LINE__).stream() \
- << kTVM_INTERNAL_ERROR_MESSAGE << __INDENT << "Check not null: " #x \
- << ' ', \
- (x) : (x)) // NOLINT(*)
-
-/*! \brief The diagnostic level, controls the printing of the message. */
-enum class DiagnosticLevel : int {
- kBug = 10,
- kError = 20,
- kWarning = 30,
- kNote = 40,
- kHelp = 50,
-};
-
class DiagnosticBuilder;
/*! \brief A compiler diagnostic. */
@@ -195,7 +149,7 @@ class DiagnosticRenderer : public ObjectRef {
void Render(const DiagnosticContext& ctx);
DiagnosticRendererNode* operator->() {
- CHECK(get() != nullptr);
+ ICHECK(get() != nullptr);
return static_cast<DiagnosticRendererNode*>(get_mutable());
}
@@ -249,7 +203,7 @@ class DiagnosticContext : public ObjectRef {
void Render();
DiagnosticContextNode* operator->() {
- CHECK(get() != nullptr);
+ ICHECK(get() != nullptr);
return static_cast<DiagnosticContextNode*>(get_mutable());
}
diff --git a/include/tvm/ir/env_func.h b/include/tvm/ir/env_func.h
index 65653b75562d..386666a2c50c 100644
--- a/include/tvm/ir/env_func.h
+++ b/include/tvm/ir/env_func.h
@@ -83,7 +83,7 @@ class EnvFunc : public ObjectRef {
template <typename... Args>
runtime::TVMRetValue operator()(Args&&... args) const {
const EnvFuncNode* n = operator->();
- CHECK(n != nullptr);
+ ICHECK(n != nullptr);
return n->func(std::forward<Args>(args)...);
}
/*!
@@ -137,7 +137,7 @@ class TypedEnvFunc<R(Args...)> : public ObjectRef {
*/
R operator()(Args... args) const {
const EnvFuncNode* n = operator->();
- CHECK(n != nullptr);
+ ICHECK(n != nullptr);
return runtime::detail::typed_packed_call_dispatcher<R>::run(n->func,
                                                              std::forward<Args>(args)...);
}
diff --git a/include/tvm/ir/expr.h b/include/tvm/ir/expr.h
index d6cfc5a64121..1c470fae51ee 100644
--- a/include/tvm/ir/expr.h
+++ b/include/tvm/ir/expr.h
@@ -45,10 +45,16 @@ using tvm::runtime::String;
*/
class BaseExprNode : public Object {
public:
+ /*!
+ * \brief Span that points to the original source code.
+ * Reserved debug information.
+ */
+ mutable Span span;
+
static constexpr const char* _type_key = "BaseExpr";
static constexpr const bool _type_has_method_sequal_reduce = true;
static constexpr const bool _type_has_method_shash_reduce = true;
- static constexpr const uint32_t _type_child_slots = 58;
+ static constexpr const uint32_t _type_child_slots = 62;
TVM_DECLARE_BASE_OBJECT_INFO(BaseExprNode, Object);
};
@@ -92,7 +98,7 @@ class PrimExprNode : public BaseExprNode {
DataType dtype;
static constexpr const char* _type_key = "PrimExpr";
- static constexpr const uint32_t _type_child_slots = 34;
+ static constexpr const uint32_t _type_child_slots = 38;
TVM_DECLARE_BASE_OBJECT_INFO(PrimExprNode, BaseExprNode);
};
@@ -135,11 +141,6 @@ class PrimExpr : public BaseExpr {
*/
class RelayExprNode : public BaseExprNode {
public:
- /*!
- * \brief Span that points to the original source code.
- * Reserved debug information.
- */
- mutable Span span;
/*!
* \brief Stores the result of type inference(type checking).
*
@@ -263,8 +264,9 @@ class IntImm : public PrimExpr {
* \brief Constructor.
* \param dtype The data type of the value.
* \param value The internal value.
+ * \param span The location of this object in the source code.
*/
- TVM_DLL IntImm(DataType dtype, int64_t value);
+ TVM_DLL IntImm(DataType dtype, int64_t value, Span span = Span());
TVM_DEFINE_OBJECT_REF_METHODS(IntImm, PrimExpr, IntImmNode);
};
@@ -307,8 +309,9 @@ class FloatImm : public PrimExpr {
* \brief Constructor.
* \param dtype The data type of the value.
* \param value The internal value.
+ * \param span The location in the source code.
*/
- TVM_DLL FloatImm(DataType dtype, double value);
+ TVM_DLL FloatImm(DataType dtype, double value, Span span = Span());
TVM_DEFINE_OBJECT_REF_METHODS(FloatImm, PrimExpr, FloatImmNode);
};
@@ -321,7 +324,7 @@ class FloatImm : public PrimExpr {
*/
class Bool : public IntImm {
public:
- explicit Bool(bool value) : IntImm(DataType::Bool(), value) {}
+ explicit Bool(bool value, Span span = Span()) : IntImm(DataType::Bool(), value, span) {}
Bool operator!() const { return Bool((*this)->value == 0); }
operator bool() const { return (*this)->value != 0; }
@@ -358,7 +361,7 @@ class Integer : public IntImm {
/*!
* \brief Construct integer from int value.
*/
- Integer(int value) : IntImm(DataType::Int(32), value) {} // NOLINT(*)
+ Integer(int value, Span span = Span()) : IntImm(DataType::Int(32), value, span) {} // NOLINT(*)
/*!
* \brief Construct integer from int imm.
* \param other The other value.
@@ -386,7 +389,7 @@ class Integer : public IntImm {
* \brief convert to int64_t
*/
operator int64_t() const {
- CHECK(data_ != nullptr) << " Trying to reference a null Integer";
+ ICHECK(data_ != nullptr) << " Trying to reference a null Integer";
return (*this)->value;
}
// comparators
@@ -461,9 +464,9 @@ class Range : public ObjectRef {
// implementataions
inline const Type& RelayExprNode::checked_type() const {
-  CHECK(checked_type_.defined()) << "internal error: the type checker has "
-                                 << "not populated the checked_type "
-                                 << "field for " << GetRef<RelayExpr>(this);
+  ICHECK(checked_type_.defined()) << "internal error: the type checker has "
+                                  << "not populated the checked_type "
+                                  << "field for " << GetRef<RelayExpr>(this);
return this->checked_type_;
}
@@ -471,11 +474,11 @@ template <typename TTypeNode>
inline const TTypeNode* RelayExprNode::type_as() const {
static_assert(std::is_base_of<TypeNode, TTypeNode>::value,
"TType must be a special case of type");
- CHECK(checked_type_.defined())
+ ICHECK(checked_type_.defined())
<< "Type inference for this Expr has not completed. Try to call infer_type pass.";
const TTypeNode* node = checked_type_.as<TTypeNode>();
- CHECK(node != nullptr) << "Expected type to be " << TTypeNode::_type_key << ", but get "
- << checked_type_->GetTypeKey();
+ ICHECK(node != nullptr) << "Expected type to be " << TTypeNode::_type_key << ", but get "
+ << checked_type_->GetTypeKey();
return node;
}
@@ -522,7 +525,7 @@ struct PackedFuncValueConverter<tvm::Bool> {
}
if (val.type_code() == kTVMArgInt) {
int v = val.operator int();
- CHECK(v == 0 || v == 1) << "ValueError: boolean value can only be 0 or 1, but get " << v;
+ ICHECK(v == 0 || v == 1) << "ValueError: boolean value can only be 0 or 1, but get " << v;
return Bool(static_cast<bool>(v));
}
return val.AsObjectRef<tvm::Bool>();
diff --git a/include/tvm/ir/module.h b/include/tvm/ir/module.h
index b3f8438f6ec9..d6fb6a20b58a 100644
--- a/include/tvm/ir/module.h
+++ b/include/tvm/ir/module.h
@@ -300,7 +300,7 @@ class IRModule : public ObjectRef {
/*! \return mutable pointers to the node. */
IRModuleNode* operator->() const {
auto* ptr = get_mutable();
- CHECK(ptr != nullptr);
+ ICHECK(ptr != nullptr);
return static_cast<IRModuleNode*>(ptr);
}
diff --git a/include/tvm/ir/op.h b/include/tvm/ir/op.h
index e7b35778d500..c73be3c1e564 100644
--- a/include/tvm/ir/op.h
+++ b/include/tvm/ir/op.h
@@ -146,7 +146,7 @@ class OpNode : public RelayExprNode {
// Internal function to compute if it is primitive op
bool IsPrimitiveOp_() const {
const auto& fn_ty = this->op_type;
- CHECK(fn_ty.get() != nullptr);
+ ICHECK(fn_ty.get() != nullptr);
if (fn_ty->type_constraints.size() != 1) return false;
const TypeRelationNode* rel = fn_ty->type_constraints[0].as<TypeRelationNode>();
if (rel == nullptr) return false;
@@ -462,7 +462,7 @@ inline OpRegEntry& OpRegEntry::set_support_level(int32_t n) { // NOLINT(*)
template <typename ValueType>
inline OpRegEntry& OpRegEntry::set_attr( // NOLINT(*)
const std::string& attr_name, const ValueType& value, int plevel) {
- CHECK_GT(plevel, 0) << "plevel in set_attr must be greater than 0";
+ ICHECK_GT(plevel, 0) << "plevel in set_attr must be greater than 0";
runtime::TVMRetValue rv;
rv = value;
UpdateAttr(attr_name, rv, plevel);
@@ -473,7 +473,7 @@ inline OpRegEntry& OpRegEntry::set_attr( // NOLINT(*)
template <typename ValueType>
inline ValueType OpAttrMap<ValueType>::get(const RelayExpr& expr, ValueType def_value) const {
- CHECK(expr.defined());
+ ICHECK(expr.defined());
if (const OpNode* op = expr.as<OpNode>()) {
  return this->map_.get(GetRef<Op>(op), def_value);
} else {
diff --git a/include/tvm/ir/transform.h b/include/tvm/ir/transform.h
index 2bbf28311b30..56905ded5201 100644
--- a/include/tvm/ir/transform.h
+++ b/include/tvm/ir/transform.h
@@ -166,7 +166,7 @@ class PassContext : public ObjectRef {
* \return const access pointer.
*/
const PassContextNode* operator->() const {
- CHECK(get() != nullptr);
+ ICHECK(get() != nullptr);
return static_cast<const PassContextNode*>(get());
}
/*!
@@ -174,7 +174,7 @@ class PassContext : public ObjectRef {
* \return mutable access pointer.
*/
PassContextNode* operator->() {
- CHECK(get() != nullptr);
+ ICHECK(get() != nullptr);
return static_cast<PassContextNode*>(get_mutable());
}
@@ -197,6 +197,13 @@ class PassContext : public ObjectRef {
*/
TVM_DLL void Trace(const IRModule& module, const PassInfo& info, bool is_before) const;
+ /*!
+ * \brief Check whether a pass is enabled.
+ * \param info The pass information.
+ * \return true if the pass is enabled. Otherwise, false.
+ */
+ TVM_DLL bool PassEnabled(const PassInfo& info) const;
+
/*!
* \brief Register a valid configuration option and its ValueType for validation.
*
@@ -344,7 +351,7 @@ class Pass : public ObjectRef {
*/
IRModule operator()(IRModule mod) const {
const PassNode* node = operator->();
- CHECK(node != nullptr);
+ ICHECK(node != nullptr);
return node->operator()(std::move(mod));
}
/*!
@@ -357,7 +364,7 @@ class Pass : public ObjectRef {
*/
IRModule operator()(IRModule mod, const PassContext& pass_ctx) const {
const PassNode* node = operator->();
- CHECK(node != nullptr);
+ ICHECK(node != nullptr);
return node->operator()(std::move(mod), pass_ctx);
}
diff --git a/include/tvm/ir/type_functor.h b/include/tvm/ir/type_functor.h
index 2a6314cf7644..11bf7d4740d0 100644
--- a/include/tvm/ir/type_functor.h
+++ b/include/tvm/ir/type_functor.h
@@ -71,7 +71,7 @@ class TypeFunctor<R(const Type& n, Args...)> {
* \return The result of the call
*/
virtual R VisitType(const Type& n, Args... args) {
- CHECK(n.defined());
+ ICHECK(n.defined());
static FType vtable = InitVTable();
return vtable(n, this, std::forward<Args>(args)...);
}
diff --git a/include/tvm/ir/type_relation.h b/include/tvm/ir/type_relation.h
index 83323b01e419..462588006c9b 100644
--- a/include/tvm/ir/type_relation.h
+++ b/include/tvm/ir/type_relation.h
@@ -29,6 +29,7 @@
#include
#include
#include
+#include
namespace tvm {
diff --git a/include/tvm/node/attr_registry_map.h b/include/tvm/node/attr_registry_map.h
index 9c554af9bc21..552aa7114657 100644
--- a/include/tvm/node/attr_registry_map.h
+++ b/include/tvm/node/attr_registry_map.h
@@ -56,9 +56,9 @@ class AttrRegistryMapContainerMap {
* \return the const reference to the content value.
*/
const runtime::TVMRetValue& operator[](const KeyType& key) const {
- CHECK(key.defined());
+ ICHECK(key.defined());
const uint32_t idx = key->AttrRegistryIndex();
- CHECK(idx < data_.size() && data_[idx].second != 0)
+ ICHECK(idx < data_.size() && data_[idx].second != 0)
<< "Attribute " << attr_name_ << " has not been registered for " << key->name;
return data_[idx].first;
}
@@ -71,7 +71,7 @@ class AttrRegistryMapContainerMap {
*/
template <typename ValueType>
ValueType get(const KeyType& key, ValueType def_value) const {
- CHECK(key.defined());
+ ICHECK(key.defined());
const uint32_t idx = key->AttrRegistryIndex();
if (idx < data_.size() && data_[idx].second != 0) {
return data_[idx].first;
diff --git a/include/tvm/node/container.h b/include/tvm/node/container.h
index 74dabc168924..209bb9e72f33 100644
--- a/include/tvm/node/container.h
+++ b/include/tvm/node/container.h
@@ -351,7 +351,7 @@ class SmallMapNode : public MapNode,
*/
const mapped_type& at(const key_type& key) const {
iterator itr = find(key);
- CHECK(itr.index < size_) << "IndexError: key is not in Map";
+ ICHECK(itr.index < size_) << "IndexError: key is not in Map";
return itr->second;
}
/*!
@@ -361,7 +361,7 @@ class SmallMapNode : public MapNode,
*/
mapped_type& at(const key_type& key) {
iterator itr = find(key);
- CHECK(itr.index < size_) << "IndexError: key is not in Map";
+ ICHECK(itr.index < size_) << "IndexError: key is not in Map";
return itr->second;
}
/*! \return begin iterator */
@@ -466,7 +466,7 @@ class SmallMapNode : public MapNode,
}
uint64_t next_size = std::max(map_node->slots_ * 2, uint64_t(kInitSize));
next_size = std::min(next_size, uint64_t(kMaxSize));
- CHECK_GT(next_size, map_node->slots_);
+ ICHECK_GT(next_size, map_node->slots_);
ObjectPtr