From 543041513aa409b0b30a9d4c85cb4ed0c8ecbd01 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Wed, 28 Jul 2021 10:25:38 -0700 Subject: [PATCH 01/23] Try pytest-xdist (constrained to 2 CPU max for CI) --- tests/scripts/setup-pytest-env.sh | 27 +++++++++++++++++++++++ tests/scripts/task_python_frontend.sh | 16 +++++++------- tests/scripts/task_python_frontend_cpu.sh | 6 ++--- tests/scripts/task_python_integration.sh | 18 +++++++-------- tests/scripts/task_python_topi.sh | 2 +- tests/scripts/task_python_unittest.sh | 8 +++---- tests/scripts/task_python_vta_fsim.sh | 4 ++-- tests/scripts/task_python_vta_tsim.sh | 4 ++-- 8 files changed, 56 insertions(+), 29 deletions(-) diff --git a/tests/scripts/setup-pytest-env.sh b/tests/scripts/setup-pytest-env.sh index bcd27a16f659..05522f1b6fd9 100755 --- a/tests/scripts/setup-pytest-env.sh +++ b/tests/scripts/setup-pytest-env.sh @@ -32,7 +32,33 @@ export PYTHONPATH="${TVM_PATH}/python" export TVM_PYTEST_RESULT_DIR="${TVM_PATH}/build/pytest-results" mkdir -p "${TVM_PYTEST_RESULT_DIR}" +if [ -n "${CI_PYTEST_NUM_CPUS-}" ]; then + PYTEST_NUM_CPUS=${CI_PYTEST_NUM_CPUS} +else + PYTEST_NUM_CPUS=$(nproc) + if [ -z "${PYTEST_NUM_CPUS}" ]; then + PYTEST_NUM_CPUS=1 + elif [ ${PYTEST_NUM_CPUS} -gt 1 ]; then + PYTEST_NUM_CPUS=$(expr ${PYTEST_NUM_CPUS} - 1) # Don't nuke interactive work. + fi +fi + +if [ ${PYTEST_NUM_CPUS} -gt 8 ]; then + PYTEST_NUM_CPUS=8 # It usually doesn't make sense to launch > 8 workers +fi + +# DNS: remove after we actually fix up CI_PYTEST_NUM_CPUS in Jenkinsfile +if [ ${PYTEST_NUM_CPUS} -gt 2 ]; then + PYTEST_NUM_CPUS=2 # Fix to 2 CPUs for Jenkins +fi + + function run_pytest() { + local extra_args=( ) + if [ "$1" == "--parallel" ]; then + extra_args=( "${extra_args[@]}" -n "${PYTEST_NUM_CPUS}" ) + shift + fi local ffi_type="$1" shift local test_suite_name="$1" @@ -46,5 +72,6 @@ function run_pytest() { -o "junit_suite_name=${test_suite_name}-${ffi_type}" \ "--junit-xml=${TVM_PYTEST_RESULT_DIR}/${test_suite_name}-${ffi_type}.xml" \ "--junit-prefix=${ffi_type}" \ + "${extra_args[@]}" \ "$@" } diff --git a/tests/scripts/task_python_frontend.sh b/tests/scripts/task_python_frontend.sh index a2f6d706a163..cf4ebaedd1fa 100755 --- a/tests/scripts/task_python_frontend.sh +++ b/tests/scripts/task_python_frontend.sh @@ -32,25 +32,25 @@ find . -type f -path "*.pyc" | xargs rm -f make cython3 echo "Running relay MXNet frontend test..." -run_pytest cython python-frontend-mxnet tests/python/frontend/mxnet +run_pytest --parallel cython python-frontend-mxnet tests/python/frontend/mxnet echo "Running relay ONNX frontend test..." -run_pytest cython python-frontend-onnx tests/python/frontend/onnx +run_pytest --parallel cython python-frontend-onnx tests/python/frontend/onnx echo "Running relay CoreML frontend test..." -run_pytest cython python-frontend-coreml tests/python/frontend/coreml +run_pytest --parallel cython python-frontend-coreml tests/python/frontend/coreml echo "Running relay Tensorflow frontend test..." -run_pytest cython python-frontend-tensorflow tests/python/frontend/tensorflow +run_pytest --parallel cython python-frontend-tensorflow tests/python/frontend/tensorflow echo "Running relay caffe2 frontend test..." -run_pytest cython python-frontend-caffe2 tests/python/frontend/caffe2 +run_pytest --parallel cython python-frontend-caffe2 tests/python/frontend/caffe2 echo "Running relay DarkNet frontend test..." -run_pytest cython python-frontend-darknet tests/python/frontend/darknet +run_pytest --parallel cython python-frontend-darknet tests/python/frontend/darknet echo "Running relay PyTorch frontend test..." -run_pytest cython python-frontend-pytorch tests/python/frontend/pytorch +run_pytest --parallel cython python-frontend-pytorch tests/python/frontend/pytorch echo "Running relay PaddlePaddle frontend test..." -run_pytest cython python-frontend-paddlepaddle tests/python/frontend/paddlepaddle +run_pytest --parallel cython python-frontend-paddlepaddle tests/python/frontend/paddlepaddle diff --git a/tests/scripts/task_python_frontend_cpu.sh b/tests/scripts/task_python_frontend_cpu.sh index 208714c64988..542a98d90c30 100755 --- a/tests/scripts/task_python_frontend_cpu.sh +++ b/tests/scripts/task_python_frontend_cpu.sh @@ -33,10 +33,10 @@ find . -type f -path "*.pyc" | xargs rm -f make cython3 echo "Running relay TFLite frontend test..." -run_pytest cython python-frontend-tflite tests/python/frontend/tflite +run_pytest --parallel cython python-frontend-tflite tests/python/frontend/tflite echo "Running relay Keras frontend test..." -run_pytest cython python-frontend-keras tests/python/frontend/keras +run_pytest --parallel cython python-frontend-keras tests/python/frontend/keras echo "Running relay Caffe frontend test..." -run_pytest cython python-frontend-caffe tests/python/frontend/caffe +run_pytest --parallel cython python-frontend-caffe tests/python/frontend/caffe diff --git a/tests/scripts/task_python_integration.sh b/tests/scripts/task_python_integration.sh index 00b63af48646..e525286aeaa0 100755 --- a/tests/scripts/task_python_integration.sh +++ b/tests/scripts/task_python_integration.sh @@ -44,33 +44,33 @@ rm -rf lib make cd ../.. -run_pytest ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-extensions apps/extension/tests -run_pytest cython ${TVM_INTEGRATION_TESTSUITE_NAME}-extensions apps/extension/tests +run_pytest --parallel ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-extensions apps/extension/tests +run_pytest --parallel cython ${TVM_INTEGRATION_TESTSUITE_NAME}-extensions apps/extension/tests # Test dso plugin cd apps/dso_plugin_module rm -rf lib make cd ../.. -run_pytest ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-dso_plugin_module apps/dso_plugin_module -run_pytest cython ${TVM_INTEGRATION_TESTSUITE_NAME}-dso_plugin_module apps/dso_plugin_module +run_pytest --parallel ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-dso_plugin_module apps/dso_plugin_module +run_pytest --parallel cython ${TVM_INTEGRATION_TESTSUITE_NAME}-dso_plugin_module apps/dso_plugin_module # Do not enable TensorFlow op # TVM_FFI=cython sh prepare_and_test_tfop_module.sh # TVM_FFI=ctypes sh prepare_and_test_tfop_module.sh -run_pytest ctypes ${TVM_INTEGRATION_TESTSUITE_NAME} tests/python/integration +run_pytest --parallel ctypes ${TVM_INTEGRATION_TESTSUITE_NAME} tests/python/integration if python -c "import tvm; from tvm.relay.op.contrib.ethosn import ethosn_available; print(ethosn_available().name)" -eq "SW_ONLY"; then - ETHOSN_VARIANT_CONFIG=ETHOSN78_1TOPS_4PLE_448KSRAM run_pytest ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-contrib-test_ethosn tests/python/contrib/test_ethosn + ETHOSN_VARIANT_CONFIG=ETHOSN78_1TOPS_4PLE_448KSRAM run_pytest --parallel ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-contrib-test_ethosn tests/python/contrib/test_ethosn fi -run_pytest ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-contrib tests/python/contrib +run_pytest --parallel ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-contrib tests/python/contrib # forked is needed because the global registry gets contaminated TVM_TEST_TARGETS="${TVM_RELAY_TEST_TARGETS:-llvm;cuda}" \ - run_pytest ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-relay tests/python/relay + run_pytest --parallel ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-relay tests/python/relay # Command line driver test -run_pytest ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-driver tests/python/driver +run_pytest --parallel ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-driver tests/python/driver # Do not enable OpenGL # run_pytest ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-webgl tests/webgl diff --git a/tests/scripts/task_python_topi.sh b/tests/scripts/task_python_topi.sh index 9a5991e6a766..a6983e1b763e 100755 --- a/tests/scripts/task_python_topi.sh +++ b/tests/scripts/task_python_topi.sh @@ -31,4 +31,4 @@ make cython3 # cleanup pycache find . -type f -path "*.pyc" | xargs rm -f -run_pytest cython python-topi tests/python/topi/ +run_pytest --parallel cython python-topi tests/python/topi/ diff --git a/tests/scripts/task_python_unittest.sh b/tests/scripts/task_python_unittest.sh index 54a36f6dcfd4..7b878dec8574 100755 --- a/tests/scripts/task_python_unittest.sh +++ b/tests/scripts/task_python_unittest.sh @@ -31,9 +31,9 @@ if [ -z "${TVM_UNITTEST_TESTSUITE_NAME:-}" ]; then fi # First run minimal test on both ctypes and cython. -run_pytest ctypes ${TVM_UNITTEST_TESTSUITE_NAME}-platform-minimal-test tests/python/all-platform-minimal-test -run_pytest cython ${TVM_UNITTEST_TESTSUITE_NAME}-platform-minimal-test tests/python/all-platform-minimal-test +run_pytest --parallel ctypes ${TVM_UNITTEST_TESTSUITE_NAME}-platform-minimal-test tests/python/all-platform-minimal-test +run_pytest --parallel cython ${TVM_UNITTEST_TESTSUITE_NAME}-platform-minimal-test tests/python/all-platform-minimal-test # Then run all unittests on both ctypes and cython. -run_pytest ctypes ${TVM_UNITTEST_TESTSUITE_NAME} tests/python/unittest -run_pytest cython ${TVM_UNITTEST_TESTSUITE_NAME} tests/python/unittest +run_pytest --parallel ctypes ${TVM_UNITTEST_TESTSUITE_NAME} tests/python/unittest +run_pytest --parallel cython ${TVM_UNITTEST_TESTSUITE_NAME} tests/python/unittest diff --git a/tests/scripts/task_python_vta_fsim.sh b/tests/scripts/task_python_vta_fsim.sh index 4074fb888351..14eb22965516 100755 --- a/tests/scripts/task_python_vta_fsim.sh +++ b/tests/scripts/task_python_vta_fsim.sh @@ -40,8 +40,8 @@ cp ${VTA_HW_PATH}/config/fsim_sample.json ${VTA_HW_PATH}/config/vta_config.json # Run unit tests in functional/fast simulator echo "Running unittest in fsim..." -run_pytest cython python-vta-fsim-unittest ${TVM_PATH}/vta/tests/python/unittest +run_pytest --parallel cython python-vta-fsim-unittest ${TVM_PATH}/vta/tests/python/unittest # Run unit tests in functional/fast simulator echo "Running integration test in fsim..." -run_pytest cython python-vta-fsim-integration ${TVM_PATH}/vta/tests/python/integration +run_pytest --parallel cython python-vta-fsim-integration ${TVM_PATH}/vta/tests/python/integration diff --git a/tests/scripts/task_python_vta_tsim.sh b/tests/scripts/task_python_vta_tsim.sh index 4c21f46c5f81..b0f7eab064d9 100755 --- a/tests/scripts/task_python_vta_tsim.sh +++ b/tests/scripts/task_python_vta_tsim.sh @@ -58,11 +58,11 @@ make -C ${VTA_HW_PATH}/hardware/chisel USE_THREADS=0 lib # Run unit tests in cycle accurate simulator echo "Running unittest in tsim..." -run_pytest cython python-vta-tsim-unittest ${TVM_PATH}/vta/tests/python/unittest +run_pytest --parallel cython python-vta-tsim-unittest ${TVM_PATH}/vta/tests/python/unittest # Run unit tests in cycle accurate simulator echo "Running integration test in tsim..." -run_pytest cython python-vta-tsim-integration ${TVM_PATH}/vta/tests/python/integration +run_pytest --parallel cython python-vta-tsim-integration ${TVM_PATH}/vta/tests/python/integration # Reset default fsim simulation cp ${VTA_HW_PATH}/config/fsim_sample.json ${VTA_HW_PATH}/config/vta_config.json From a0b17e1f7ba724ea1de35fa7dec1307ed84fa362 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Thu, 29 Jul 2021 16:41:24 -0700 Subject: [PATCH 02/23] serialize test_tvm_testing_features --- python/tvm/testing/plugin.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/python/tvm/testing/plugin.py b/python/tvm/testing/plugin.py index 95875acbd82c..ab65f4318275 100644 --- a/python/tvm/testing/plugin.py +++ b/python/tvm/testing/plugin.py @@ -288,3 +288,18 @@ def _parametrize_correlated_parameters(metafunc): names = ",".join(name for name, values in params) value_sets = zip(*[values for name, values in params]) metafunc.parametrize(names, value_sets, indirect=True, ids=ids) + +from xdist.scheduler.loadscope import LoadScopeScheduling + + +class MyScheduler(LoadScopeScheduling): + def _split_scope(self, nodeid): + # NOTE: test_tvm_testing_features contains parametrization-related tests, and must be + # serialized on a single host. + if "test_tvm_testing_features" in nodeid: + return "functional-tests" + return nodeid + + +def pytest_xdist_make_scheduler(config, log): + return MyScheduler(config, log) From daa6849d1fa900fe89512e1e460f18f4162e63f8 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Mon, 2 Aug 2021 15:33:05 -0700 Subject: [PATCH 03/23] fix unbound local and only run --parallel for build and CPU integration --- tests/scripts/setup-pytest-env.sh | 2 +- tests/scripts/task_python_frontend.sh | 10 ++++----- tests/scripts/task_python_frontend_cpu.sh | 6 +++--- tests/scripts/task_python_integration.sh | 21 +++++++++++-------- .../task_python_integration_gpuonly.sh | 1 + tests/scripts/task_python_topi.sh | 2 +- 6 files changed, 23 insertions(+), 19 deletions(-) diff --git a/tests/scripts/setup-pytest-env.sh b/tests/scripts/setup-pytest-env.sh index 05522f1b6fd9..c5c9f059dc3a 100755 --- a/tests/scripts/setup-pytest-env.sh +++ b/tests/scripts/setup-pytest-env.sh @@ -56,7 +56,7 @@ fi function run_pytest() { local extra_args=( ) if [ "$1" == "--parallel" ]; then - extra_args=( "${extra_args[@]}" -n "${PYTEST_NUM_CPUS}" ) + extra_args=( -n "${PYTEST_NUM_CPUS}" ) shift fi local ffi_type="$1" diff --git a/tests/scripts/task_python_frontend.sh b/tests/scripts/task_python_frontend.sh index cf4ebaedd1fa..22f7cef33510 100755 --- a/tests/scripts/task_python_frontend.sh +++ b/tests/scripts/task_python_frontend.sh @@ -32,22 +32,22 @@ find . -type f -path "*.pyc" | xargs rm -f make cython3 echo "Running relay MXNet frontend test..." -run_pytest --parallel cython python-frontend-mxnet tests/python/frontend/mxnet +run_pytest cython python-frontend-mxnet tests/python/frontend/mxnet echo "Running relay ONNX frontend test..." run_pytest --parallel cython python-frontend-onnx tests/python/frontend/onnx echo "Running relay CoreML frontend test..." -run_pytest --parallel cython python-frontend-coreml tests/python/frontend/coreml +run_pytest cython python-frontend-coreml tests/python/frontend/coreml echo "Running relay Tensorflow frontend test..." -run_pytest --parallel cython python-frontend-tensorflow tests/python/frontend/tensorflow +run_pytest cython python-frontend-tensorflow tests/python/frontend/tensorflow echo "Running relay caffe2 frontend test..." -run_pytest --parallel cython python-frontend-caffe2 tests/python/frontend/caffe2 +run_pytest cython python-frontend-caffe2 tests/python/frontend/caffe2 echo "Running relay DarkNet frontend test..." -run_pytest --parallel cython python-frontend-darknet tests/python/frontend/darknet +run_pytest cython python-frontend-darknet tests/python/frontend/darknet echo "Running relay PyTorch frontend test..." run_pytest --parallel cython python-frontend-pytorch tests/python/frontend/pytorch diff --git a/tests/scripts/task_python_frontend_cpu.sh b/tests/scripts/task_python_frontend_cpu.sh index 542a98d90c30..208714c64988 100755 --- a/tests/scripts/task_python_frontend_cpu.sh +++ b/tests/scripts/task_python_frontend_cpu.sh @@ -33,10 +33,10 @@ find . -type f -path "*.pyc" | xargs rm -f make cython3 echo "Running relay TFLite frontend test..." -run_pytest --parallel cython python-frontend-tflite tests/python/frontend/tflite +run_pytest cython python-frontend-tflite tests/python/frontend/tflite echo "Running relay Keras frontend test..." -run_pytest --parallel cython python-frontend-keras tests/python/frontend/keras +run_pytest cython python-frontend-keras tests/python/frontend/keras echo "Running relay Caffe frontend test..." -run_pytest --parallel cython python-frontend-caffe tests/python/frontend/caffe +run_pytest cython python-frontend-caffe tests/python/frontend/caffe diff --git a/tests/scripts/task_python_integration.sh b/tests/scripts/task_python_integration.sh index e525286aeaa0..ac50737d152f 100755 --- a/tests/scripts/task_python_integration.sh +++ b/tests/scripts/task_python_integration.sh @@ -44,33 +44,36 @@ rm -rf lib make cd ../.. -run_pytest --parallel ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-extensions apps/extension/tests -run_pytest --parallel cython ${TVM_INTEGRATION_TESTSUITE_NAME}-extensions apps/extension/tests +# Only run in parallel for CPU integration tests. +PYTEST_PARALLEL="${PYTEST_PARALLEL:---parallel}" + +run_pytest "${PYTEST_PARALELL}" ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-extensions apps/extension/tests +run_pytest "${PYTEST_PARALELL}" cython ${TVM_INTEGRATION_TESTSUITE_NAME}-extensions apps/extension/tests # Test dso plugin cd apps/dso_plugin_module rm -rf lib make cd ../.. -run_pytest --parallel ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-dso_plugin_module apps/dso_plugin_module -run_pytest --parallel cython ${TVM_INTEGRATION_TESTSUITE_NAME}-dso_plugin_module apps/dso_plugin_module +run_pytest "${PYTEST_PARALELL}" ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-dso_plugin_module apps/dso_plugin_module +run_pytest "${PYTEST_PARALELL}" cython ${TVM_INTEGRATION_TESTSUITE_NAME}-dso_plugin_module apps/dso_plugin_module # Do not enable TensorFlow op # TVM_FFI=cython sh prepare_and_test_tfop_module.sh # TVM_FFI=ctypes sh prepare_and_test_tfop_module.sh -run_pytest --parallel ctypes ${TVM_INTEGRATION_TESTSUITE_NAME} tests/python/integration +run_pytest "${PYTEST_PARALELL}" ctypes ${TVM_INTEGRATION_TESTSUITE_NAME} tests/python/integration if python -c "import tvm; from tvm.relay.op.contrib.ethosn import ethosn_available; print(ethosn_available().name)" -eq "SW_ONLY"; then - ETHOSN_VARIANT_CONFIG=ETHOSN78_1TOPS_4PLE_448KSRAM run_pytest --parallel ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-contrib-test_ethosn tests/python/contrib/test_ethosn + ETHOSN_VARIANT_CONFIG=ETHOSN78_1TOPS_4PLE_448KSRAM run_pytest "${PYTEST_PARALELL}" ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-contrib-test_ethosn tests/python/contrib/test_ethosn fi -run_pytest --parallel ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-contrib tests/python/contrib +run_pytest "${PYTEST_PARALELL}" ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-contrib tests/python/contrib # forked is needed because the global registry gets contaminated TVM_TEST_TARGETS="${TVM_RELAY_TEST_TARGETS:-llvm;cuda}" \ - run_pytest --parallel ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-relay tests/python/relay + run_pytest "${PYTEST_PARALELL}" ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-relay tests/python/relay # Command line driver test -run_pytest --parallel ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-driver tests/python/driver +run_pytest "${PYTEST_PARALELL}" ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-driver tests/python/driver # Do not enable OpenGL # run_pytest ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-webgl tests/webgl diff --git a/tests/scripts/task_python_integration_gpuonly.sh b/tests/scripts/task_python_integration_gpuonly.sh index ac09cb5a14a3..0e2820454278 100755 --- a/tests/scripts/task_python_integration_gpuonly.sh +++ b/tests/scripts/task_python_integration_gpuonly.sh @@ -20,5 +20,6 @@ export TVM_TEST_TARGETS="cuda;opencl;metal;rocm;vulkan;nvptx;opencl -device=mali export PYTEST_ADDOPTS="-m gpu $PYTEST_ADDOPTS" export TVM_RELAY_TEST_TARGETS="cuda" export TVM_INTEGRATION_TESTSUITE_NAME=python-integration-gpu +export TVM_GPU_INTEGRATION_TESTS=1 ./tests/scripts/task_python_integration.sh diff --git a/tests/scripts/task_python_topi.sh b/tests/scripts/task_python_topi.sh index a6983e1b763e..9a5991e6a766 100755 --- a/tests/scripts/task_python_topi.sh +++ b/tests/scripts/task_python_topi.sh @@ -31,4 +31,4 @@ make cython3 # cleanup pycache find . -type f -path "*.pyc" | xargs rm -f -run_pytest --parallel cython python-topi tests/python/topi/ +run_pytest cython python-topi tests/python/topi/ From 43b9f31105826945288220eea941660b51c69883 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Mon, 2 Aug 2021 16:03:23 -0700 Subject: [PATCH 04/23] fix typo --- tests/scripts/task_python_integration.sh | 18 +++++++++--------- .../scripts/task_python_integration_gpuonly.sh | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/scripts/task_python_integration.sh b/tests/scripts/task_python_integration.sh index ac50737d152f..7979a5d74dd6 100755 --- a/tests/scripts/task_python_integration.sh +++ b/tests/scripts/task_python_integration.sh @@ -47,33 +47,33 @@ cd ../.. # Only run in parallel for CPU integration tests. PYTEST_PARALLEL="${PYTEST_PARALLEL:---parallel}" -run_pytest "${PYTEST_PARALELL}" ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-extensions apps/extension/tests -run_pytest "${PYTEST_PARALELL}" cython ${TVM_INTEGRATION_TESTSUITE_NAME}-extensions apps/extension/tests +run_pytest "${PYTEST_PARALLEL}" ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-extensions apps/extension/tests +run_pytest "${PYTEST_PARALLEL}" cython ${TVM_INTEGRATION_TESTSUITE_NAME}-extensions apps/extension/tests # Test dso plugin cd apps/dso_plugin_module rm -rf lib make cd ../.. -run_pytest "${PYTEST_PARALELL}" ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-dso_plugin_module apps/dso_plugin_module -run_pytest "${PYTEST_PARALELL}" cython ${TVM_INTEGRATION_TESTSUITE_NAME}-dso_plugin_module apps/dso_plugin_module +run_pytest "${PYTEST_PARALLEL}" ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-dso_plugin_module apps/dso_plugin_module +run_pytest "${PYTEST_PARALLEL}" cython ${TVM_INTEGRATION_TESTSUITE_NAME}-dso_plugin_module apps/dso_plugin_module # Do not enable TensorFlow op # TVM_FFI=cython sh prepare_and_test_tfop_module.sh # TVM_FFI=ctypes sh prepare_and_test_tfop_module.sh -run_pytest "${PYTEST_PARALELL}" ctypes ${TVM_INTEGRATION_TESTSUITE_NAME} tests/python/integration +run_pytest "${PYTEST_PARALLEL}" ctypes ${TVM_INTEGRATION_TESTSUITE_NAME} tests/python/integration if python -c "import tvm; from tvm.relay.op.contrib.ethosn import ethosn_available; print(ethosn_available().name)" -eq "SW_ONLY"; then - ETHOSN_VARIANT_CONFIG=ETHOSN78_1TOPS_4PLE_448KSRAM run_pytest "${PYTEST_PARALELL}" ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-contrib-test_ethosn tests/python/contrib/test_ethosn + ETHOSN_VARIANT_CONFIG=ETHOSN78_1TOPS_4PLE_448KSRAM run_pytest "${PYTEST_PARALLEL}" ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-contrib-test_ethosn tests/python/contrib/test_ethosn fi -run_pytest "${PYTEST_PARALELL}" ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-contrib tests/python/contrib +run_pytest "${PYTEST_PARALLEL}" ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-contrib tests/python/contrib # forked is needed because the global registry gets contaminated TVM_TEST_TARGETS="${TVM_RELAY_TEST_TARGETS:-llvm;cuda}" \ - run_pytest "${PYTEST_PARALELL}" ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-relay tests/python/relay + run_pytest "${PYTEST_PARALLEL}" ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-relay tests/python/relay # Command line driver test -run_pytest "${PYTEST_PARALELL}" ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-driver tests/python/driver +run_pytest "${PYTEST_PARALLEL}" ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-driver tests/python/driver # Do not enable OpenGL # run_pytest ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-webgl tests/webgl diff --git a/tests/scripts/task_python_integration_gpuonly.sh b/tests/scripts/task_python_integration_gpuonly.sh index 0e2820454278..30a83a5d21f0 100755 --- a/tests/scripts/task_python_integration_gpuonly.sh +++ b/tests/scripts/task_python_integration_gpuonly.sh @@ -20,6 +20,6 @@ export TVM_TEST_TARGETS="cuda;opencl;metal;rocm;vulkan;nvptx;opencl -device=mali export PYTEST_ADDOPTS="-m gpu $PYTEST_ADDOPTS" export TVM_RELAY_TEST_TARGETS="cuda" export TVM_INTEGRATION_TESTSUITE_NAME=python-integration-gpu -export TVM_GPU_INTEGRATION_TESTS=1 +export PYTEST_PARALLEL= ./tests/scripts/task_python_integration.sh From 86a6a7837b1ebf582252dcc0fb15413f04ae9ce4 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Mon, 2 Aug 2021 17:41:37 -0700 Subject: [PATCH 05/23] why is it running so many tests? --- tests/scripts/task_python_unittest.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/scripts/task_python_unittest.sh b/tests/scripts/task_python_unittest.sh index 7b878dec8574..6c35042714a8 100755 --- a/tests/scripts/task_python_unittest.sh +++ b/tests/scripts/task_python_unittest.sh @@ -18,6 +18,7 @@ set -e set -u +set -x source tests/scripts/setup-pytest-env.sh From 77a870bddb7b518f3438e03a3bad159a9ce32620 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Mon, 9 Aug 2021 17:01:45 -0700 Subject: [PATCH 06/23] Fix using nvcc from xdist and also whenever stdin is closed :| --- python/tvm/contrib/nvcc.py | 4 +++- .../python/unittest/test_auto_scheduler_search_policy.py | 9 +-------- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/python/tvm/contrib/nvcc.py b/python/tvm/contrib/nvcc.py index 612be292e873..2a7c0de7be98 100644 --- a/python/tvm/contrib/nvcc.py +++ b/python/tvm/contrib/nvcc.py @@ -97,7 +97,9 @@ def compile_cuda(code, target="ptx", arch=None, options=None, path_target=None): # if cxx_compiler_path != "": # cmd += ["-ccbin", cxx_compiler_path] - proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + # NOTE(areusch): Per https://github.com/lpereira/lwan/issues/106, stdin must be left open. + proc = subprocess.Popen( + cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) (out, _) = proc.communicate() diff --git a/tests/python/unittest/test_auto_scheduler_search_policy.py b/tests/python/unittest/test_auto_scheduler_search_policy.py index a9f6596a8548..84480c9526de 100644 --- a/tests/python/unittest/test_auto_scheduler_search_policy.py +++ b/tests/python/unittest/test_auto_scheduler_search_policy.py @@ -210,11 +210,4 @@ def apply_func(search_policy, state, stage_id): if __name__ == "__main__": - test_workload_registry_empty_policy() - test_sketch_search_policy_basic() - test_sketch_search_policy_basic_spawn() - test_sketch_search_policy_xgbmodel() - test_sketch_search_policy_cuda_rpc_runner() - test_sketch_search_policy_cuda_xgbmodel_rpc_runner() - test_sketch_search_policy_zero_rank() - test_sketch_search_policy_custom_sketch() + sys.exit(pytest.main([__file__] + sys.argv[1:])) From 870e999ee10bd7fdee215cf2c57dd42c78232836 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Wed, 11 Aug 2021 16:27:20 -0700 Subject: [PATCH 07/23] rename scheduler --- python/tvm/testing/plugin.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/tvm/testing/plugin.py b/python/tvm/testing/plugin.py index ab65f4318275..8f32beb5d6a8 100644 --- a/python/tvm/testing/plugin.py +++ b/python/tvm/testing/plugin.py @@ -292,7 +292,7 @@ def _parametrize_correlated_parameters(metafunc): from xdist.scheduler.loadscope import LoadScopeScheduling -class MyScheduler(LoadScopeScheduling): +class TvmTestScheduler(LoadScopeScheduling): def _split_scope(self, nodeid): # NOTE: test_tvm_testing_features contains parametrization-related tests, and must be # serialized on a single host. @@ -302,4 +302,4 @@ def _split_scope(self, nodeid): def pytest_xdist_make_scheduler(config, log): - return MyScheduler(config, log) + return TvmTestScheduler(config, log) \ No newline at end of file From 8ad193ab3693ec24391c1514de72ca46e8baefe3 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Wed, 11 Aug 2021 16:44:54 -0700 Subject: [PATCH 08/23] commit num cpus hook --- Jenkinsfile | 12 +++--- docker/bash.sh | 66 ++++++++++++++++++++++++++----- tests/scripts/setup-pytest-env.sh | 13 +++--- 3 files changed, 67 insertions(+), 24 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index fa1629205080..b2b84de9877b 100755 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -206,11 +206,11 @@ stage('Build') { ws(per_exec_ws("tvm/build-gpu")) { init_git() sh "${docker_run} ${ci_gpu} ./tests/scripts/task_config_build_gpu.sh" - make(ci_gpu, 'build', '-j2') + make(ci_gpu, 'build', '-j') pack_lib('gpu', tvm_multilib) // compiler test sh "${docker_run} ${ci_gpu} ./tests/scripts/task_config_build_gpu_vulkan.sh" - make(ci_gpu, 'build2', '-j2') + make(ci_gpu, 'build2', '-j') } } }, @@ -219,7 +219,7 @@ stage('Build') { ws(per_exec_ws("tvm/build-cpu")) { init_git() sh "${docker_run} ${ci_cpu} ./tests/scripts/task_config_build_cpu.sh" - make(ci_cpu, 'build', '-j2') + make(ci_cpu, 'build') pack_lib('cpu', tvm_multilib_tsim) timeout(time: max_time, unit: 'MINUTES') { sh "${docker_run} ${ci_cpu} ./tests/scripts/task_ci_setup.sh" @@ -240,7 +240,7 @@ stage('Build') { ws(per_exec_ws("tvm/build-wasm")) { init_git() sh "${docker_run} ${ci_wasm} ./tests/scripts/task_config_build_wasm.sh" - make(ci_wasm, 'build', '-j2') + make(ci_wasm, 'build', '-j') timeout(time: max_time, unit: 'MINUTES') { sh "${docker_run} ${ci_wasm} ./tests/scripts/task_ci_setup.sh" sh "${docker_run} ${ci_wasm} ./tests/scripts/task_web_wasm.sh" @@ -253,7 +253,7 @@ stage('Build') { ws(per_exec_ws("tvm/build-i386")) { init_git() sh "${docker_run} ${ci_i386} ./tests/scripts/task_config_build_i386.sh" - make(ci_i386, 'build', '-j2') + make(ci_i386, 'build') pack_lib('i386', tvm_multilib_tsim) } } @@ -273,7 +273,7 @@ stage('Build') { ws(per_exec_ws("tvm/build-qemu")) { init_git() sh "${docker_run} ${ci_qemu} ./tests/scripts/task_config_build_qemu.sh" - make(ci_qemu, 'build', '-j2') + make(ci_qemu, 'build', '-j') timeout(time: max_time, unit: 'MINUTES') { sh "${docker_run} ${ci_qemu} ./tests/scripts/task_ci_setup.sh" sh "${docker_run} ${ci_qemu} ./tests/scripts/task_python_microtvm.sh" diff --git a/docker/bash.sh b/docker/bash.sh index 372cfded8f89..12a1c8457284 100755 --- a/docker/bash.sh +++ b/docker/bash.sh @@ -38,11 +38,19 @@ set -euo pipefail function show_usage() { cat <] [--mount MOUNT_DIR] [--repo-mount-point REPO_MOUNT_POINT] [--dry-run] [--] [COMMAND] +--cpuset-cpus= + + Restrict docker container to use specific CPUs. See + docker run --help for further documentation of this parameter. + When launched from the CI (the "CI" environment variable is set), + this parameter is inferred from the "NODE_NAME" and "EXECUTOR_NUMBER" + environment variables. + -h, --help Display this help message. @@ -55,13 +63,6 @@ Usage: docker/bash.sh [-i|--interactive] [--net=host] [-t|--tty] Start the docker session with a pseudo terminal (tty). ---net=host - - Expose servers run into the container to the host, passing the - "--net=host" argument through to docker. On MacOS, this is - instead passed as "-p 8888:8888" since the host networking driver - isn't supported. - --mount MOUNT_DIR Expose MOUNT_DIR as an additional mount point inside the docker @@ -69,6 +70,13 @@ Usage: docker/bash.sh [-i|--interactive] [--net=host] [-t|--tty] the folder location outside the container. This option can be specified multiple times. +--net=host + + Expose servers run into the container to the host, passing the + "--net=host" argument through to docker. On MacOS, this is + instead passed as "-p 8888:8888" since the host networking driver + isn't supported. + --repo-mount-point REPO_MOUNT_POINT The directory inside the docker container at which the TVM @@ -111,6 +119,7 @@ EOF SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)" REPO_DIR="$(dirname "${SCRIPT_DIR}")" +CPUSET_CPUS= DRY_RUN=false INTERACTIVE=false TTY=false @@ -146,6 +155,16 @@ break_joined_flag='if (( ${#1} == 2 )); then shift; else set -- -"${1#-i}" "${@: while (( $# )); do case "$1" in + --cpuset-cpus=?*) + CPUSET_CPUS="${1#*=}" + shift + ;; + + --dry-run) + DRY_RUN=true + shift + ;; + -h|--help) show_usage exit 0 @@ -180,8 +199,8 @@ while (( $# )); do shift ;; - --dry-run) - DRY_RUN=true + --net=host) + USE_NET_HOST=true shift ;; @@ -276,6 +295,33 @@ DOCKER_ENV+=( --env CI_BUILD_HOME="${REPO_MOUNT_POINT}" --env CI_IMAGE_NAME="${DOCKER_IMAGE_NAME}" ) +# Choose CPUs on which this container will execute. +if [ -n "${CI+x}" -a -z "${CPUSET_CPUS}" ]; then + if [ -n "${CI_NUM_EXECUTORS-}" ]; then + if [ -n "${CI_CPUSET_LOWER_BOUND-}" -a -n "${CI_CPUSET_UPPER_BOUND-}" ]; then + TOTAL_CPUS=$(expr "${CI_CPUSET_UPPER_BOUND}" - "${CI_CPUSET_LOWER_BOUND}" + 1) + else + TOTAL_CPUS=$(nproc) + CI_CPUSET_LOWER_BOUND=0 + fi + CPUS_PER_EXECUTOR=$(expr "${TOTAL_CPUS}" / "${CI_NUM_EXECUTORS}") + CPUSET_CPUS_LOWER_BOUND=$(expr "${CI_CPUSET_LOWER_BOUND}" + \( "${CPUS_PER_EXECUTOR}" '*' \( "${EXECUTOR_NUMBER}" - 1 \) \) ) + CPUSET_CPUS_UPPER_BOUND=$(expr "${CPUSET_CPUS_LOWER_BOUND}" + "${CPUS_PER_EXECUTOR}" - 1) + CPUSET_CPUS="${CPUSET_CPUS_LOWER_BOUND}-${CPUSET_CPUS_UPPER_BOUND}" + echo "COMPUTE TOTAL_CPUS=${TOTAL_CPUS} CPUS_PER_EXECUTOR=${CPUS_PER_EXECUTOR} CPUSET_CPUS_LOWER_BOUND=${CPUSET_CPUS_LOWER_BOUND} CPUSET_CPUS_UPPER_BOUND=${CPUSET_CPUS_UPPER_BOUND}" + else + echo "WARNING: CI_NUM_EXECUTORS environment variable not set." + echo "No CPU parallism will be used in this CI build, so it may be quite slow." + fi +fi + +if [ -n "${CPUSET_CPUS}" ]; then + CI_DOCKER_EXTRA_PARAMS+=( + "--cpuset-cpus=${CPUSET_CPUS}" + "-e CI_CPUSET_CPUS=${CPUSET_CPUS}" + ) + echo "USING CPUSET_CPUS ${CPUSET_CPUS}" +fi # Pass tvm test data folder through to the docker container, to avoid # repeated downloads. Check if we have permissions to write to the diff --git a/tests/scripts/setup-pytest-env.sh b/tests/scripts/setup-pytest-env.sh index c5c9f059dc3a..51605d4ac358 100755 --- a/tests/scripts/setup-pytest-env.sh +++ b/tests/scripts/setup-pytest-env.sh @@ -32,11 +32,14 @@ export PYTHONPATH="${TVM_PATH}/python" export TVM_PYTEST_RESULT_DIR="${TVM_PATH}/build/pytest-results" mkdir -p "${TVM_PYTEST_RESULT_DIR}" -if [ -n "${CI_PYTEST_NUM_CPUS-}" ]; then - PYTEST_NUM_CPUS=${CI_PYTEST_NUM_CPUS} +if [ -n "${CI_CPUSET_CPUS-}" ]; then + # When --cpuset-cpus has been passed to docker (this is set by docker/bash.sh), + # use all possible CPUs. + PYTEST_NUM_CPUS=$(nproc) else PYTEST_NUM_CPUS=$(nproc) if [ -z "${PYTEST_NUM_CPUS}" ]; then + echo "WARNING: nproc failed; running pytest with only 1 CPU" PYTEST_NUM_CPUS=1 elif [ ${PYTEST_NUM_CPUS} -gt 1 ]; then PYTEST_NUM_CPUS=$(expr ${PYTEST_NUM_CPUS} - 1) # Don't nuke interactive work. @@ -47,12 +50,6 @@ if [ ${PYTEST_NUM_CPUS} -gt 8 ]; then PYTEST_NUM_CPUS=8 # It usually doesn't make sense to launch > 8 workers fi -# DNS: remove after we actually fix up CI_PYTEST_NUM_CPUS in Jenkinsfile -if [ ${PYTEST_NUM_CPUS} -gt 2 ]; then - PYTEST_NUM_CPUS=2 # Fix to 2 CPUs for Jenkins -fi - - function run_pytest() { local extra_args=( ) if [ "$1" == "--parallel" ]; then From 4978c90702fd2d0838e13a7aeab48aa099724f4d Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Wed, 11 Aug 2021 19:03:30 -0700 Subject: [PATCH 09/23] black format --- python/tvm/contrib/nvcc.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/tvm/contrib/nvcc.py b/python/tvm/contrib/nvcc.py index 2a7c0de7be98..4e8fc7c2176a 100644 --- a/python/tvm/contrib/nvcc.py +++ b/python/tvm/contrib/nvcc.py @@ -99,7 +99,8 @@ def compile_cuda(code, target="ptx", arch=None, options=None, path_target=None): # NOTE(areusch): Per https://github.com/lpereira/lwan/issues/106, stdin must be left open. proc = subprocess.Popen( - cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT + ) (out, _) = proc.communicate() From cba935810a55cf321de3d82aafdc74b7e5fca03b Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Wed, 11 Aug 2021 19:16:16 -0700 Subject: [PATCH 10/23] EXECUTOR_NUMBER is indeed 0-based --- docker/bash.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/bash.sh b/docker/bash.sh index 12a1c8457284..7e7068b3e546 100755 --- a/docker/bash.sh +++ b/docker/bash.sh @@ -33,7 +33,7 @@ # With -i, execute interactively. # -set -euo pipefail +set -xeuo pipefail function show_usage() { @@ -305,7 +305,7 @@ if [ -n "${CI+x}" -a -z "${CPUSET_CPUS}" ]; then CI_CPUSET_LOWER_BOUND=0 fi CPUS_PER_EXECUTOR=$(expr "${TOTAL_CPUS}" / "${CI_NUM_EXECUTORS}") - CPUSET_CPUS_LOWER_BOUND=$(expr "${CI_CPUSET_LOWER_BOUND}" + \( "${CPUS_PER_EXECUTOR}" '*' \( "${EXECUTOR_NUMBER}" - 1 \) \) ) + CPUSET_CPUS_LOWER_BOUND=$(expr "${CI_CPUSET_LOWER_BOUND}" + \( "${CPUS_PER_EXECUTOR}" '*' "${EXECUTOR_NUMBER}" \) ) CPUSET_CPUS_UPPER_BOUND=$(expr "${CPUSET_CPUS_LOWER_BOUND}" + "${CPUS_PER_EXECUTOR}" - 1) CPUSET_CPUS="${CPUSET_CPUS_LOWER_BOUND}-${CPUSET_CPUS_UPPER_BOUND}" echo "COMPUTE TOTAL_CPUS=${TOTAL_CPUS} CPUS_PER_EXECUTOR=${CPUS_PER_EXECUTOR} CPUSET_CPUS_LOWER_BOUND=${CPUSET_CPUS_LOWER_BOUND} CPUSET_CPUS_UPPER_BOUND=${CPUSET_CPUS_UPPER_BOUND}" From 4166f90fcf9b05c9a9ff75e62aa003d9d0293367 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Wed, 11 Aug 2021 19:16:24 -0700 Subject: [PATCH 11/23] Use all available ARM cpus --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index b2b84de9877b..418e8965f9d0 100755 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -263,7 +263,7 @@ stage('Build') { ws(per_exec_ws("tvm/build-arm")) { init_git() sh "${docker_run} ${ci_arm} ./tests/scripts/task_config_build_arm.sh" - make(ci_arm, 'build', '-j4') + make(ci_arm, 'build', '-j') pack_lib('arm', tvm_multilib) } } From e853b6a6bdc9db338c8d57662ed7cd8d4779ee9c Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Thu, 12 Aug 2021 09:21:15 -0700 Subject: [PATCH 12/23] actually use --cpuset-cpus... * yeah we're gonna have to think about testing this approach. --- docker/bash.sh | 20 +++++++++++++++----- tests/scripts/task_build.sh | 11 ++++++++++- 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/docker/bash.sh b/docker/bash.sh index 7e7068b3e546..4ea71d040a73 100755 --- a/docker/bash.sh +++ b/docker/bash.sh @@ -299,14 +299,19 @@ DOCKER_ENV+=( --env CI_BUILD_HOME="${REPO_MOUNT_POINT}" if [ -n "${CI+x}" -a -z "${CPUSET_CPUS}" ]; then if [ -n "${CI_NUM_EXECUTORS-}" ]; then if [ -n "${CI_CPUSET_LOWER_BOUND-}" -a -n "${CI_CPUSET_UPPER_BOUND-}" ]; then - TOTAL_CPUS=$(expr "${CI_CPUSET_UPPER_BOUND}" - "${CI_CPUSET_LOWER_BOUND}" + 1) + TOTAL_CPUS=$(expr "${CI_CPUSET_UPPER_BOUND}" - "${CI_CPUSET_LOWER_BOUND}" + 1) || /bin/true + if [ "${TOTAL_CPUS}" -lt 1 ]; then + echo "ERROR: computed TOTAL_CPUS=${TOTAL_CPUS} based on CI_CPUSET_{UPPER,LOWER}_BOUND!" + exit 2 + fi else TOTAL_CPUS=$(nproc) CI_CPUSET_LOWER_BOUND=0 fi CPUS_PER_EXECUTOR=$(expr "${TOTAL_CPUS}" / "${CI_NUM_EXECUTORS}") - CPUSET_CPUS_LOWER_BOUND=$(expr "${CI_CPUSET_LOWER_BOUND}" + \( "${CPUS_PER_EXECUTOR}" '*' "${EXECUTOR_NUMBER}" \) ) - CPUSET_CPUS_UPPER_BOUND=$(expr "${CPUSET_CPUS_LOWER_BOUND}" + "${CPUS_PER_EXECUTOR}" - 1) + # NOTE: Expr exit status varies by the computed value (good and bad!). + CPUSET_CPUS_LOWER_BOUND=$(expr "${CI_CPUSET_LOWER_BOUND}" + \( "${CPUS_PER_EXECUTOR}" '*' "${EXECUTOR_NUMBER}" \) ) || /bin/true + CPUSET_CPUS_UPPER_BOUND=$(expr "${CPUSET_CPUS_LOWER_BOUND}" + "${CPUS_PER_EXECUTOR}" - 1) || /bin/true CPUSET_CPUS="${CPUSET_CPUS_LOWER_BOUND}-${CPUSET_CPUS_UPPER_BOUND}" echo "COMPUTE TOTAL_CPUS=${TOTAL_CPUS} CPUS_PER_EXECUTOR=${CPUS_PER_EXECUTOR} CPUSET_CPUS_LOWER_BOUND=${CPUSET_CPUS_LOWER_BOUND} CPUSET_CPUS_UPPER_BOUND=${CPUSET_CPUS_UPPER_BOUND}" else @@ -315,10 +320,15 @@ if [ -n "${CI+x}" -a -z "${CPUSET_CPUS}" ]; then fi fi +echo "CPUSET_CPUS ${CPUSET_CPUS}" if [ -n "${CPUSET_CPUS}" ]; then - CI_DOCKER_EXTRA_PARAMS+=( + CPUSET_CPUS_LOWER_BOUND=$(echo "${CPUSET_CPUS}" | sed -E 's/^([0-9]+)-.*$/\1/g') + CPUSET_CPUS_UPPER_BOUND=$(echo "${CPUSET_CPUS}" | sed -E 's/^.*-([0-9]+)$/\1/g') + CPUSET_NUM_CPUS=$(expr "${CPUSET_CPUS_UPPER_BOUND}" - "${CPUSET_CPUS_LOWER_BOUND}" + 1) || /bin/true + DOCKER_FLAGS+=( "--cpuset-cpus=${CPUSET_CPUS}" - "-e CI_CPUSET_CPUS=${CPUSET_CPUS}" + "--env" "CI_CPUSET_CPUS=${CPUSET_CPUS}" + "--env" "CI_CPUSET_NUM_CPUS=${CPUSET_NUM_CPUS}" ) echo "USING CPUSET_CPUS ${CPUSET_CPUS}" fi diff --git a/tests/scripts/task_build.sh b/tests/scripts/task_build.sh index 845b7153ae20..0ff4a4638841 100755 --- a/tests/scripts/task_build.sh +++ b/tests/scripts/task_build.sh @@ -15,5 +15,14 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +set -eu + export VTA_HW_PATH=`pwd`/3rdparty/vta-hw -cd $1 && cmake .. -DCMAKE_BUILD_TYPE=RelWithDebInfo && make $2 && cd .. +MAKE_ARG="${2-}" +if [ -n "${CI_CPUSET_NUM_CPUS+x}" -a -z "${MAKE_ARG}" ]; then + MAKE_ARG="-j${CI_CPUSET_NUM_CPUS}" +fi + +cd "$1" +cmake .. -DCMAKE_BUILD_TYPE=RelWithDebInfo +make "${MAKE_ARG}" From 7261c3e3aef9e735e5c0a90b26a1a1dda9e4fd01 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Thu, 12 Aug 2021 09:37:17 -0700 Subject: [PATCH 13/23] hardcode build -j --- tests/scripts/task_build.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/scripts/task_build.sh b/tests/scripts/task_build.sh index 0ff4a4638841..46dc068dc2d3 100755 --- a/tests/scripts/task_build.sh +++ b/tests/scripts/task_build.sh @@ -15,14 +15,14 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -set -eu +set -eux export VTA_HW_PATH=`pwd`/3rdparty/vta-hw -MAKE_ARG="${2-}" -if [ -n "${CI_CPUSET_NUM_CPUS+x}" -a -z "${MAKE_ARG}" ]; then - MAKE_ARG="-j${CI_CPUSET_NUM_CPUS}" -fi +#MAKE_ARG="${2-}" +#if [ -n "${CI_CPUSET_NUM_CPUS+x}" -a -z "${MAKE_ARG}" ]; then +MAKE_ARG="-j${CI_CPUSET_NUM_CPUS}" +#fi cd "$1" -cmake .. -DCMAKE_BUILD_TYPE=RelWithDebInfo +#cmake .. -DCMAKE_BUILD_TYPE=RelWithDebInfo make "${MAKE_ARG}" From c9e170925dd8f421b863572c0e4573856343d4ac Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Thu, 12 Aug 2021 09:56:11 -0700 Subject: [PATCH 14/23] uncomment cmake --- tests/scripts/task_build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/scripts/task_build.sh b/tests/scripts/task_build.sh index 46dc068dc2d3..9b5141df303c 100755 --- a/tests/scripts/task_build.sh +++ b/tests/scripts/task_build.sh @@ -24,5 +24,5 @@ MAKE_ARG="-j${CI_CPUSET_NUM_CPUS}" #fi cd "$1" -#cmake .. -DCMAKE_BUILD_TYPE=RelWithDebInfo +cmake .. -DCMAKE_BUILD_TYPE=RelWithDebInfo make "${MAKE_ARG}" From aea9e1ea3207cd6281cdc0cfae7441a7fabb17ce Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Thu, 12 Aug 2021 12:58:10 -0700 Subject: [PATCH 15/23] remove -j flag from Jenkinsfile since it is useless now --- Jenkinsfile | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 418e8965f9d0..1fa834ac997c 100755 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -162,7 +162,7 @@ stage("Sanity Check") { // Run make. First try to do an incremental make from a previous workspace in hope to // accelerate the compilation. If something wrong, clean the workspace and then // build from scratch. -def make(docker_type, path, make_flag) { +def make(docker_type, path, make_flag='') { timeout(time: max_time, unit: 'MINUTES') { try { sh "${docker_run} ${docker_type} ./tests/scripts/task_build.sh ${path} ${make_flag}" @@ -206,11 +206,11 @@ stage('Build') { ws(per_exec_ws("tvm/build-gpu")) { init_git() sh "${docker_run} ${ci_gpu} ./tests/scripts/task_config_build_gpu.sh" - make(ci_gpu, 'build', '-j') + make(ci_gpu, 'build') pack_lib('gpu', tvm_multilib) // compiler test sh "${docker_run} ${ci_gpu} ./tests/scripts/task_config_build_gpu_vulkan.sh" - make(ci_gpu, 'build2', '-j') + make(ci_gpu, 'build2') } } }, @@ -240,7 +240,7 @@ stage('Build') { ws(per_exec_ws("tvm/build-wasm")) { init_git() sh "${docker_run} ${ci_wasm} ./tests/scripts/task_config_build_wasm.sh" - make(ci_wasm, 'build', '-j') + make(ci_wasm, 'build') timeout(time: max_time, unit: 'MINUTES') { sh "${docker_run} ${ci_wasm} ./tests/scripts/task_ci_setup.sh" sh "${docker_run} ${ci_wasm} ./tests/scripts/task_web_wasm.sh" @@ -263,7 +263,7 @@ stage('Build') { ws(per_exec_ws("tvm/build-arm")) { init_git() sh "${docker_run} ${ci_arm} ./tests/scripts/task_config_build_arm.sh" - make(ci_arm, 'build', '-j') + make(ci_arm, 'build') pack_lib('arm', tvm_multilib) } } @@ -273,7 +273,7 @@ stage('Build') { ws(per_exec_ws("tvm/build-qemu")) { init_git() sh "${docker_run} ${ci_qemu} ./tests/scripts/task_config_build_qemu.sh" - make(ci_qemu, 'build', '-j') + make(ci_qemu, 'build') timeout(time: max_time, unit: 'MINUTES') { sh "${docker_run} ${ci_qemu} ./tests/scripts/task_ci_setup.sh" sh "${docker_run} ${ci_qemu} ./tests/scripts/task_python_microtvm.sh" From 3b76e389480425aef5d8a1362f3c9fb69d05f94d Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Thu, 12 Aug 2021 12:58:25 -0700 Subject: [PATCH 16/23] clean up dockerfile --- docker/bash.sh | 5 ++++- tests/scripts/setup-pytest-env.sh | 19 ++++++++++++------- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/docker/bash.sh b/docker/bash.sh index 4ea71d040a73..9404f99295de 100755 --- a/docker/bash.sh +++ b/docker/bash.sh @@ -320,8 +320,11 @@ if [ -n "${CI+x}" -a -z "${CPUSET_CPUS}" ]; then fi fi -echo "CPUSET_CPUS ${CPUSET_CPUS}" if [ -n "${CPUSET_CPUS}" ]; then + if [ -z "$(echo ${CPUSET_CPUS} | sed -E '/^[0-9]+-[0-9]+$/ p; /.*/ d')" ]; then + echo "error: --cpuset-cpus: must specify in the form -; got ${CPUSET_CPUS}" + exit 2 + fi CPUSET_CPUS_LOWER_BOUND=$(echo "${CPUSET_CPUS}" | sed -E 's/^([0-9]+)-.*$/\1/g') CPUSET_CPUS_UPPER_BOUND=$(echo "${CPUSET_CPUS}" | sed -E 's/^.*-([0-9]+)$/\1/g') CPUSET_NUM_CPUS=$(expr "${CPUSET_CPUS_UPPER_BOUND}" - "${CPUSET_CPUS_LOWER_BOUND}" + 1) || /bin/true diff --git a/tests/scripts/setup-pytest-env.sh b/tests/scripts/setup-pytest-env.sh index 51605d4ac358..2186c5b96029 100755 --- a/tests/scripts/setup-pytest-env.sh +++ b/tests/scripts/setup-pytest-env.sh @@ -32,11 +32,13 @@ export PYTHONPATH="${TVM_PATH}/python" export TVM_PYTEST_RESULT_DIR="${TVM_PATH}/build/pytest-results" mkdir -p "${TVM_PYTEST_RESULT_DIR}" -if [ -n "${CI_CPUSET_CPUS-}" ]; then - # When --cpuset-cpus has been passed to docker (this is set by docker/bash.sh), - # use all possible CPUs. - PYTEST_NUM_CPUS=$(nproc) +if [ -n "${CI_CPUSET_NUM_CPUS-}" ]; then + # When the # of CPUs has been restricted (e.g. when --cpuset-cpus has been passed to docker by + # docker/bash.sh), explicitly use all available CPUs. This environment variable is set by + # docker/bash.sh when it sets --cpuset-cpus. + PYTEST_NUM_CPUS="${CI_CPUSET_NUM_CPUS}" else + # Else attempt to use $(nproc) - 1. PYTEST_NUM_CPUS=$(nproc) if [ -z "${PYTEST_NUM_CPUS}" ]; then echo "WARNING: nproc failed; running pytest with only 1 CPU" @@ -44,10 +46,13 @@ else elif [ ${PYTEST_NUM_CPUS} -gt 1 ]; then PYTEST_NUM_CPUS=$(expr ${PYTEST_NUM_CPUS} - 1) # Don't nuke interactive work. fi -fi -if [ ${PYTEST_NUM_CPUS} -gt 8 ]; then - PYTEST_NUM_CPUS=8 # It usually doesn't make sense to launch > 8 workers + # Don't use >4 CPUs--in general, we only use 4 CPUs in testing, so we want to retain this + # maximum for the purposes of reproducing the CI. You can still override this by setting + # --cpuset-cpus in docker/bash.sh. + if [ ${PYTEST_NUM_CPUS} -gt 4 ]; then + PYTEST_NUM_CPUS=4 + fi fi function run_pytest() { From 1ba5b0a2c7591d5b0f05eddfb4fa44e46fba355b Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Thu, 12 Aug 2021 13:03:46 -0700 Subject: [PATCH 17/23] clean up task_build --- tests/scripts/task_build.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/scripts/task_build.sh b/tests/scripts/task_build.sh index 9b5141df303c..4d39cf7938c5 100755 --- a/tests/scripts/task_build.sh +++ b/tests/scripts/task_build.sh @@ -18,10 +18,10 @@ set -eux export VTA_HW_PATH=`pwd`/3rdparty/vta-hw -#MAKE_ARG="${2-}" -#if [ -n "${CI_CPUSET_NUM_CPUS+x}" -a -z "${MAKE_ARG}" ]; then -MAKE_ARG="-j${CI_CPUSET_NUM_CPUS}" -#fi +MAKE_ARG="${2-}" +if [ -n "${CI_CPUSET_NUM_CPUS+x}" -a -z "${MAKE_ARG}" ]; then + MAKE_ARG="-j${CI_CPUSET_NUM_CPUS}" +fi cd "$1" cmake .. -DCMAKE_BUILD_TYPE=RelWithDebInfo From 8130e8b8a4864b72783a673cff3f9a30deef2126 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Thu, 12 Aug 2021 14:05:09 -0700 Subject: [PATCH 18/23] fix empty string case --- tests/scripts/task_build.sh | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/scripts/task_build.sh b/tests/scripts/task_build.sh index 4d39cf7938c5..79dabdbb34d3 100755 --- a/tests/scripts/task_build.sh +++ b/tests/scripts/task_build.sh @@ -18,11 +18,15 @@ set -eux export VTA_HW_PATH=`pwd`/3rdparty/vta-hw -MAKE_ARG="${2-}" +MAKE_ARG=( ) +if [ -n "${2+x}" ]; then + MAKE_ARG=( "${2}" ) +fi + if [ -n "${CI_CPUSET_NUM_CPUS+x}" -a -z "${MAKE_ARG}" ]; then - MAKE_ARG="-j${CI_CPUSET_NUM_CPUS}" + MAKE_ARG=( "-j${CI_CPUSET_NUM_CPUS}" ) fi cd "$1" cmake .. -DCMAKE_BUILD_TYPE=RelWithDebInfo -make "${MAKE_ARG}" +make "${MAKE_ARG[@]-}" From 1e0660f1399c0d4f68f9244d1c21d16c507d0cf3 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Thu, 12 Aug 2021 14:23:09 -0700 Subject: [PATCH 19/23] fix again --- tests/scripts/task_build.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/scripts/task_build.sh b/tests/scripts/task_build.sh index 79dabdbb34d3..f3b389d2a902 100755 --- a/tests/scripts/task_build.sh +++ b/tests/scripts/task_build.sh @@ -23,10 +23,10 @@ if [ -n "${2+x}" ]; then MAKE_ARG=( "${2}" ) fi -if [ -n "${CI_CPUSET_NUM_CPUS+x}" -a -z "${MAKE_ARG}" ]; then +if [ -n "${CI_CPUSET_NUM_CPUS+x}" -a "a${MAKE_ARG[@]:+b}" == "a" ]; then MAKE_ARG=( "-j${CI_CPUSET_NUM_CPUS}" ) fi cd "$1" cmake .. -DCMAKE_BUILD_TYPE=RelWithDebInfo -make "${MAKE_ARG[@]-}" +make ${MAKE_ARG[@]+"${MAKE_ARG[@]}"} From 18424f39c40f3c0f49e7af55992071fb403b3dd4 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Thu, 12 Aug 2021 14:31:35 -0700 Subject: [PATCH 20/23] =?UTF-8?q?is=20prod=20Jenkins=20too=20old=20to=20se?= =?UTF-8?q?t=20CI=3D=3F=3F=3F?= --- Jenkinsfile | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/Jenkinsfile b/Jenkinsfile index 1fa834ac997c..f78933a10cf1 100755 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -149,6 +149,9 @@ stage('Prepare') { } stage("Sanity Check") { + environment { + CI = 'true' + } timeout(time: max_time, unit: 'MINUTES') { node('CPU') { ws(per_exec_ws("tvm/sanity")) { @@ -201,6 +204,9 @@ def unpack_lib(name, libs) { } stage('Build') { + environment { + CI = 'true' + } parallel 'BUILD: GPU': { node('GPUBUILD') { ws(per_exec_ws("tvm/build-gpu")) { @@ -285,6 +291,9 @@ stage('Build') { } stage('Unit Test') { + environment { + CI = 'true' + } parallel 'python3: GPU': { node('TensorCore') { ws(per_exec_ws("tvm/ut-python-gpu")) { @@ -345,6 +354,9 @@ stage('Unit Test') { } stage('Integration Test') { + environment { + CI = 'true' + } parallel 'topi: GPU': { node('GPU') { ws(per_exec_ws("tvm/topi-python-gpu")) { @@ -401,6 +413,9 @@ stage('Integration Test') { /* stage('Build packages') { + environment { + CI = 'true' + } parallel 'conda CPU': { node('CPU') { sh "${docker_run} tlcpack/conda-cpu ./conda/build_cpu.sh @@ -418,6 +433,9 @@ stage('Build packages') { */ stage('Deploy') { + environment { + CI = 'true' + } node('doc') { ws(per_exec_ws("tvm/deploy-docs")) { if (env.BRANCH_NAME == "main") { From 392ab8d91a90ba5f5bc23a15e02df11b9481a923 Mon Sep 17 00:00:00 2001 From: electriclilies Date: Wed, 15 Sep 2021 14:56:32 -0700 Subject: [PATCH 21/23] black --- python/tvm/testing/plugin.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/tvm/testing/plugin.py b/python/tvm/testing/plugin.py index 8f32beb5d6a8..914e0b11d725 100644 --- a/python/tvm/testing/plugin.py +++ b/python/tvm/testing/plugin.py @@ -289,6 +289,7 @@ def _parametrize_correlated_parameters(metafunc): value_sets = zip(*[values for name, values in params]) metafunc.parametrize(names, value_sets, indirect=True, ids=ids) + from xdist.scheduler.loadscope import LoadScopeScheduling @@ -302,4 +303,4 @@ def _split_scope(self, nodeid): def pytest_xdist_make_scheduler(config, log): - return TvmTestScheduler(config, log) \ No newline at end of file + return TvmTestScheduler(config, log) From 5a1aa0202d390d4c98ef5a45314b27f2bec59b2e Mon Sep 17 00:00:00 2001 From: electriclilies Date: Wed, 15 Sep 2021 15:18:44 -0700 Subject: [PATCH 22/23] Fix lint again --- python/tvm/testing/plugin.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/python/tvm/testing/plugin.py b/python/tvm/testing/plugin.py index 914e0b11d725..fc9bad05cad9 100644 --- a/python/tvm/testing/plugin.py +++ b/python/tvm/testing/plugin.py @@ -36,6 +36,7 @@ import tvm from tvm.testing import utils +from xdist.scheduler.loadscope import LoadScopeScheduling MARKERS = { @@ -290,9 +291,6 @@ def _parametrize_correlated_parameters(metafunc): metafunc.parametrize(names, value_sets, indirect=True, ids=ids) -from xdist.scheduler.loadscope import LoadScopeScheduling - - class TvmTestScheduler(LoadScopeScheduling): def _split_scope(self, nodeid): # NOTE: test_tvm_testing_features contains parametrization-related tests, and must be From 935c520c495771da8cc77279bc587f146641a4ff Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Wed, 22 Sep 2021 15:45:25 -0700 Subject: [PATCH 23/23] Add pytest timeout --- tests/scripts/setup-pytest-env.sh | 1 + tests/scripts/task_ci_setup.sh | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/scripts/setup-pytest-env.sh b/tests/scripts/setup-pytest-env.sh index 2186c5b96029..49f999d8eaf0 100755 --- a/tests/scripts/setup-pytest-env.sh +++ b/tests/scripts/setup-pytest-env.sh @@ -71,6 +71,7 @@ function run_pytest() { exit 2 fi TVM_FFI=${ffi_type} python3 -m pytest \ + --timeout=480 \ -o "junit_suite_name=${test_suite_name}-${ffi_type}" \ "--junit-xml=${TVM_PYTEST_RESULT_DIR}/${test_suite_name}-${ffi_type}.xml" \ "--junit-prefix=${ffi_type}" \ diff --git a/tests/scripts/task_ci_setup.sh b/tests/scripts/task_ci_setup.sh index 01d5587e70ad..8a77d48edc4d 100755 --- a/tests/scripts/task_ci_setup.sh +++ b/tests/scripts/task_ci_setup.sh @@ -30,7 +30,7 @@ set -o pipefail # echo "Addtiional setup in" ${CI_IMAGE_NAME} -python3 -m pip install --user tlcpack-sphinx-addon==0.2.1 synr==0.4.0 +python3 -m pip install --user tlcpack-sphinx-addon==0.2.1 synr==0.4.0 pytest-timeout # Rebuild standalone_crt in build/ tree. This file is not currently archived by pack_lib() in # Jenkinsfile. We expect config.cmake to be present from pack_lib().