diff --git a/buildlib/pr/main.yml b/buildlib/pr/main.yml index 5c6e8186fe1..3a22d761c46 100644 --- a/buildlib/pr/main.yml +++ b/buildlib/pr/main.yml @@ -1,6 +1,6 @@ variables: DOCKER_OPT_VOLUMES: -v /hpc/local:/hpc/local -v /auto/sw_tools:/auto/sw_tools - DOCKER_OPT_IB: --ulimit memlock=-1:-1 --device=/dev/infiniband/ + DOCKER_OPT_IB: --ulimit memlock=-1:-1 --device=/dev/infiniband/ --net=host DOCKER_OPT_GPU: --gpus all $(DOCKER_OPT_IB) DOCKER_OPT_ARGS: --cap-add=SYS_PTRACE @@ -11,10 +11,10 @@ resources: options: $(DOCKER_OPT_ARGS) $(DOCKER_OPT_VOLUMES) - container: centos7_ib image: rdmz-harbor.rdmz.labs.mlnx/ucx/centos7:5 - options: $(DOCKER_OPT_ARGS) $(DOCKER_OPT_VOLUMES) $(DOCKER_OPT_IB) --net host + options: $(DOCKER_OPT_ARGS) $(DOCKER_OPT_VOLUMES) $(DOCKER_OPT_IB) - container: centos7_cuda11 image: rdmz-harbor.rdmz.labs.mlnx/ucx/centos7-mofed5.4-cuda11:1 - options: $(DOCKER_OPT_ARGS) --net=host $(DOCKER_OPT_VOLUMES) $(DOCKER_OPT_GPU) + options: $(DOCKER_OPT_ARGS) $(DOCKER_OPT_VOLUMES) $(DOCKER_OPT_GPU) - container: fedora image: rdmz-harbor.rdmz.labs.mlnx/ucx/fedora33:1 options: $(DOCKER_OPT_ARGS) @@ -168,6 +168,9 @@ resources: - container: ubuntu2004_rocm_5_4_0 image: rdmz-harbor.rdmz.labs.mlnx/ucx/x86_64/ubuntu2004:rocm_5_4_0 options: $(DOCKER_OPT_ARGS) $(DOCKER_OPT_VOLUMES) + - container: ubuntu22_cuda12 + image: rdmz-harbor.rdmz.labs.mlnx/ucx/x86_64/ubuntu22.04-mofed5-cuda12:3 + options: $(DOCKER_OPT_ARGS) $(DOCKER_OPT_VOLUMES) $(DOCKER_OPT_GPU) - container: ubuntu2204_rocm_6_0_0 image: registry.hub.docker.com/rocm/ucx:rocm-6.0.0 options: $(DOCKER_OPT_ARGS) $(DOCKER_OPT_VOLUMES) @@ -296,11 +299,13 @@ stages: demands: ucx_gpu -equals yes test_perf: 1 container: centos7_cuda11 + valgrind_disable: yes - template: tests.yml parameters: name: new demands: ucx_new -equals yes test_perf: 1 + valgrind_disable: yes - template: tests.yml parameters: name: roce @@ -403,6 +408,19 @@ stages: jobs: - template: cuda/cuda.yml + + - stage: AddressSanitizer + dependsOn: [Static_check] + jobs: + - template: tests.yml + parameters: + name: gpu + demands: ucx_gpu -equals yes + test_perf: 0 + container: ubuntu22_cuda12 + asan_check: yes + valgrind_disable: yes + # - stage: Cuda_compatible # dependsOn: [Static_check] # jobs: diff --git a/buildlib/pr/tests.yml b/buildlib/pr/tests.yml index ba58ef4474d..185045a050f 100644 --- a/buildlib/pr/tests.yml +++ b/buildlib/pr/tests.yml @@ -5,6 +5,7 @@ parameters: name: subtest container: proto_enable: yes + asan_check: no jobs: - job: tests_${{ parameters.name }} @@ -46,5 +47,6 @@ jobs: RUN_TESTS: yes JENKINS_TEST_PERF: ${{ parameters.test_perf }} PROTO_ENABLE: ${{ parameters.proto_enable }} + ASAN_CHECK: ${{ parameters.asan_check }} JENKINS_NO_VALGRIND: ${{ parameters.valgrind_disable }} RUNNING_IN_AZURE: yes diff --git a/config/m4/compiler.m4 b/config/m4/compiler.m4 index 6aa3360e76a..5a0cba8062a 100644 --- a/config/m4/compiler.m4 +++ b/config/m4/compiler.m4 @@ -424,8 +424,8 @@ AS_IF([test "x$enable_asan" = xyes], [-fsanitize=address -fno-omit-frame-pointer], [AC_LANG_SOURCE([[int main(int argc, char** argv){return 0;}]])], [AS_MESSAGE([compiling with sanitizer]) - BASE_CXXFLAGS="-fsanitize=address -fno-omit-frame-pointer $BASE_CXXFLAGS" - LDFLAGS="-fsanitize=address -fno-omit-frame-pointer $LDFLAGS"], + BASE_CXXFLAGS="-fsanitize=address -static-libasan -fno-omit-frame-pointer $BASE_CXXFLAGS" + LDFLAGS="-fsanitize=address -static-libasan -fno-omit-frame-pointer $LDFLAGS"], [AC_MSG_ERROR([ASAN check is requested but not supported. Check libasan package existance])]) AC_RUN_IFELSE([AC_LANG_PROGRAM([[#include ]], diff --git a/contrib/lsan.supp b/contrib/lsan.supp new file mode 100644 index 00000000000..845ac94ab5b --- /dev/null +++ b/contrib/lsan.supp @@ -0,0 +1 @@ +leak:libcuda diff --git a/contrib/test_jenkins.sh b/contrib/test_jenkins.sh index 265e9994a35..3e74bdb5cde 100755 --- a/contrib/test_jenkins.sh +++ b/contrib/test_jenkins.sh @@ -1050,10 +1050,7 @@ run_release_mode_tests() { test_ucm_hooks } -# -# Run all tests -# -run_tests() { +set_ucx_common_test_env() { export UCX_HANDLE_ERRORS=bt export UCX_ERROR_SIGNALS=SIGILL,SIGSEGV,SIGBUS,SIGFPE,SIGPIPE,SIGABRT export UCX_TCP_PORT_RANGE="$((33000 + EXECUTOR_NUMBER * 1000))-$((33999 + EXECUTOR_NUMBER * 1000))" @@ -1063,6 +1060,14 @@ run_tests() { export UCX_IB_ROCE_LOCAL_SUBNET=y export UCX_IB_ROCE_SUBNET_PREFIX_LEN=inf + export LSAN_OPTIONS=suppressions=${WORKSPACE}/contrib/lsan.supp + export ASAN_OPTIONS=protect_shadow_gap=0 +} + +# +# Run all tests +# +run_tests() { export UCX_PROTO_REQUEST_RESET=y # load cuda env only if GPU available for remaining tests @@ -1102,15 +1107,6 @@ run_tests() { } run_test_proto_disable() { - export UCX_HANDLE_ERRORS=bt - export UCX_ERROR_SIGNALS=SIGILL,SIGSEGV,SIGBUS,SIGFPE,SIGPIPE,SIGABRT - export UCX_TCP_PORT_RANGE="$((33000 + EXECUTOR_NUMBER * 1000))-$((33999 + EXECUTOR_NUMBER * 1000))" - export UCX_TCP_CM_REUSEADDR=y - - # Don't cross-connect RoCE devices - export UCX_IB_ROCE_LOCAL_SUBNET=y - export UCX_IB_ROCE_SUBNET_PREFIX_LEN=inf - # build for devel tests and gtest build devel --enable-gtest @@ -1120,14 +1116,23 @@ run_test_proto_disable() { run_gtest "default" } +run_asan_check() { + build devel --enable-gtest --enable-asan --without-valgrind + run_gtest "default" +} + prepare try_load_cuda_env if [ -n "$JENKINS_RUN_TESTS" ] || [ -n "$RUN_TESTS" ] then check_machine + set_ucx_common_test_env + if [[ "$PROTO_ENABLE" == "no" ]]; then run_test_proto_disable + elif [[ "$ASAN_CHECK" == "yes" ]]; then + run_asan_check else run_tests fi diff --git a/src/ucm/util/sys.h b/src/ucm/util/sys.h index bbdc1e7550f..ab196ebe129 100644 --- a/src/ucm/util/sys.h +++ b/src/ucm/util/sys.h @@ -121,15 +121,11 @@ pid_t ucm_get_tid(); static UCS_F_ALWAYS_INLINE ucm_mmap_hook_mode_t ucm_get_hook_mode(ucm_mmap_hook_mode_t config_mode) { -#ifdef __SANITIZE_ADDRESS__ - return UCM_MMAP_HOOK_NONE; -#else if (RUNNING_ON_VALGRIND && (config_mode == UCM_MMAP_HOOK_BISTRO)) { return UCM_MMAP_HOOK_RELOC; } return config_mode; -#endif } diff --git a/test/gtest/Makefile.am b/test/gtest/Makefile.am index f560cad0c20..e76749c232d 100644 --- a/test/gtest/Makefile.am +++ b/test/gtest/Makefile.am @@ -27,6 +27,9 @@ export UCX_HANDLE_ERRORS export UCX_LOG_LEVEL export UCX_LOG_PRINT_ENABLE +export LSAN_OPTIONS=suppressions=$(top_srcdir)/contrib/lsan.supp +export ASAN_OPTIONS=protect_shadow_gap=0 + GTEST_ARGS = \ --gtest_filter=$(GTEST_FILTER) \ $(GTEST_EXTRA_ARGS) diff --git a/test/gtest/common/test_helpers.cc b/test/gtest/common/test_helpers.cc index 4e4662d2fcc..eb218b57c95 100644 --- a/test/gtest/common/test_helpers.cc +++ b/test/gtest/common/test_helpers.cc @@ -324,12 +324,16 @@ void analyze_test_results() int test_time_multiplier() { int factor = 1; -#if _BullseyeCoverage - factor *= 10; -#endif if (RUNNING_ON_VALGRIND) { factor *= 20; } +#if _BullseyeCoverage + factor *= 10; +#endif +#ifdef __SANITIZE_ADDRESS__ + factor *= 20; +#endif + return factor; } diff --git a/test/gtest/uct/ib/test_cqe_zipping.cc b/test/gtest/uct/ib/test_cqe_zipping.cc index 83697111fde..a67c8a6c605 100644 --- a/test/gtest/uct/ib/test_cqe_zipping.cc +++ b/test/gtest/uct/ib/test_cqe_zipping.cc @@ -75,6 +75,7 @@ class test_cqe_zipping : public test_uct_ib_with_specific_port { test_uct_ib::init(); if (!check_cqe_zip_caps()) { + cleanup(); UCS_TEST_SKIP_R("unsupported"); } diff --git a/test/gtest/uct/test_md.cc b/test/gtest/uct/test_md.cc index e4c00728923..fe3cc46ab3b 100644 --- a/test/gtest/uct/test_md.cc +++ b/test/gtest/uct/test_md.cc @@ -1084,13 +1084,15 @@ UCS_TEST_SKIP_COND_P(test_md_fork, fork, ASSERT_EQ(pid, waitpid(pid, &child_status, 0)); EXPECT_TRUE(WIFEXITED(child_status)) << ucs::exit_status_info(child_status); +#ifndef __SANITIZE_ADDRESS__ if (!RUNNING_ON_VALGRIND) { - /* Under valgrind, leaks are possible due to early exit, so don't expect - * an exit status of 0 + /* Under valgrind or ASAN, leaks are possible due to early exit, + * so don't expect an exit status of 0 */ EXPECT_EQ(0, WEXITSTATUS(child_status)) << ucs::exit_status_info(child_status); } +#endif free(page); }