From c960839a382baf509570e61d3877a1b42b292ce9 Mon Sep 17 00:00:00 2001 From: Hans Pabst Date: Wed, 16 Oct 2024 10:21:45 +0200 Subject: [PATCH] #857: conditionally rely on PARALLEL WORKSHARE - Avoid nested parallelism (dbcsr_acc_set_active_device). - Rely on omp_get_level (instead of omp_in_parallel), omp_in_parallel only accounts for active regions. - Avoid IF-condition as part of the WORKSHARE-directive. - Removed (nested) PARALLEL WORKSHARE constructs. --- src/acc/dbcsr_acc_device.F | 13 ++++++++++--- src/data/dbcsr_ptr_util.F | 16 +--------------- src/mpi/dbcsr_mpiwrap.F | 6 ------ src/ops/dbcsr_operations.F | 21 +-------------------- 4 files changed, 12 insertions(+), 44 deletions(-) diff --git a/src/acc/dbcsr_acc_device.F b/src/acc/dbcsr_acc_device.F index 7b4d29f25c6..d9ec94526e3 100644 --- a/src/acc/dbcsr_acc_device.F +++ b/src/acc/dbcsr_acc_device.F @@ -13,6 +13,8 @@ MODULE dbcsr_acc_device #endif #include "base/dbcsr_base_uses.f90" +!$ USE OMP_LIB, ONLY: omp_get_level + IMPLICIT NONE PUBLIC :: dbcsr_acc_get_ndevices, dbcsr_acc_set_active_device, dbcsr_acc_clear_errors @@ -83,11 +85,16 @@ SUBROUTINE dbcsr_acc_set_active_device(device_id) #if defined (__DBCSR_ACC) INTEGER :: istat -!$OMP PARALLEL DEFAULT(NONE) PRIVATE(istat) SHARED(device_id) - istat = acc_set_active_device_cu(device_id) +!$ IF (0 == omp_get_level()) THEN + istat = 0 +!$OMP PARALLEL DEFAULT(NONE) SHARED(device_id) REDUCTION(MAX:istat) + istat = acc_set_active_device_cu(device_id) +!$OMP END PARALLEL +!$ ELSE + istat = acc_set_active_device_cu(device_id) +!$ END IF IF (istat /= 0) & DBCSR_ABORT("dbcsr_acc_set_active_device: failed") -!$OMP END PARALLEL #else MARK_USED(device_id) diff --git a/src/data/dbcsr_ptr_util.F b/src/data/dbcsr_ptr_util.F index eb9d25da071..030eaa16cf5 100644 --- a/src/data/dbcsr_ptr_util.F +++ b/src/data/dbcsr_ptr_util.F @@ -28,8 +28,6 @@ MODULE dbcsr_ptr_util mp_deallocate #include "base/dbcsr_base_uses.f90" -!$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num, omp_get_num_threads - IMPLICIT NONE PRIVATE @@ -294,15 +292,9 @@ SUBROUTINE mem_copy_${nametype1}$ (dst, src, n) !! length of copy ${type1}$, DIMENSION(1:n), INTENT(OUT) :: dst !! destination memory - ${type1}$, DIMENSION(1:n), INTENT(IN) :: src + ${type1}$, DIMENSION(1:n), INTENT(IN) :: src !! source memory -#if !defined(__DBCSR_DISABLE_WORKSHARE) -!$OMP PARALLEL WORKSHARE DEFAULT(none) SHARED(dst,src) -#endif dst(:) = src(:) -#if !defined(__DBCSR_DISABLE_WORKSHARE) -!$OMP END PARALLEL WORKSHARE -#endif END SUBROUTINE mem_copy_${nametype1}$ SUBROUTINE mem_zero_${nametype1}$ (dst, n) @@ -312,13 +304,7 @@ SUBROUTINE mem_zero_${nametype1}$ (dst, n) !! length of elements to zero ${type1}$, DIMENSION(1:n), INTENT(OUT) :: dst !! destination memory -#if !defined(__DBCSR_DISABLE_WORKSHARE) -!$OMP PARALLEL WORKSHARE DEFAULT(none) SHARED(dst) -#endif dst(:) = ${zero1}$ -#if !defined(__DBCSR_DISABLE_WORKSHARE) -!$OMP END PARALLEL WORKSHARE -#endif END SUBROUTINE mem_zero_${nametype1}$ SUBROUTINE mem_alloc_${nametype1}$ (mem, n, mem_type) diff --git a/src/mpi/dbcsr_mpiwrap.F b/src/mpi/dbcsr_mpiwrap.F index f5393630eb9..a82edf17251 100644 --- a/src/mpi/dbcsr_mpiwrap.F +++ b/src/mpi/dbcsr_mpiwrap.F @@ -5182,13 +5182,7 @@ SUBROUTINE mp_rget_${nametype1}$v(base, source, win, win_data, myproc, disp, req MARK_USED(myproc) #endif IF (do_local_copy) THEN -#if !defined(__DBCSR_DISABLE_WORKSHARE) -!$OMP PARALLEL WORKSHARE DEFAULT(none) SHARED(base,win_data,disp_aint,len) -#endif base(:) = win_data(disp_aint + 1:disp_aint + len) -#if !defined(__DBCSR_DISABLE_WORKSHARE) -!$OMP END PARALLEL WORKSHARE -#endif request = mp_request_null ierr = 0 ELSE diff --git a/src/ops/dbcsr_operations.F b/src/ops/dbcsr_operations.F index e0a59a92e48..44112b8cb23 100644 --- a/src/ops/dbcsr_operations.F +++ b/src/ops/dbcsr_operations.F @@ -94,7 +94,7 @@ MODULE dbcsr_operations mp_sum #include "base/dbcsr_base_uses.f90" -!$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num, omp_get_num_threads +!$ USE OMP_LIB, ONLY: omp_get_thread_num, omp_get_num_threads IMPLICIT NONE @@ -316,7 +316,6 @@ SUBROUTINE dbcsr_zero(matrix_a) CALL timeset(routineN, handle) SELECT CASE (dbcsr_get_data_type(matrix_a)) -#if defined(__DBCSR_DISABLE_WORKSHARE) CASE (dbcsr_type_complex_4) matrix_a%data_area%d%c_sp = (0.0, 0.0) CASE (dbcsr_type_complex_8) @@ -325,24 +324,6 @@ SUBROUTINE dbcsr_zero(matrix_a) matrix_a%data_area%d%r_sp = 0.0 CASE (dbcsr_type_real_8) matrix_a%data_area%d%r_dp = 0.0_dp -#else - CASE (dbcsr_type_complex_4) -!$OMP PARALLEL WORKSHARE DEFAULT(NONE), SHARED(matrix_a) - matrix_a%data_area%d%c_sp = (0.0, 0.0) -!$OMP END PARALLEL WORKSHARE - CASE (dbcsr_type_complex_8) -!$OMP PARALLEL WORKSHARE DEFAULT(NONE), SHARED(matrix_a) - matrix_a%data_area%d%c_dp = (0.0_dp, 0.0_dp) -!$OMP END PARALLEL WORKSHARE - CASE (dbcsr_type_real_4) -!$OMP PARALLEL WORKSHARE DEFAULT(NONE), SHARED(matrix_a) - matrix_a%data_area%d%r_sp = 0.0 -!$OMP END PARALLEL WORKSHARE - CASE (dbcsr_type_real_8) -!$OMP PARALLEL WORKSHARE DEFAULT(NONE), SHARED(matrix_a) - matrix_a%data_area%d%r_dp = 0.0_dp -!$OMP END PARALLEL WORKSHARE -#endif END SELECT CALL timestop(handle) END SUBROUTINE dbcsr_zero