Skip to content

Commit

Permalink
Merge pull request #71 from ecmwf-ifs/nams_low_level_gpu_hdf5
Browse files Browse the repository at this point in the history
HDF5 support for CUDA/HIP/SYCL variants
  • Loading branch information
reuterbal authored Feb 21, 2024
2 parents 4bbe68e + 99bb032 commit dcaeba1
Show file tree
Hide file tree
Showing 16 changed files with 1,255 additions and 241 deletions.
5 changes: 1 addition & 4 deletions .github/scripts/verify-targets.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,7 @@ then
if [[ "$build_flags" == *"--with-cuda"* ]]
then
targets+=(dwarf-cloudsc-gpu-scc-cuf dwarf-cloudsc-gpu-scc-cuf-k-caching)
if [[ "$io_library_flag" == "--with-serialbox" ]]
then
targets+=(dwarf-cloudsc-c-cuda dwarf-cloudsc-c-cuda-hoist dwarf-cloudsc-c-cuda-k-caching)
fi
targets+=(dwarf-cloudsc-c-cuda dwarf-cloudsc-c-cuda-hoist dwarf-cloudsc-c-cuda-k-caching)
fi
fi

Expand Down
20 changes: 17 additions & 3 deletions arch/eurohpc/lumi/cray-gpu/14.0.2/env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,15 +34,29 @@ module_load cray-mpich/8.1.18
module_load craype/2.7.17
module_load craype-accel-amd-gfx90a
module_load buildtools/22.08
module_load cray-hdf5/1.12.1.5
module_load cray-python/3.9.12.1

### Handling of "magic" cray modules
# 1) Load the cray modules
module_load cray-hdf5/1.12.1.5
# 2) Store variables to locate the packages
_HDF5_ROOT=${CRAY_HDF5_PREFIX}
# 3) Unload the cray modules in reverse order, removing all the magic
module_unload cray-hdf5
# 4) Define variables that CMake introspects
export HDF5_ROOT=${_HDF5_ROOT}

# Export environment variables
export MPI_HOME=${MPICH_DIR}
export CC=cc
export CXX=CC
export FC=ftn
export HIPCXX=$(hipconfig --hipclangpath)/clang++

module list

set -x

export CC=cc CXX=CC FC=ftn

# Restore tracing to stored setting
{ if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null

Expand Down
13 changes: 11 additions & 2 deletions arch/eurohpc/lumi/cray-gpu/14.0.2/toolchain.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,13 @@ set( ENABLE_USE_STMT_FUNC ON CACHE STRING "" )
####################################################################

set( ENABLE_OMP ON CACHE STRING "" )
set( OpenMP_C_FLAGS "-homp" CACHE STRING "" )
set( OpenMP_Fortran_FLAGS "-homp" CACHE STRING "" )
set( OpenMP_C_FLAGS "-fopenmp" CACHE STRING "" )
set( OpenMP_CXX_FLAGS "-fopenmp" CACHE STRING "" )

set( OpenMP_C_LIB_NAMES "craymp" )
set( OpenMP_CXX_LIB_NAMES "craymp" )
set( OpenMP_Fortran_LIB_NAMES "craymp" )
set( OpenMP_craymp_LIBRARY "/opt/cray/pe/cce/14.0.2/cce/x86_64/lib/libcraymp.so" )

####################################################################
# OpenACC FLAGS
Expand All @@ -40,3 +45,7 @@ set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -hbyteswapio")
set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Wl, --as-needed")

set(ECBUILD_Fortran_FLAGS_BIT "-O3 -hfp1 -hscalar3 -hvector3 -G2 -haggress -DNDEBUG")

if(NOT DEFINED CMAKE_HIP_ARCHITECTURES)
set(CMAKE_HIP_ARCHITECTURES gfx90a)
endif()
20 changes: 17 additions & 3 deletions arch/eurohpc/lumi/cray-gpu/15.0.1/env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,17 +34,31 @@ module_load cray-mpich/8.1.18
module_load craype/2.7.20
module_load craype-accel-amd-gfx90a
module_load buildtools/23.03
module_load cray-hdf5/1.12.1.5
module_load cray-python/3.9.12.1
module_load Boost/1.81.0-cpeCray-23.03
module_load partition/G

### Handling of "magic" cray modules
# 1) Load the cray modules
module_load cray-hdf5/1.12.1.5
# 2) Store variables to locate the packages
_HDF5_ROOT=${CRAY_HDF5_PREFIX}
# 3) Unload the cray modules in reverse order, removing all the magic
module_unload cray-hdf5
# 4) Define variables that CMake introspects
export HDF5_ROOT=${_HDF5_ROOT}

# Export environment variables
export MPI_HOME=${MPICH_DIR}
export CC=cc
export CXX=CC
export FC=ftn
export HIPCXX=$(hipconfig --hipclangpath)/clang++

module list

set -x

export CC=cc CXX=CC FC=ftn

# Restore tracing to stored setting
{ if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null

Expand Down
2 changes: 1 addition & 1 deletion arch/eurohpc/lumi/cray-gpu/15.0.1/toolchain.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ set( OpenACC_Fortran_FLAGS "-hacc -h acc_model=deep_copy" )
####################################################################

# Append optimisation flags for HIP sources. The optimisation level is spelled
# with the letter "O" (-O3); the previous "-03" (digit zero) is not a valid
# compiler option and did not enable optimisation.
set(CMAKE_HIP_FLAGS "${CMAKE_HIP_FLAGS} -O3 -ffast-math")
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
if(NOT DEFINED CMAKE_HIP_ARCHITECTURES)
set(CMAKE_HIP_ARCHITECTURES gfx90a)
endif()

Expand Down
20 changes: 17 additions & 3 deletions arch/eurohpc/lumi/cray-gpu/16.0.1/env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,16 +34,30 @@ module_load rocm/5.2.3
module_load buildtools/23.09
module_load Boost/1.82.0-cpeCray-23.09
module_load cray-python/3.10.10
module_load cray-hdf5/1.12.2.7
module_load craype-x86-trento
module_load craype-accel-amd-gfx90a

### Handling of "magic" cray modules
# 1) Load the cray modules
module_load cray-hdf5/1.12.2.7
# 2) Store variables to locate the packages
_HDF5_ROOT=${CRAY_HDF5_PREFIX}
# 3) Unload the cray modules in reverse order, removing all the magic
module_unload cray-hdf5
# 4) Define variables that CMake introspects
export HDF5_ROOT=${_HDF5_ROOT}

# Export environment variables
export MPI_HOME=${MPICH_DIR}
export CC=cc
export CXX=CC
export FC=ftn
export HIPCXX=$(hipconfig --hipclangpath)/clang++

module list

set -x

export CC=cc CXX=CC FC=ftn

# Restore tracing to stored setting
{ if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null

Expand Down
7 changes: 5 additions & 2 deletions arch/eurohpc/lumi/cray-gpu/16.0.1/toolchain.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,9 @@ set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Wl, --as-needed")

set(ECBUILD_Fortran_FLAGS_BIT "-O3 -hfp1 -hscalar3 -hvector3 -G2 -haggress -DNDEBUG")

set( GPU_TARGETS "gfx90a" CACHE STRING "" )
# select OpenMP pragma to be used
if(NOT DEFINED CMAKE_HIP_ARCHITECTURES)
set(CMAKE_HIP_ARCHITECTURES gfx90a)
endif()

# select OpenMP pragma to be used
set( HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_PARALLEL OFF CACHE BOOL "" )
45 changes: 32 additions & 13 deletions src/cloudsc_cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
# Define this dwarf variant as an ECBuild feature
ecbuild_add_option( FEATURE CLOUDSC_C_CUDA
DESCRIPTION "Build the CUDA version of CLOUDSC C using Serialbox" DEFAULT ON
CONDITION Serialbox_FOUND AND HAVE_CUDA
CONDITION (Serialbox_FOUND OR HDF5_FOUND) AND HAVE_CUDA
)

if( HAVE_CLOUDSC_C_CUDA )
Expand All @@ -36,9 +36,12 @@ if( HAVE_CLOUDSC_C_CUDA )
PUBLIC_INCLUDES
$<INSTALL_INTERFACE:include>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/cloudsc>
PUBLIC_LIBS
Serialbox::Serialbox_C
$<${HAVE_OMP}:OpenMP::OpenMP_C>
PUBLIC_LIBS
$<${HAVE_HDF5}:hdf5::hdf5>
$<${HAVE_SERIALBOX}:Serialbox::Serialbox_C>
$<${HAVE_OMP}:OpenMP::OpenMP_C>
DEFINITIONS
${CLOUDSC_DEFINITIONS}
)

target_include_directories(
Expand Down Expand Up @@ -91,9 +94,12 @@ if( HAVE_CLOUDSC_C_CUDA )
PUBLIC_INCLUDES
$<INSTALL_INTERFACE:include>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/cloudsc>
PUBLIC_LIBS
Serialbox::Serialbox_C
$<${HAVE_OMP}:OpenMP::OpenMP_C>
PUBLIC_LIBS
$<${HAVE_HDF5}:hdf5::hdf5>
$<${HAVE_SERIALBOX}:Serialbox::Serialbox_C>
$<${HAVE_OMP}:OpenMP::OpenMP_C>
DEFINITIONS
${CLOUDSC_DEFINITIONS}
)

target_include_directories(
Expand Down Expand Up @@ -146,9 +152,12 @@ if( HAVE_CLOUDSC_C_CUDA )
PUBLIC_INCLUDES
$<INSTALL_INTERFACE:include>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/cloudsc>
PUBLIC_LIBS
Serialbox::Serialbox_C
$<${HAVE_OMP}:OpenMP::OpenMP_C>
PUBLIC_LIBS
$<${HAVE_HDF5}:hdf5::hdf5>
$<${HAVE_SERIALBOX}:Serialbox::Serialbox_C>
$<${HAVE_OMP}:OpenMP::OpenMP_C>
DEFINITIONS
${CLOUDSC_DEFINITIONS}
)

target_include_directories(
Expand Down Expand Up @@ -181,9 +190,19 @@ if( HAVE_CLOUDSC_C_CUDA )
)
###

# Create symlink for the input data
execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink
${CMAKE_CURRENT_SOURCE_DIR}/../../data ${CMAKE_CURRENT_BINARY_DIR}/../../../data )

# Create symlink for the input data
if( HAVE_SERIALBOX )
execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink
${CMAKE_CURRENT_SOURCE_DIR}/../../data ${CMAKE_CURRENT_BINARY_DIR}/../../../data )
endif()

if( HAVE_HDF5 )
execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink
${CMAKE_CURRENT_SOURCE_DIR}/../../config-files/input.h5 ${CMAKE_CURRENT_BINARY_DIR}/../../../input.h5 )
execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink
${CMAKE_CURRENT_SOURCE_DIR}/../../config-files/reference.h5 ${CMAKE_CURRENT_BINARY_DIR}/../../../reference.h5 )
endif()

else()
ecbuild_info( "Serialbox and/or CUDA not found, disabling CUDA prototype(s)" )
Expand Down
33 changes: 14 additions & 19 deletions src/cloudsc_cuda/cloudsc/cloudsc_validate.cu
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,6 @@ void validate_1d(const char *name, double * v_ref, double * v_field, int nlon, i
/* Computes and prints errors in the "L2 norm sense" */
int b, bsize, jk;
double zminval, zmaxval, zdiff, zmaxerr, zerrsum, zsum, zrelerr, zavgpgp;
double (*field)[nlon] = (double (*)[nlon]) v_field;
double (*reference)[nlon] = (double (*)[nlon]) v_ref;

zminval = +DBL_MAX;
zmaxval = -DBL_MAX;
Expand All @@ -64,14 +62,14 @@ void validate_1d(const char *name, double * v_ref, double * v_field, int nlon, i
for (b = 0; b < nblocks; b++) {
bsize = min(nlon, ngptot - b*nlon); // field block size
for (jk = 0; jk < bsize; jk++) {
zminval = fmin(zminval, field[b][jk]);
zmaxval = fmax(zmaxval, field[b][jk]);
zminval = fmin(zminval, v_field[b*nlon+jk]);
zmaxval = fmax(zmaxval, v_field[b*nlon+jk]);

// Difference against reference result in one-norm sense
zdiff = fabs(field[b][jk] - reference[b][jk]);
zdiff = fabs(v_field[b*nlon+jk] - v_ref[b*nlon+jk]);
zmaxerr = fmax(zmaxerr, zdiff);
zerrsum = zerrsum + zdiff;
zsum = zsum + abs(reference[b][jk]);
zsum = zsum + abs(v_ref[b*nlon+jk]);
}
}
zavgpgp = zerrsum / (double) ngptot;
Expand All @@ -84,8 +82,6 @@ void validate_2d(const char *name, double *v_ref, double *v_field, int nlon, int
/* Computes and prints errors in the "L2 norm sense" */
int b, bsize, jl, jk;
double zminval, zmaxval, zdiff, zmaxerr, zerrsum, zsum, zrelerr, zavgpgp;
double (*field)[nlev][nlon] = (double (*)[nlev][nlon]) v_field;
double (*reference)[nlev][nlon] = (double (*)[nlev][nlon]) v_ref;

zminval = +DBL_MAX;
zmaxval = -DBL_MAX;
Expand All @@ -99,13 +95,14 @@ void validate_2d(const char *name, double *v_ref, double *v_field, int nlon, int
bsize = min(nlon, ngptot - b*nlon); // field block size
for (jl = 0; jl < nlev; jl++) {
for (jk = 0; jk < bsize; jk++) {
zminval = fmin(zminval, field[b][jl][jk]);
zmaxval = fmax(zmaxval, field[b][jl][jk]);
zminval = fmin(zminval, v_field[b*nlev*nlon+jl*nlon+jk]);
zmaxval = fmax(zmaxval, v_field[b*nlev*nlon+jl*nlon+jk]);

// Difference against reference result in one-norm sense
zdiff = fabs(field[b][jl][jk] - reference[b][jl][jk]);
zdiff = fabs(v_field[b*nlev*nlon+jl*nlon+jk] - v_ref[b*nlev*nlon+jl*nlon+jk]);
zmaxerr = fmax(zmaxerr, zdiff);
zerrsum = zerrsum + zdiff;
zsum = zsum + abs(reference[b][jl][jk]);
zsum = zsum + abs(v_ref[b*nlev*nlon+jl*nlon+jk]);
}
}
}
Expand All @@ -120,8 +117,6 @@ void validate_3d(const char *name, double *v_ref, double *v_field, int nlon,
/* Computes and prints errors in the "L2 norm sense" */
int b, bsize, jl, jk, jm;
double zminval, zmaxval, zdiff, zmaxerr, zerrsum, zsum, zrelerr, zavgpgp;
double (*field)[nclv][nlev][nlon] = (double (*)[nclv][nlev][nlon]) v_field;
double (*reference)[nclv][nlev][nlon] = (double (*)[nclv][nlev][nlon]) v_ref;

zminval = +DBL_MAX;
zmaxval = -DBL_MAX;
Expand All @@ -136,18 +131,18 @@ void validate_3d(const char *name, double *v_ref, double *v_field, int nlon,
for (jm = 0; jm < nclv; jm++) {
for (jl = 0; jl < nlev; jl++) {
for (jk = 0; jk < bsize; jk++) {
zminval = fmin(zminval, field[b][jm][jl][jk]);
zmaxval = fmax(zmaxval, field[b][jm][jl][jk]);
zminval = fmin(zminval, v_field[b*nclv*nlev*nlon+jm*nlev*nlon+jl*nlon+jk]);
zmaxval = fmax(zmaxval, v_field[b*nclv*nlev*nlon+jm*nlev*nlon+jl*nlon+jk]);

// Difference against reference result in one-norm sense
zdiff = fabs(field[b][jm][jl][jk] - reference[b][jm][jl][jk]);
zdiff = fabs(v_field[b*nclv*nlev*nlon+jm*nlev*nlon+jl*nlon+jk] - v_ref[b*nclv*nlev*nlon+jm*nlev*nlon+jl*nlon+jk]);
zmaxerr = fmax(zmaxerr, zdiff);
zerrsum = zerrsum + zdiff;
zsum = zsum + abs(reference[b][jm][jl][jk]);
zsum = zsum + abs(v_ref[b*nclv*nlev*nlon+jm*nlev*nlon+jl*nlon+jk]);
}
}
}
}
}
zavgpgp = zerrsum / (double) ngptot;
print_error(name, zminval, zmaxval, zmaxerr, zerrsum, zsum, zavgpgp, 2);
}
Expand Down
Loading

0 comments on commit dcaeba1

Please sign in to comment.