Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HDF5 support for CUDA/HIP/SYCL variants #71

Merged
merged 9 commits into from
Feb 21, 2024
5 changes: 1 addition & 4 deletions .github/scripts/verify-targets.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,7 @@ then
if [[ "$build_flags" == *"--with-cuda"* ]]
then
targets+=(dwarf-cloudsc-gpu-scc-cuf dwarf-cloudsc-gpu-scc-cuf-k-caching)
if [[ "$io_library_flag" == "--with-serialbox" ]]
then
targets+=(dwarf-cloudsc-c-cuda dwarf-cloudsc-c-cuda-hoist dwarf-cloudsc-c-cuda-k-caching)
fi
targets+=(dwarf-cloudsc-c-cuda dwarf-cloudsc-c-cuda-hoist dwarf-cloudsc-c-cuda-k-caching)
fi
fi

Expand Down
20 changes: 17 additions & 3 deletions arch/eurohpc/lumi/cray-gpu/14.0.2/env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,15 +34,29 @@ module_load cray-mpich/8.1.18
module_load craype/2.7.17
module_load craype-accel-amd-gfx90a
module_load buildtools/22.08
module_load cray-hdf5/1.12.1.5
module_load cray-python/3.9.12.1

### Handling of "magic" cray modules
# 1) Load the cray modules
module_load cray-hdf5/1.12.1.5
# 2) Store variables to locate the packages
_HDF5_ROOT=${CRAY_HDF5_PREFIX}
# 3) Unload the cray modules in reverse order, removing all the magic
module_unload cray-hdf5
# 4) Define variables that CMake introspects
export HDF5_ROOT=${_HDF5_ROOT}

# Export environment variables
export MPI_HOME=${MPICH_DIR}
export CC=cc
export CXX=CC
export FC=ftn
export HIPCXX=$(hipconfig --hipclangpath)/clang++

module list

set -x

export CC=cc CXX=CC FC=ftn

# Restore tracing to stored setting
{ if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null

Expand Down
13 changes: 11 additions & 2 deletions arch/eurohpc/lumi/cray-gpu/14.0.2/toolchain.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,13 @@ set( ENABLE_USE_STMT_FUNC ON CACHE STRING "" )
####################################################################

set( ENABLE_OMP ON CACHE STRING "" )
set( OpenMP_C_FLAGS "-homp" CACHE STRING "" )
set( OpenMP_Fortran_FLAGS "-homp" CACHE STRING "" )
set( OpenMP_C_FLAGS "-fopenmp" CACHE STRING "" )
set( OpenMP_CXX_FLAGS "-fopenmp" CACHE STRING "" )

set( OpenMP_C_LIB_NAMES "craymp" )
set( OpenMP_CXX_LIB_NAMES "craymp" )
set( OpenMP_Fortran_LIB_NAMES "craymp" )
set( OpenMP_craymp_LIBRARY "/opt/cray/pe/cce/14.0.2/cce/x86_64/lib/libcraymp.so" )

####################################################################
# OpenACC FLAGS
Expand All @@ -40,3 +45,7 @@ set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -hbyteswapio")
set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Wl, --as-needed")

set(ECBUILD_Fortran_FLAGS_BIT "-O3 -hfp1 -hscalar3 -hvector3 -G2 -haggress -DNDEBUG")

if(NOT DEFINED CMAKE_HIP_ARCHITECTURES)
set(CMAKE_HIP_ARCHITECTURES gfx90a)
endif()
20 changes: 17 additions & 3 deletions arch/eurohpc/lumi/cray-gpu/15.0.1/env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,17 +34,31 @@ module_load cray-mpich/8.1.18
module_load craype/2.7.20
module_load craype-accel-amd-gfx90a
module_load buildtools/23.03
module_load cray-hdf5/1.12.1.5
module_load cray-python/3.9.12.1
module_load Boost/1.81.0-cpeCray-23.03
module_load partition/G

### Handling of "magic" cray modules
# 1) Load the cray modules
module_load cray-hdf5/1.12.1.5
# 2) Store variables to locate the packages
_HDF5_ROOT=${CRAY_HDF5_PREFIX}
# 3) Unload the cray modules in reverse order, removing all the magic
module_unload cray-hdf5
# 4) Define variables that CMake introspects
export HDF5_ROOT=${_HDF5_ROOT}

# Export environment variables
export MPI_HOME=${MPICH_DIR}
export CC=cc
export CXX=CC
export FC=ftn
export HIPCXX=$(hipconfig --hipclangpath)/clang++

module list

set -x

export CC=cc CXX=CC FC=ftn

# Restore tracing to stored setting
{ if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null

Expand Down
2 changes: 1 addition & 1 deletion arch/eurohpc/lumi/cray-gpu/15.0.1/toolchain.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ set( OpenACC_Fortran_FLAGS "-hacc -h acc_model=deep_copy" )
####################################################################

# Optimisation flags appended to the HIP compile line.
# Note: the level is "-O3" (capital letter O), not "-03" (digit zero), which
# the compiler does not recognise as an optimisation option.
set(CMAKE_HIP_FLAGS "${CMAKE_HIP_FLAGS} -O3 -ffast-math")
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
if(NOT DEFINED CMAKE_HIP_ARCHITECTURES)
set(CMAKE_HIP_ARCHITECTURES gfx90a)
endif()

Expand Down
20 changes: 17 additions & 3 deletions arch/eurohpc/lumi/cray-gpu/16.0.1/env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,16 +34,30 @@ module_load rocm/5.2.3
module_load buildtools/23.09
module_load Boost/1.82.0-cpeCray-23.09
module_load cray-python/3.10.10
module_load cray-hdf5/1.12.2.7
module_load craype-x86-trento
module_load craype-accel-amd-gfx90a

### Handling of "magic" cray modules
# 1) Load the cray modules
module_load cray-hdf5/1.12.2.7
# 2) Store variables to locate the packages
_HDF5_ROOT=${CRAY_HDF5_PREFIX}
# 3) Unload the cray modules in reverse order, removing all the magic
module_unload cray-hdf5
# 4) Define variables that CMake introspects
export HDF5_ROOT=${_HDF5_ROOT}

# Export environment variables
export MPI_HOME=${MPICH_DIR}
export CC=cc
export CXX=CC
export FC=ftn
export HIPCXX=$(hipconfig --hipclangpath)/clang++

module list

set -x

export CC=cc CXX=CC FC=ftn

# Restore tracing to stored setting
{ if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null

Expand Down
7 changes: 5 additions & 2 deletions arch/eurohpc/lumi/cray-gpu/16.0.1/toolchain.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,9 @@ set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Wl, --as-needed")

set(ECBUILD_Fortran_FLAGS_BIT "-O3 -hfp1 -hscalar3 -hvector3 -G2 -haggress -DNDEBUG")

set( GPU_TARGETS "gfx90a" CACHE STRING "" )
# select OpenMP pragma to be used
if(NOT DEFINED CMAKE_HIP_ARCHITECTURES)
set(CMAKE_HIP_ARCHITECTURES gfx90a)
endif()

# select OpenMP pragma to be used
set( HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_PARALLEL OFF CACHE BOOL "" )
45 changes: 32 additions & 13 deletions src/cloudsc_cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
# Define this dwarf variant as an ECBuild feature
ecbuild_add_option( FEATURE CLOUDSC_C_CUDA
DESCRIPTION "Build the CUDA version of CLOUDSC C using Serialbox" DEFAULT ON
CONDITION Serialbox_FOUND AND HAVE_CUDA
CONDITION (Serialbox_FOUND OR HDF5_FOUND) AND HAVE_CUDA
)

if( HAVE_CLOUDSC_C_CUDA )
Expand All @@ -36,9 +36,12 @@ if( HAVE_CLOUDSC_C_CUDA )
PUBLIC_INCLUDES
$<INSTALL_INTERFACE:include>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/cloudsc>
PUBLIC_LIBS
Serialbox::Serialbox_C
$<${HAVE_OMP}:OpenMP::OpenMP_C>
PUBLIC_LIBS
$<${HAVE_HDF5}:hdf5::hdf5>
$<${HAVE_SERIALBOX}:Serialbox::Serialbox_C>
$<${HAVE_OMP}:OpenMP::OpenMP_C>
DEFINITIONS
${CLOUDSC_DEFINITIONS}
)

target_include_directories(
Expand Down Expand Up @@ -91,9 +94,12 @@ if( HAVE_CLOUDSC_C_CUDA )
PUBLIC_INCLUDES
$<INSTALL_INTERFACE:include>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/cloudsc>
PUBLIC_LIBS
Serialbox::Serialbox_C
$<${HAVE_OMP}:OpenMP::OpenMP_C>
PUBLIC_LIBS
$<${HAVE_HDF5}:hdf5::hdf5>
$<${HAVE_SERIALBOX}:Serialbox::Serialbox_C>
$<${HAVE_OMP}:OpenMP::OpenMP_C>
DEFINITIONS
${CLOUDSC_DEFINITIONS}
)

target_include_directories(
Expand Down Expand Up @@ -146,9 +152,12 @@ if( HAVE_CLOUDSC_C_CUDA )
PUBLIC_INCLUDES
$<INSTALL_INTERFACE:include>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/cloudsc>
PUBLIC_LIBS
Serialbox::Serialbox_C
$<${HAVE_OMP}:OpenMP::OpenMP_C>
PUBLIC_LIBS
$<${HAVE_HDF5}:hdf5::hdf5>
$<${HAVE_SERIALBOX}:Serialbox::Serialbox_C>
$<${HAVE_OMP}:OpenMP::OpenMP_C>
DEFINITIONS
${CLOUDSC_DEFINITIONS}
)

target_include_directories(
Expand Down Expand Up @@ -181,9 +190,19 @@ if( HAVE_CLOUDSC_C_CUDA )
)
###

# Create symlink for the input data
execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink
${CMAKE_CURRENT_SOURCE_DIR}/../../data ${CMAKE_CURRENT_BINARY_DIR}/../../../data )

# Create symlink for the input data
if( HAVE_SERIALBOX )
execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink
${CMAKE_CURRENT_SOURCE_DIR}/../../data ${CMAKE_CURRENT_BINARY_DIR}/../../../data )
endif()

if( HAVE_HDF5 )
execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink
${CMAKE_CURRENT_SOURCE_DIR}/../../config-files/input.h5 ${CMAKE_CURRENT_BINARY_DIR}/../../../input.h5 )
execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink
${CMAKE_CURRENT_SOURCE_DIR}/../../config-files/reference.h5 ${CMAKE_CURRENT_BINARY_DIR}/../../../reference.h5 )
endif()

else()
ecbuild_info( "Serialbox and/or CUDA not found, disabling CUDA prototype(s)" )
Expand Down
33 changes: 14 additions & 19 deletions src/cloudsc_cuda/cloudsc/cloudsc_validate.cu
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,6 @@ void validate_1d(const char *name, double * v_ref, double * v_field, int nlon, i
/* Computes and prints errors in the "L2 norm sense" */
int b, bsize, jk;
double zminval, zmaxval, zdiff, zmaxerr, zerrsum, zsum, zrelerr, zavgpgp;
double (*field)[nlon] = (double (*)[nlon]) v_field;
double (*reference)[nlon] = (double (*)[nlon]) v_ref;

zminval = +DBL_MAX;
zmaxval = -DBL_MAX;
Expand All @@ -64,14 +62,14 @@ void validate_1d(const char *name, double * v_ref, double * v_field, int nlon, i
for (b = 0; b < nblocks; b++) {
bsize = min(nlon, ngptot - b*nlon); // field block size
for (jk = 0; jk < bsize; jk++) {
zminval = fmin(zminval, field[b][jk]);
zmaxval = fmax(zmaxval, field[b][jk]);
zminval = fmin(zminval, v_field[b*nlon+jk]);
zmaxval = fmax(zmaxval, v_field[b*nlon+jk]);

// Difference against reference result in one-norm sense
zdiff = fabs(field[b][jk] - reference[b][jk]);
zdiff = fabs(v_field[b*nlon+jk] - v_ref[b*nlon+jk]);
zmaxerr = fmax(zmaxerr, zdiff);
zerrsum = zerrsum + zdiff;
zsum = zsum + abs(reference[b][jk]);
zsum = zsum + abs(v_ref[b*nlon+jk]);
}
}
zavgpgp = zerrsum / (double) ngptot;
Expand All @@ -84,8 +82,6 @@ void validate_2d(const char *name, double *v_ref, double *v_field, int nlon, int
/* Computes and prints errors in the "L2 norm sense" */
int b, bsize, jl, jk;
double zminval, zmaxval, zdiff, zmaxerr, zerrsum, zsum, zrelerr, zavgpgp;
double (*field)[nlev][nlon] = (double (*)[nlev][nlon]) v_field;
double (*reference)[nlev][nlon] = (double (*)[nlev][nlon]) v_ref;

zminval = +DBL_MAX;
zmaxval = -DBL_MAX;
Expand All @@ -99,13 +95,14 @@ void validate_2d(const char *name, double *v_ref, double *v_field, int nlon, int
bsize = min(nlon, ngptot - b*nlon); // field block size
for (jl = 0; jl < nlev; jl++) {
for (jk = 0; jk < bsize; jk++) {
zminval = fmin(zminval, field[b][jl][jk]);
zmaxval = fmax(zmaxval, field[b][jl][jk]);
zminval = fmin(zminval, v_field[b*nlev*nlon+jl*nlon+jk]);
zmaxval = fmax(zmaxval, v_field[b*nlev*nlon+jl*nlon+jk]);

// Difference against reference result in one-norm sense
zdiff = fabs(field[b][jl][jk] - reference[b][jl][jk]);
zdiff = fabs(v_field[b*nlev*nlon+jl*nlon+jk] - v_ref[b*nlev*nlon+jl*nlon+jk]);
zmaxerr = fmax(zmaxerr, zdiff);
zerrsum = zerrsum + zdiff;
zsum = zsum + abs(reference[b][jl][jk]);
zsum = zsum + abs(v_ref[b*nlev*nlon+jl*nlon+jk]);
}
}
}
Expand All @@ -120,8 +117,6 @@ void validate_3d(const char *name, double *v_ref, double *v_field, int nlon,
/* Computes and prints errors in the "L2 norm sense" */
int b, bsize, jl, jk, jm;
double zminval, zmaxval, zdiff, zmaxerr, zerrsum, zsum, zrelerr, zavgpgp;
double (*field)[nclv][nlev][nlon] = (double (*)[nclv][nlev][nlon]) v_field;
double (*reference)[nclv][nlev][nlon] = (double (*)[nclv][nlev][nlon]) v_ref;

zminval = +DBL_MAX;
zmaxval = -DBL_MAX;
Expand All @@ -136,18 +131,18 @@ void validate_3d(const char *name, double *v_ref, double *v_field, int nlon,
for (jm = 0; jm < nclv; jm++) {
for (jl = 0; jl < nlev; jl++) {
for (jk = 0; jk < bsize; jk++) {
zminval = fmin(zminval, field[b][jm][jl][jk]);
zmaxval = fmax(zmaxval, field[b][jm][jl][jk]);
zminval = fmin(zminval, v_field[b*nclv*nlev*nlon+jm*nlev*nlon+jl*nlon+jk]);
zmaxval = fmax(zmaxval, v_field[b*nclv*nlev*nlon+jm*nlev*nlon+jl*nlon+jk]);

// Difference against reference result in one-norm sense
zdiff = fabs(field[b][jm][jl][jk] - reference[b][jm][jl][jk]);
zdiff = fabs(v_field[b*nclv*nlev*nlon+jm*nlev*nlon+jl*nlon+jk] - v_ref[b*nclv*nlev*nlon+jm*nlev*nlon+jl*nlon+jk]);
zmaxerr = fmax(zmaxerr, zdiff);
zerrsum = zerrsum + zdiff;
zsum = zsum + abs(reference[b][jm][jl][jk]);
zsum = zsum + abs(v_ref[b*nclv*nlev*nlon+jm*nlev*nlon+jl*nlon+jk]);
}
}
}
}
}
zavgpgp = zerrsum / (double) ngptot;
print_error(name, zminval, zmaxval, zmaxerr, zerrsum, zsum, zavgpgp, 2);
}
Expand Down
Loading
Loading