Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introducing SYCL implementations/variants #64

Merged
merged 13 commits into from
Feb 7, 2024
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,9 @@ if ( HAVE_HIP )
find_package(hip REQUIRED)
endif()

ecbuild_add_option( FEATURE SYCL
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be linked to a check that SYCL is actually available.

We could do this by adding REQUIRED_PACKAGES "IntelSYCL" (see https://www.intel.com/content/www/us/en/docs/dpcpp-cpp-compiler/developer-guide-reference/2024-0/use-cmake-with-the-compiler.html).

If we want to make this compatible with other SYCL implementations (e.g., whatever OpenSYCL is called these days), we could use this pattern:

find_package(IntelSYCL QUIET)
if(NOT IntelSYCL_FOUND)
    find_package(IntelDPCPP QUIET)
endif()
find_package(AdaptiveCpp QUIET)
ecbuild_add_option( FEATURE SYCL
    DESCRIPTION "SYCL" DEFAULT OFF
    CONDITION IntelSYCL_FOUND OR IntelDPCPP_FOUND OR AdaptiveCpp_FOUND )

DESCRIPTION "SYCL" DEFAULT OFF)

### OpenMP
ecbuild_add_option( FEATURE OMP
DESCRIPTION "OpenMP" DEFAULT ON
Expand Down
16 changes: 16 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,22 @@ Isambard. A set of arch and toolchain files and detailed installation
and run instructions are provided
[here](https://confluence.ecmwf.int/display/~nabr/3rd+Isambard+Hackathon).

### SYCL version of CLOUDSC

A preliminary SYCL code variant has been added and tested with a custom
DPCPP install on ECMWF's AC partition. To build this, please use the
SYCL-specific environment setups:

```
./cloudsc-bundle build --clean --build-dir=build-sycl --with-gpu --with-sycl --with-serialbox --arch=arch/ecmwf/hpc2020/intel-sycl/2021.4.0

# Then run with
cd build-sycl && . env.sh
./bin/dwarf-cloudsc-scc-sycl 1 240000 128
./bin/dwarf-cloudsc-scc-hoist-sycl 1 240000 128
./bin/dwarf-cloudsc-scc-k-caching-sycl 1 240000 128
```

## Running and testing

The different prototype variants of the dwarf create different binaries that
Expand Down
48 changes: 48 additions & 0 deletions arch/ecmwf/hpc2020/intel-sycl/2021.4.0/env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# (C) Copyright 1988- ECMWF.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.

# Source me to get the correct configure/build/run environment

# Store tracing and disable (module is *way* too verbose)
# `$-` holds the currently active shell option flags; deleting every character
# other than `x` leaves tracing_ non-empty only when xtrace was on at this point.
# stderr of the group is discarded so the bookkeeping itself is not traced.
{ tracing_=${-//[^x]/}; set +x; } 2>/dev/null

# Load one environment module, echoing the command first so that the log
# shows what was done even though shell tracing is switched off.
#   $1 - module name, optionally with a version (e.g. "cmake/3.20.2")
module_load() {
  echo "+ module load $1"
  # Quote the argument so it reaches `module` as a single word and is not
  # subject to word splitting or glob expansion.
  module load "$1"
}
# Unload one environment module, echoing the command first so that the log
# shows what was done even though shell tracing is switched off.
#   $1 - module name (e.g. "intel")
module_unload() {
  echo "+ module unload $1"
  # Quote the argument so it reaches `module` as a single word and is not
  # subject to word splitting or glob expansion.
  module unload "$1"
}

# Start from a clean slate: drop every module that could clash with the
# versions pinned below.
for _stale_module in intel openmpi hpcx-openmpi boost hdf5 cmake python3 java; do
  module_unload "$_stale_module"
done
unset _stale_module

# Bring in the pinned toolchain and libraries; the order is kept as-is,
# with the programming environment first.
for _pinned_module in \
    prgenv/intel \
    intel/2021.4.0 \
    hpcx-openmpi/2.10.0 \
    boost/1.71.0 \
    hdf5/1.10.6 \
    cmake/3.20.2 \
    python3/3.8.8-01 \
    java/11.0.6
do
  module_load "$_pinned_module"
done
unset _pinned_module

# NOTE(review): this `set -x` looks redundant — the group right below sets the
# xtrace state unconditionally in both branches; confirm before removing.
set -x

# Restore tracing to stored setting
{ if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null

# Toolchain file for ecbuild, given relative to the current directory.
# NOTE(review): presumably resolved by the bundle's build wrapper against this
# arch directory — TODO confirm.
export ECBUILD_TOOLCHAIN="./toolchain.cmake"
1 change: 1 addition & 0 deletions arch/ecmwf/hpc2020/intel-sycl/2021.4.0/toolchain.cmake
163 changes: 163 additions & 0 deletions arch/toolchains/ecmwf-hpc2020-intel-sycl.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
# (C) Copyright 1988- ECMWF.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.

####################################################################
# ARCHITECTURE
####################################################################

# Fixed platform characteristics for the target system. Every value below is a
# hard-coded constant (1/0 flags and byte sizes).
# NOTE(review): presumably these pre-seed the results of ecbuild's configure-time
# checks so they need not be probed — confirm against the ecbuild documentation.

# Compiler / language feature flags
set( EC_HAVE_C_INLINE 1 )
set( EC_HAVE_FUNCTION_DEF 1 )
set( EC_HAVE_CXXABI_H 1 )
set( EC_HAVE_CXX_BOOL 1 )
set( EC_HAVE_CXX_SSTREAM 1 )
set( EC_HAVE_CXX_INT_128 0 )
# Sizes of pointers and fundamental types
# NOTE(review): EC_SIZEOF_LONG_DOUBLE is 8 here, which is unusual for
# x86-64 Linux (typically 16) — confirm this is intended.
set( CMAKE_SIZEOF_VOID_P 8 )
set( EC_SIZEOF_PTR 8 )
set( EC_SIZEOF_CHAR 1 )
set( EC_SIZEOF_SHORT 2 )
set( EC_SIZEOF_INT 4 )
set( EC_SIZEOF_LONG 8 )
set( EC_SIZEOF_LONG_LONG 8 )
set( EC_SIZEOF_FLOAT 4 )
set( EC_SIZEOF_DOUBLE 8 )
set( EC_SIZEOF_LONG_DOUBLE 8 )
set( EC_SIZEOF_SIZE_T 8 )
set( EC_SIZEOF_SSIZE_T 8 )
set( EC_SIZEOF_OFF_T 8 )
# Byte order: little-endian
set( EC_BIG_ENDIAN 0 )
set( EC_LITTLE_ENDIAN 1 )
set( IEEE_BE 0 )
set( IEEE_LE 1 )
# File I/O, locking and memory-mapping functions
set( EC_HAVE_FSEEK 1 )
set( EC_HAVE_FSEEKO 1 )
set( EC_HAVE_FTELLO 1 )
set( EC_HAVE_LSEEK 0 )
set( EC_HAVE_FTRUNCATE 0 )
set( EC_HAVE_OPEN 0 )
set( EC_HAVE_FOPEN 1 )
set( EC_HAVE_FMEMOPEN 1 )
set( EC_HAVE_FUNOPEN 0 )
set( EC_HAVE_FLOCK 1 )
set( EC_HAVE_MMAP 1 )
set( EC_HAVE_POSIX_MEMALIGN 1 )
set( EC_HAVE_F_GETLK 1 )
set( EC_HAVE_F_SETLK 1 )
set( EC_HAVE_F_SETLKW 1 )
set( EC_HAVE_F_GETLK64 1 )
set( EC_HAVE_F_SETLK64 1 )
set( EC_HAVE_F_SETLKW64 1 )
set( EC_HAVE_MAP_ANONYMOUS 1 )
set( EC_HAVE_MAP_ANON 1 )
# Header availability
set( EC_HAVE_ASSERT_H 1 )
set( EC_HAVE_STDLIB_H 1 )
set( EC_HAVE_UNISTD_H 1 )
set( EC_HAVE_STRING_H 1 )
set( EC_HAVE_STRINGS_H 1 )
set( EC_HAVE_SYS_STAT_H 1 )
set( EC_HAVE_SYS_TIME_H 1 )
set( EC_HAVE_SYS_TYPES_H 1 )
set( EC_HAVE_MALLOC_H 1 )
set( EC_HAVE_SYS_MALLOC_H 0 )
set( EC_HAVE_SYS_PARAM_H 1 )
set( EC_HAVE_SYS_MOUNT_H 1 )
set( EC_HAVE_SYS_VFS_H 1 )
# File-status types/functions and their 64-bit variants
set( EC_HAVE_OFFT 1 )
set( EC_HAVE_OFF64T 1 )
set( EC_HAVE_STRUCT_STAT 1 )
set( EC_HAVE_STRUCT_STAT64 1 )
set( EC_HAVE_STAT 1 )
set( EC_HAVE_STAT64 1 )
set( EC_HAVE_FSTAT 1 )
set( EC_HAVE_FSTAT64 1 )
set( EC_HAVE_FSEEKO64 1 )
set( EC_HAVE_FTELLO64 1 )
set( EC_HAVE_LSEEK64 1 )
set( EC_HAVE_OPEN64 1 )
set( EC_HAVE_FOPEN64 1 )
set( EC_HAVE_FTRUNCATE64 1 )
set( EC_HAVE_FLOCK64 1 )
set( EC_HAVE_MMAP64 1 )
set( EC_HAVE_STRUCT_STATVFS 1 )
set( EC_HAVE_STRUCT_STATVFS64 1 )
# Miscellaneous system facilities
set( EC_HAVE_FOPENCOOKIE 1 )
set( EC_HAVE_FSYNC 1 )
set( EC_HAVE_FDATASYNC 1 )
set( EC_HAVE_DIRFD 1 )
set( EC_HAVE_SYSPROC 0 )
set( EC_HAVE_SYSPROCFS 1 )
set( EC_HAVE_EXECINFO_BACKTRACE 1 )
set( EC_HAVE_GMTIME_R 1 )
set( EC_HAVE_GETPWUID_R 1 )
set( EC_HAVE_GETPWNAM_R 1 )
set( EC_HAVE_READDIR_R 1 )
set( EC_HAVE_DIRENT_D_TYPE 1 )
set( EC_HAVE_GETHOSTBYNAME_R 1 )
set( EC_HAVE_ATTRIBUTE_CONSTRUCTOR 1 )
set( EC_ATTRIBUTE_CONSTRUCTOR_INITS_ARGV 0 )
set( EC_HAVE_PROCFS 1 )
set( EC_HAVE_DLFCN_H 1 )
set( EC_HAVE_DLADDR 1 )
set( EC_HAVE_AIOCB 1 )
set( EC_HAVE_AIOCB64 1 )

# Disable relative rpaths as aprun does not respect it
# NOTE(review): FORCE overwrites any ENABLE_RELATIVE_RPATHS value already in the
# cache, including one given on the command line. The aprun rationale may be
# inherited from another toolchain — confirm it applies to this system.
set( ENABLE_RELATIVE_RPATHS OFF CACHE STRING "Disable relative rpaths" FORCE )
MichaelSt98 marked this conversation as resolved.
Show resolved Hide resolved

####################################################################
# COMPILER
####################################################################

# ecbuild behaviour switches for this toolchain.
# NOTE(review): presumably ECBUILD_FIND_MPI makes ecbuild locate MPI itself and
# ECBUILD_TRUST_FLAGS makes it accept the flags below without checking them —
# confirm against the ecbuild documentation.
set( ECBUILD_FIND_MPI ON )
set( ECBUILD_TRUST_FLAGS ON )
MichaelSt98 marked this conversation as resolved.
Show resolved Hide resolved

####################################################################
# Compiler FLAGS
####################################################################

# General Flags (add to default)
#
# The Fortran flags are listed one option per entry and joined with single
# spaces into ECBUILD_Fortran_FLAGS; the order of the entries is the order in
# which the options appear in the final string.
string(JOIN " " ECBUILD_Fortran_FLAGS
  "-g"
  "-qopenmp-threadprivate compat"
  "-assume byterecl"
  "-convert big_endian"
  "-traceback"
  "-align array64byte"
  "-warn nounused,nouncalled"
  "-march=core-avx2"
  "-finline-functions"
  "-finline-limit=1500"
  "-Winline"
  "-no-fma"
  "-assume realloc_lhs"
  "-fp-model precise"
  "-ftz"
  "-fp-speculation=safe"
  "-fast-transcendentals"
)

####################################################################
# Additional compiler flags for SYCL offload via CUDA backend
####################################################################

# Additional Intel DPCPP compiler for SYCL offload.
#
# The DPCPP install prefix can be overridden with a CLOUDSC_DPCPP_ROOT CMake
# variable or, failing that, an environment variable of the same name. The
# environment variable is checked because a toolchain file may be evaluated
# again in nested configure runs. If neither is given, the prefix falls back
# to the custom install this toolchain was originally written against.
if(NOT DEFINED CLOUDSC_DPCPP_ROOT)
  if(NOT "$ENV{CLOUDSC_DPCPP_ROOT}" STREQUAL "")
    set(CLOUDSC_DPCPP_ROOT "$ENV{CLOUDSC_DPCPP_ROOT}")
  else()
    set(CLOUDSC_DPCPP_ROOT "/home/nams/opt/dpcpp")
  endif()
endif()
set(CMAKE_CXX_COMPILER "${CLOUDSC_DPCPP_ROOT}/bin/clang++")

# Initial set of flags to get things going with a custom DPCPP install on AC
set(CMAKE_CXX_FLAGS "-O3 -L${CLOUDSC_DPCPP_ROOT}/lib -fopenmp -lstdc++")
# Enable SYCL compilation, targeting NVIDIA GPUs (sm_80) through the CUDA backend
string(APPEND CMAKE_CXX_FLAGS " -fsycl-early-optimizations -fsycl")
string(APPEND CMAKE_CXX_FLAGS " -fsycl-targets=nvptx64-nvidia-cuda")
string(APPEND CMAKE_CXX_FLAGS " -Xsycl-target-backend --cuda-gpu-arch=sm_80")
# Headers from the Intel 2021.4.0 compiler installation
string(APPEND CMAKE_CXX_FLAGS " -I/usr/local/apps/intel/2021.4.0/compiler/2021.4.0/linux/compiler/include")
Comment on lines +43 to +47
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think since we're now using add_sycl_to_target, the proper way would be to add the relevant ones to SYCL_FLAGS only. But because we're hijacking the CMake module from a newer installation, I found it tricky to get to work reliably. So, just as an FYI and not necessary to change anything here.


####################################################################
# LINK FLAGS
####################################################################

# Linker flags handed to ecbuild for shared libraries, modules and executables.
# Modules and executables additionally write a linker map file named "loadmap";
# executables also link with --as-needed.
set( ECBUILD_SHARED_LINKER_FLAGS "-Wl,--eh-frame-hdr " )
set( ECBUILD_MODULE_LINKER_FLAGS "-Wl,--eh-frame-hdr -Wl,-Map,loadmap" )
set( ECBUILD_EXE_LINKER_FLAGS "-Wl,--eh-frame-hdr -Wl,-Map,loadmap -Wl,--as-needed" )
# NOTE(review): LIBCRAY_CXX_RTS is not set in the visible part of this toolchain;
# if it is undefined this caches an empty string. Looks like a leftover from a
# Cray toolchain — confirm whether it is still needed.
set( ECBUILD_CXX_IMPLICIT_LINK_LIBRARIES "${LIBCRAY_CXX_RTS}" CACHE STRING "" )
MichaelSt98 marked this conversation as resolved.
Show resolved Hide resolved
6 changes: 6 additions & 0 deletions bundle.yml
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,12 @@ options :
cmake: >
ENABLE_HIP=ON

- with-sycl :
help: Enable GPU kernel variant based on SYCL
cmake: >
ENABLE_SYCL=ON
ENABLE_CLOUDSC_SYCL=ON
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The second is redundant: it is switched on automatically when SYCL and Serialbox are enabled, because it is DEFAULT ON.


- with-mpi :
help : Enable MPI-parallel kernel
cmake : ENABLE_MPI=ON
Expand Down
1 change: 1 addition & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,6 @@ add_subdirectory(cloudsc_python)
add_subdirectory(cloudsc_c)
add_subdirectory(cloudsc_cuda)
add_subdirectory(cloudsc_hip)
add_subdirectory(cloudsc_sycl)
add_subdirectory(cloudsc_gpu)
add_subdirectory(cloudsc_loki)
Loading
Loading