Merge pull request #70 from ecmwf-ifs/naan-update-field_api
Update to new open-source FIELD_API
reuterbal authored Feb 7, 2024
2 parents 75b3911 + d66b5f5 commit 6961e75
Showing 10 changed files with 331 additions and 370 deletions.
12 changes: 11 additions & 1 deletion .github/workflows/build.yml
@@ -50,6 +50,11 @@ jobs:
         build_flags: '--cloudsc-fortran-pyiface=ON --cloudsc-python-f2py=ON'
 
       # Add nvhpc build configurations with serialbox and HDF5
+      - arch: nvhpc/21.9
+        nvhpc_version: 21.9
+        io_library_flag: ''
+        build_flags: '--with-gpu --with-loki --cmake="ENABLE_ACC=OFF"'
+        ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-cuda-' # GPU variants don't work on CPU runners, loki-c variant causes SIGFPE
       - arch: nvhpc/21.9
         nvhpc_version: 21.9
         io_library_flag: ''
@@ -66,6 +71,11 @@ jobs:
         build_flags: '--with-gpu --with-loki --with-cuda --with-atlas'
         ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-cuda' # GPU variants don't work on CPU runners, loki-c variant causes SIGFPE
 
+      - arch: nvhpc/23.5
+        nvhpc_version: 23.5
+        io_library_flag: ''
+        build_flags: '--with-gpu --with-loki --cmake="ENABLE_ACC=OFF"'
+        ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-loki-sca|-cuda-' # GPU variants don't work on CPU runners, loki-c and loki-sca variants cause SIGFPE
       - arch: nvhpc/23.5
         nvhpc_version: 23.5
         io_library_flag: ''
@@ -159,7 +169,7 @@ jobs:
 
       # Run ctest
       - name: Run CTest
-        if: ${{ !( contains(matrix.build_flags, '--single-precision') || (contains(matrix.build_flags, '--with-cuda') && contains(matrix.build_flags, '--with-atlas')) ) }}
+        if: ${{ !( contains(matrix.build_flags, '--single-precision') || (contains(matrix.build_flags, '--with-cuda') && contains(matrix.arch, 'nvhpc')) ) }}
         working-directory: ./build
         run: |
           source env.sh
2 changes: 2 additions & 0 deletions CMakeLists.txt
@@ -121,8 +121,10 @@ ecbuild_find_package( NAME atlas )
 ecbuild_add_option( FEATURE SINGLE_PRECISION
     DESCRIPTION "Build CLOUDSC in single precision" DEFAULT OFF
 )
+set(prec dp)
 if( HAVE_SINGLE_PRECISION )
     list(APPEND CLOUDSC_DEFINITIONS SINGLE)
+    set(prec sp)
 endif()
 
 # build executables
2 changes: 1 addition & 1 deletion README.md
@@ -79,7 +79,7 @@ Balthasar Reuter ([email protected])
   C version of CLOUDSC including loop fusion and temporary local
   array demotion.
 - **dwarf-cloudsc-gpu-scc-field**: GPU-enabled and optimized version of
-  CLOUDSC that uses the SCC loop layout, and uses [FIELD API](https://git.ecmwf.int/projects/RDX/repos/field_api/browse) (a Fortran library purpose-built for IFS data-structures that facilitates the
+  CLOUDSC that uses the SCC loop layout, and uses [FIELD API](https://github.com/ecmwf-ifs/field_api) (a Fortran library purpose-built for IFS data-structures that facilitates the
   creation and management of field objects in scientific code) to perform device offload
   and copyback. The intent is to demonstrate the explicit use of pinned host memory to speed up
   data transfers, as provided by the shipped prototype implementation, and
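For readers new to FIELD API, a minimal sketch of the offload/copyback pattern the README describes follows. The accessor methods match those used elsewhere in this commit; the module, factory routine, and argument names (FIELD_MODULE, FIELD_FACTORY_MODULE, FIELD_NEW, FIELD_DELETE, UBOUNDS, PERSISTENT) are assumptions about the open-source library and may differ between versions:

```fortran
PROGRAM FIELD_API_SKETCH
  USE PARKIND1, ONLY: JPRB
  USE FIELD_MODULE, ONLY: FIELD_3RB                       ! assumed module/type names
  USE FIELD_FACTORY_MODULE, ONLY: FIELD_NEW, FIELD_DELETE ! assumed factory interface
  IMPLICIT NONE
  CLASS(FIELD_3RB), POINTER :: F_T => NULL()
  REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PT(:,:,:)

  ! Persistent fields are backed by (pinned) host memory owned by the wrapper
  CALL FIELD_NEW(F_T, UBOUNDS=[32, 137, 100], PERSISTENT=.TRUE.)

  CALL F_T%GET_DEVICE_DATA_RDWR(PT) ! host-to-device copy; PT is a device pointer
  ! ... launch device kernels that read and write PT ...
  CALL F_T%SYNC_HOST_RDWR()         ! device-to-host copyback
  CALL FIELD_DELETE(F_T)
END PROGRAM FIELD_API_SKETCH
```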
2 changes: 1 addition & 1 deletion arch/toolchains/github-ubuntu-nvhpc.cmake
@@ -24,7 +24,7 @@ set( OpenMP_C_FLAGS "-mp -mp=bind,allcores,numa" CACHE STRING "" )
 
 # NB: We have to add `-mp` again to avoid undefined symbols during linking
 # (smells like an Nvidia bug)
-set( OpenACC_Fortran_FLAGS "-acc=host -mp" CACHE STRING "" )
+set( OpenACC_Fortran_FLAGS "-acc=gpu -mp" CACHE STRING "" )
 # Enable this to get more detailed compiler output
 # set( OpenACC_Fortran_FLAGS "${OpenACC_Fortran_FLAGS} -Minfo" )
 
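The one-line change above retargets OpenACC code generation from the multicore host (-acc=host) to the GPU (-acc=gpu). As a self-contained illustration (not part of this commit), nvfortran offloads the following loop to the device under -acc=gpu, whereas under -acc=host it compiles to ordinary CPU code and no data transfer occurs:

```fortran
PROGRAM ACC_DEMO
  IMPLICIT NONE
  INTEGER :: I
  REAL :: A(1000)
  ! Offloaded under -acc=gpu; copyout transfers A back to the host afterwards
  !$acc parallel loop copyout(A)
  DO I = 1, 1000
    A(I) = REAL(I)**2
  END DO
  WRITE(*,*) 'A(1) =', A(1), ' A(1000) =', A(1000)
END PROGRAM ACC_DEMO
```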
25 changes: 14 additions & 11 deletions bundle.yml
@@ -11,6 +11,7 @@ cmake : >
     BUILD_fckit=OFF
     BUILD_atlas=OFF
     ENABLE_OMP=ON
+    ENABLE_SINGLE_PRECISION=OFF
 
 projects :
 
@@ -51,6 +52,14 @@ projects :
       ECKIT_ENABLE_BUILD_TOOLS=OFF
       ECKIT_ENABLE_CUDA=OFF
 
+  - field_api :
+      git : https://github.com/ecmwf-ifs/field_api.git
+      version : main
+      optional: true
+      require : ecbuild
+      cmake : >
+          UTIL_MODULE_PATH=${CMAKE_SOURCE_DIR}/cloudsc-dwarf/src/common/module
+
   - fckit :
       git : https://github.com/ecmwf/fckit
       version : 0.11.0
@@ -68,16 +77,6 @@ projects :
       ATLAS_ENABLE_TESTS=OFF
       ATLAS_ENABLE_CUDA=OFF
 
-  - field_api :
-      git : ${BITBUCKET}/rdx/field_api
-      version : master
-      optional: true
-      require : ecbuild
-      cmake : >
-          ENABLE_FIELD_API_TESTS=OFF
-          ENABLE_FIELD_API_FIAT_BUILD=OFF
-          FIELD_API_UTIL_MODULE_PATH=${CMAKE_SOURCE_DIR}/cloudsc-dwarf/src/common/module
-
   - cloudsc-dwarf :
       # The CLOUDSC dwarf project with multiple implementations
       dir : $PWD
@@ -91,8 +90,12 @@ options :
       cmake : CMAKE_TOOLCHAIN_FILE={{value}}
 
   - single-precision :
+      # Disabling DOUBLE_PRECISION only affects field_api
       help : Enable single precision build of the dwarf
-      cmake : ENABLE_SINGLE_PRECISION=ON
+      cmake : >
+          ENABLE_SINGLE_PRECISION=ON
+          ENABLE_DOUBLE_PRECISION=OFF
+          FIELD_API_DEFINITIONS=SINGLE
 
   - with-gpu :
       help : Enable GPU kernels
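The SINGLE definition handed to FIELD_API above (FIELD_API_DEFINITIONS=SINGLE) is a preprocessor switch; by IFS convention such a flag selects the working-precision real kind at compile time. A minimal sketch of that parkind1-style mechanism, with illustrative names rather than code from this commit:

```fortran
MODULE PARKIND_SKETCH
  IMPLICIT NONE
  INTEGER, PARAMETER :: JPRD = SELECTED_REAL_KIND(13, 300) ! double precision
  INTEGER, PARAMETER :: JPRM = SELECTED_REAL_KIND(6, 37)   ! single precision
#ifdef SINGLE
  INTEGER, PARAMETER :: JPRB = JPRM  ! working precision: single
#else
  INTEGER, PARAMETER :: JPRB = JPRD  ! working precision: double (default)
#endif
END MODULE PARKIND_SKETCH
```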
149 changes: 89 additions & 60 deletions src/cloudsc_gpu/cloudsc_driver_gpu_scc_field_mod.F90
@@ -24,14 +24,15 @@ MODULE CLOUDSC_DRIVER_GPU_SCC_FIELD_MOD
 CONTAINS
 
   SUBROUTINE CLOUDSC_DRIVER_GPU_SCC_FIELD( &
-     & NUMOMP, NPROMA, NLEV, NGPTOT, NGPBLKS, NGPTOTG, KFLDX, PTSPHY, FIELD_STATE &
+     & NUMOMP, NPROMA, NLEV, NGPTOT, NGPBLKS, NGPTOTG, KFLDX, PTSPHY, FIELD_STATE, USE_PACKED &
     & )
     ! Driver routine that invokes the optimized CLAW-based CLOUDSC GPU kernel
 
    INTEGER(KIND=JPIM) :: NUMOMP, NPROMA, NLEV, NGPTOT, NGPBLKS, NGPTOTG
    INTEGER(KIND=JPIM) :: KFLDX
    REAL(KIND=JPRB)    :: PTSPHY  ! Physics timestep
    TYPE(CLOUDSC_FIELD_STATE), INTENT(INOUT) :: FIELD_STATE
+   LOGICAL, INTENT(IN) :: USE_PACKED
 
    REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PT(:,:,:) ! T at start of callpar
    REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PQ(:,:,:) ! Q at start of callpar
@@ -102,34 +103,41 @@ SUBROUTINE CLOUDSC_DRIVER_GPU_SCC_FIELD( &
     ! Global timer for the parallel region
     CALL TIMER%START(NUMOMP)
 
-    CALL FIELD_STATE%F_PT%GET_DEVICE_DATA_RDONLY(PT)
-    CALL FIELD_STATE%F_PQ%GET_DEVICE_DATA_RDONLY(PQ)
-    CALL FIELD_STATE%F_PVFA%GET_DEVICE_DATA_RDONLY(PVFA)
-    CALL FIELD_STATE%F_PVFL%GET_DEVICE_DATA_RDONLY(PVFL)
-    CALL FIELD_STATE%F_PVFI%GET_DEVICE_DATA_RDONLY(PVFI)
-    CALL FIELD_STATE%F_PDYNA%GET_DEVICE_DATA_RDONLY(PDYNA)
-    CALL FIELD_STATE%F_PDYNL%GET_DEVICE_DATA_RDONLY(PDYNL)
-    CALL FIELD_STATE%F_PDYNI%GET_DEVICE_DATA_RDONLY(PDYNI)
-    CALL FIELD_STATE%F_PHRSW%GET_DEVICE_DATA_RDONLY(PHRSW)
-    CALL FIELD_STATE%F_PHRLW%GET_DEVICE_DATA_RDONLY(PHRLW)
-    CALL FIELD_STATE%F_PVERVEL%GET_DEVICE_DATA_RDONLY(PVERVEL)
-    CALL FIELD_STATE%F_PAP%GET_DEVICE_DATA_RDONLY(PAP)
+    IF(USE_PACKED)THEN
+      CALL FIELD_STATE%DATA_RDONLY%SYNC_DEVICE_RDONLY()
+      CALL FIELD_STATE%DATA_RWONLY%SYNC_DEVICE_RDWR()
+      ! If this is called then the subsequent FIELD_STATE%FIELDS_RDONLY/RWONLY%PTR%GET_DEVICE_DATA()
+      ! calls don't trigger any data movement, they just return an updated device pointer
+    ENDIF
+
+    CALL FIELD_STATE%FIELDS_RDONLY(1)%PTR%GET_DEVICE_DATA_RDONLY(PT)
+    CALL FIELD_STATE%FIELDS_RDONLY(2)%PTR%GET_DEVICE_DATA_RDONLY(PQ)
+    CALL FIELD_STATE%FIELDS_RDONLY(3)%PTR%GET_DEVICE_DATA_RDONLY(PVFA)
+    CALL FIELD_STATE%FIELDS_RDONLY(4)%PTR%GET_DEVICE_DATA_RDONLY(PVFL)
+    CALL FIELD_STATE%FIELDS_RDONLY(5)%PTR%GET_DEVICE_DATA_RDONLY(PVFI)
+    CALL FIELD_STATE%FIELDS_RDONLY(6)%PTR%GET_DEVICE_DATA_RDONLY(PDYNA)
+    CALL FIELD_STATE%FIELDS_RDONLY(7)%PTR%GET_DEVICE_DATA_RDONLY(PDYNL)
+    CALL FIELD_STATE%FIELDS_RDONLY(8)%PTR%GET_DEVICE_DATA_RDONLY(PDYNI)
+    CALL FIELD_STATE%FIELDS_RDONLY(9)%PTR%GET_DEVICE_DATA_RDONLY(PHRSW)
+    CALL FIELD_STATE%FIELDS_RDONLY(10)%PTR%GET_DEVICE_DATA_RDONLY(PHRLW)
+    CALL FIELD_STATE%FIELDS_RDONLY(11)%PTR%GET_DEVICE_DATA_RDONLY(PVERVEL)
+    CALL FIELD_STATE%FIELDS_RDONLY(12)%PTR%GET_DEVICE_DATA_RDONLY(PAP)
     CALL FIELD_STATE%F_PAPH%GET_DEVICE_DATA_RDONLY(PAPH)
     CALL FIELD_STATE%F_PLSM%GET_DEVICE_DATA_RDONLY(PLSM)
     CALL FIELD_STATE%F_LDCUM%GET_DEVICE_DATA_RDONLY(LDCUM)
     CALL FIELD_STATE%F_KTYPE%GET_DEVICE_DATA_RDONLY(KTYPE)
-    CALL FIELD_STATE%F_PLU%GET_DEVICE_DATA_RDONLY(PLU)
-    CALL FIELD_STATE%F_PSNDE%GET_DEVICE_DATA_RDONLY(PSNDE)
-    CALL FIELD_STATE%F_PMFU%GET_DEVICE_DATA_RDONLY(PMFU)
-    CALL FIELD_STATE%F_PMFD%GET_DEVICE_DATA_RDONLY(PMFD)
-    CALL FIELD_STATE%F_PA%GET_DEVICE_DATA_RDONLY(PA)
+    CALL FIELD_STATE%FIELDS_RDONLY(13)%PTR%GET_DEVICE_DATA_RDONLY(PLU)
+    CALL FIELD_STATE%FIELDS_RDONLY(14)%PTR%GET_DEVICE_DATA_RDONLY(PSNDE)
+    CALL FIELD_STATE%FIELDS_RDONLY(15)%PTR%GET_DEVICE_DATA_RDONLY(PMFU)
+    CALL FIELD_STATE%FIELDS_RDONLY(16)%PTR%GET_DEVICE_DATA_RDONLY(PMFD)
+    CALL FIELD_STATE%FIELDS_RDONLY(17)%PTR%GET_DEVICE_DATA_RDONLY(PA)
     CALL FIELD_STATE%F_PCLV%GET_DEVICE_DATA_RDONLY(PCLV)
-    CALL FIELD_STATE%F_PSUPSAT%GET_DEVICE_DATA_RDONLY(PSUPSAT)
-    CALL FIELD_STATE%F_PLCRIT_AER%GET_DEVICE_DATA_RDONLY(PLCRIT_AER)
-    CALL FIELD_STATE%F_PICRIT_AER%GET_DEVICE_DATA_RDONLY(PICRIT_AER)
-    CALL FIELD_STATE%F_PRE_ICE%GET_DEVICE_DATA_RDONLY(PRE_ICE)
-    CALL FIELD_STATE%F_PCCN%GET_DEVICE_DATA_RDONLY(PCCN)
-    CALL FIELD_STATE%F_PNICE%GET_DEVICE_DATA_RDONLY(PNICE)
+    CALL FIELD_STATE%FIELDS_RDONLY(18)%PTR%GET_DEVICE_DATA_RDONLY(PSUPSAT)
+    CALL FIELD_STATE%FIELDS_RDONLY(19)%PTR%GET_DEVICE_DATA_RDONLY(PLCRIT_AER)
+    CALL FIELD_STATE%FIELDS_RDONLY(20)%PTR%GET_DEVICE_DATA_RDONLY(PICRIT_AER)
+    CALL FIELD_STATE%FIELDS_RDONLY(21)%PTR%GET_DEVICE_DATA_RDONLY(PRE_ICE)
+    CALL FIELD_STATE%FIELDS_RDONLY(22)%PTR%GET_DEVICE_DATA_RDONLY(PCCN)
+    CALL FIELD_STATE%FIELDS_RDONLY(23)%PTR%GET_DEVICE_DATA_RDONLY(PNICE)
     CALL FIELD_STATE%TENDENCY_TMP%F_T%GET_DEVICE_DATA_RDONLY(TEND_TMP_T)
     CALL FIELD_STATE%TENDENCY_TMP%F_Q%GET_DEVICE_DATA_RDONLY(TEND_TMP_Q)
     CALL FIELD_STATE%TENDENCY_TMP%F_A%GET_DEVICE_DATA_RDONLY(TEND_TMP_A)
@@ -138,25 +146,40 @@ SUBROUTINE CLOUDSC_DRIVER_GPU_SCC_FIELD( &
     CALL FIELD_STATE%F_PLUDE%GET_DEVICE_DATA_RDWR(PLUDE)
     CALL FIELD_STATE%F_PCOVPTOT%GET_DEVICE_DATA_RDWR(PCOVPTOT)
     CALL FIELD_STATE%F_PRAINFRAC_TOPRFZ%GET_DEVICE_DATA_RDWR(PRAINFRAC_TOPRFZ)
-    CALL FIELD_STATE%F_PFSQLF%GET_DEVICE_DATA_RDWR(PFSQLF)
-    CALL FIELD_STATE%F_PFSQIF%GET_DEVICE_DATA_RDWR(PFSQIF)
-    CALL FIELD_STATE%F_PFCQLNG%GET_DEVICE_DATA_RDWR(PFCQLNG)
-    CALL FIELD_STATE%F_PFCQNNG%GET_DEVICE_DATA_RDWR(PFCQNNG)
-    CALL FIELD_STATE%F_PFSQRF%GET_DEVICE_DATA_RDWR(PFSQRF)
-    CALL FIELD_STATE%F_PFSQSF%GET_DEVICE_DATA_RDWR(PFSQSF)
-    CALL FIELD_STATE%F_PFCQRNG%GET_DEVICE_DATA_RDWR(PFCQRNG)
-    CALL FIELD_STATE%F_PFCQSNG%GET_DEVICE_DATA_RDWR(PFCQSNG)
-    CALL FIELD_STATE%F_PFSQLTUR%GET_DEVICE_DATA_RDWR(PFSQLTUR)
-    CALL FIELD_STATE%F_PFSQITUR%GET_DEVICE_DATA_RDWR(PFSQITUR)
-    CALL FIELD_STATE%F_PFPLSL%GET_DEVICE_DATA_RDWR(PFPLSL)
-    CALL FIELD_STATE%F_PFPLSN%GET_DEVICE_DATA_RDWR(PFPLSN)
-    CALL FIELD_STATE%F_PFHPSL%GET_DEVICE_DATA_RDWR(PFHPSL)
-    CALL FIELD_STATE%F_PFHPSN%GET_DEVICE_DATA_RDWR(PFHPSN)
+    CALL FIELD_STATE%FIELDS_RWONLY(1)%PTR%GET_DEVICE_DATA_RDWR(PFSQLF)
+    CALL FIELD_STATE%FIELDS_RWONLY(2)%PTR%GET_DEVICE_DATA_RDWR(PFSQIF)
+    CALL FIELD_STATE%FIELDS_RWONLY(3)%PTR%GET_DEVICE_DATA_RDWR(PFCQLNG)
+    CALL FIELD_STATE%FIELDS_RWONLY(4)%PTR%GET_DEVICE_DATA_RDWR(PFCQNNG)
+    CALL FIELD_STATE%FIELDS_RWONLY(5)%PTR%GET_DEVICE_DATA_RDWR(PFSQRF)
+    CALL FIELD_STATE%FIELDS_RWONLY(6)%PTR%GET_DEVICE_DATA_RDWR(PFSQSF)
+    CALL FIELD_STATE%FIELDS_RWONLY(7)%PTR%GET_DEVICE_DATA_RDWR(PFCQRNG)
+    CALL FIELD_STATE%FIELDS_RWONLY(8)%PTR%GET_DEVICE_DATA_RDWR(PFCQSNG)
+    CALL FIELD_STATE%FIELDS_RWONLY(9)%PTR%GET_DEVICE_DATA_RDWR(PFSQLTUR)
+    CALL FIELD_STATE%FIELDS_RWONLY(10)%PTR%GET_DEVICE_DATA_RDWR(PFSQITUR)
+    CALL FIELD_STATE%FIELDS_RWONLY(11)%PTR%GET_DEVICE_DATA_RDWR(PFPLSL)
+    CALL FIELD_STATE%FIELDS_RWONLY(12)%PTR%GET_DEVICE_DATA_RDWR(PFPLSN)
+    CALL FIELD_STATE%FIELDS_RWONLY(13)%PTR%GET_DEVICE_DATA_RDWR(PFHPSL)
+    CALL FIELD_STATE%FIELDS_RWONLY(14)%PTR%GET_DEVICE_DATA_RDWR(PFHPSN)
     CALL FIELD_STATE%TENDENCY_LOC%F_T%GET_DEVICE_DATA_RDWR(TEND_LOC_T)
     CALL FIELD_STATE%TENDENCY_LOC%F_Q%GET_DEVICE_DATA_RDWR(TEND_LOC_Q)
     CALL FIELD_STATE%TENDENCY_LOC%F_A%GET_DEVICE_DATA_RDWR(TEND_LOC_A)
     CALL FIELD_STATE%TENDENCY_LOC%F_CLD%GET_DEVICE_DATA_RDWR(TEND_LOC_CLD)
-    !$acc data copyin(yrecldp)
+
+    !$acc data copyin(yrecldp) deviceptr(PT, PQ, TEND_TMP_T, TEND_TMP_Q, &
+    !$acc &   TEND_TMP_A, TEND_TMP_CLD, TEND_LOC_T, TEND_LOC_Q, &
+    !$acc &   TEND_LOC_A, TEND_LOC_CLD, PVFA, PVFL, PVFI, &
+    !$acc &   PDYNA, PDYNL, PDYNI, PHRSW, PHRLW, &
+    !$acc &   PVERVEL, PAP, PAPH, &
+    !$acc &   PLSM, LDCUM, KTYPE, &
+    !$acc &   PLU, PLUDE, PSNDE, PMFU, PMFD, &
+    !$acc &   PA, PCLV, PSUPSAT, &
+    !$acc &   PLCRIT_AER, PICRIT_AER, &
+    !$acc &   PRE_ICE, PCCN, PNICE, &
+    !$acc &   PCOVPTOT, PRAINFRAC_TOPRFZ, &
+    !$acc &   PFSQLF, PFSQIF, PFCQNNG, PFCQLNG, &
+    !$acc &   PFSQRF, PFSQSF, PFCQRNG, PFCQSNG, &
+    !$acc &   PFSQLTUR, PFSQITUR, &
+    !$acc &   PFPLSL, PFPLSN, PFHPSL, PFHPSN)
 
     ! Local timer for each thread
     TID = GET_THREAD_NUM()
@@ -198,27 +221,33 @@ SUBROUTINE CLOUDSC_DRIVER_GPU_SCC_FIELD( &
 
     CALL TIMER%THREAD_END(TID)
 
-    CALL FIELD_STATE%F_PLUDE%ENSURE_HOST()
-    CALL FIELD_STATE%F_PCOVPTOT%ENSURE_HOST()
-    CALL FIELD_STATE%F_PRAINFRAC_TOPRFZ%ENSURE_HOST()
-    CALL FIELD_STATE%F_PFSQLF%ENSURE_HOST()
-    CALL FIELD_STATE%F_PFSQIF%ENSURE_HOST()
-    CALL FIELD_STATE%F_PFCQLNG%ENSURE_HOST()
-    CALL FIELD_STATE%F_PFCQNNG%ENSURE_HOST()
-    CALL FIELD_STATE%F_PFSQRF%ENSURE_HOST()
-    CALL FIELD_STATE%F_PFSQSF%ENSURE_HOST()
-    CALL FIELD_STATE%F_PFCQRNG%ENSURE_HOST()
-    CALL FIELD_STATE%F_PFCQSNG%ENSURE_HOST()
-    CALL FIELD_STATE%F_PFSQLTUR%ENSURE_HOST()
-    CALL FIELD_STATE%F_PFSQITUR%ENSURE_HOST()
-    CALL FIELD_STATE%F_PFPLSL%ENSURE_HOST()
-    CALL FIELD_STATE%F_PFPLSN%ENSURE_HOST()
-    CALL FIELD_STATE%F_PFHPSL%ENSURE_HOST()
-    CALL FIELD_STATE%F_PFHPSN%ENSURE_HOST()
-    CALL FIELD_STATE%TENDENCY_LOC%F_T%ENSURE_HOST()
-    CALL FIELD_STATE%TENDENCY_LOC%F_Q%ENSURE_HOST()
-    CALL FIELD_STATE%TENDENCY_LOC%F_A%ENSURE_HOST()
-    CALL FIELD_STATE%TENDENCY_LOC%F_CLD%ENSURE_HOST()
+    IF(USE_PACKED)THEN
+      CALL FIELD_STATE%DATA_RWONLY%SYNC_HOST_RDWR()
+      ! If this is called then the subsequent FIELD_STATE%FIELDS_RWONLY%PTR%SYNC_HOST_RDWR() calls
+      ! don't trigger any data movement
+    ENDIF
+
+    CALL FIELD_STATE%F_PLUDE%SYNC_HOST_RDWR()
+    CALL FIELD_STATE%F_PCOVPTOT%SYNC_HOST_RDWR()
+    CALL FIELD_STATE%F_PRAINFRAC_TOPRFZ%SYNC_HOST_RDWR()
+    CALL FIELD_STATE%FIELDS_RWONLY(1)%PTR%SYNC_HOST_RDWR()
+    CALL FIELD_STATE%FIELDS_RWONLY(2)%PTR%SYNC_HOST_RDWR()
+    CALL FIELD_STATE%FIELDS_RWONLY(3)%PTR%SYNC_HOST_RDWR()
+    CALL FIELD_STATE%FIELDS_RWONLY(4)%PTR%SYNC_HOST_RDWR()
+    CALL FIELD_STATE%FIELDS_RWONLY(5)%PTR%SYNC_HOST_RDWR()
+    CALL FIELD_STATE%FIELDS_RWONLY(6)%PTR%SYNC_HOST_RDWR()
+    CALL FIELD_STATE%FIELDS_RWONLY(7)%PTR%SYNC_HOST_RDWR()
+    CALL FIELD_STATE%FIELDS_RWONLY(8)%PTR%SYNC_HOST_RDWR()
+    CALL FIELD_STATE%FIELDS_RWONLY(9)%PTR%SYNC_HOST_RDWR()
+    CALL FIELD_STATE%FIELDS_RWONLY(10)%PTR%SYNC_HOST_RDWR()
+    CALL FIELD_STATE%FIELDS_RWONLY(11)%PTR%SYNC_HOST_RDWR()
+    CALL FIELD_STATE%FIELDS_RWONLY(12)%PTR%SYNC_HOST_RDWR()
+    CALL FIELD_STATE%FIELDS_RWONLY(13)%PTR%SYNC_HOST_RDWR()
+    CALL FIELD_STATE%FIELDS_RWONLY(14)%PTR%SYNC_HOST_RDWR()
+    CALL FIELD_STATE%TENDENCY_LOC%F_T%SYNC_HOST_RDWR()
+    CALL FIELD_STATE%TENDENCY_LOC%F_Q%SYNC_HOST_RDWR()
+    CALL FIELD_STATE%TENDENCY_LOC%F_A%SYNC_HOST_RDWR()
+    CALL FIELD_STATE%TENDENCY_LOC%F_CLD%SYNC_HOST_RDWR()
 
     CALL TIMER%END()
 
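Two aspects of the driver changes above deserve a note. When USE_PACKED is set, the read-only and read-write field collections share packed backing stores (DATA_RDONLY and DATA_RWONLY), so one bulk SYNC_DEVICE/SYNC_HOST transfer per store replaces dozens of per-field copies, and the subsequent per-field GET_DEVICE_DATA and SYNC_HOST_RDWR calls only update pointers. The OpenACC deviceptr clause then declares those pointers as already device-resident, so the runtime performs no implicit copies of its own. Gathering the fields into the FIELDS_RDONLY/FIELDS_RWONLY pointer arrays also makes the per-field boilerplate loopable; a minimal sketch (member names follow this diff, the module name in the USE statement is an assumption):

```fortran
SUBROUTINE COPYBACK_RW_FIELDS(FIELD_STATE, USE_PACKED)
  USE PARKIND1, ONLY: JPIM
  USE CLOUDSC_FIELD_STATE_MOD, ONLY: CLOUDSC_FIELD_STATE ! module name assumed
  IMPLICIT NONE
  TYPE(CLOUDSC_FIELD_STATE), INTENT(INOUT) :: FIELD_STATE
  LOGICAL, INTENT(IN) :: USE_PACKED
  INTEGER(KIND=JPIM) :: JFLD
  ! Packed mode: one bulk device-to-host transfer of the shared backing store
  IF (USE_PACKED) CALL FIELD_STATE%DATA_RWONLY%SYNC_HOST_RDWR()
  ! Per-field syncs are then pointer updates only; in unpacked mode each call
  ! performs its own device-to-host copy
  DO JFLD = 1, SIZE(FIELD_STATE%FIELDS_RWONLY)
    CALL FIELD_STATE%FIELDS_RWONLY(JFLD)%PTR%SYNC_HOST_RDWR()
  ENDDO
END SUBROUTINE COPYBACK_RW_FIELDS
```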
5 changes: 4 additions & 1 deletion src/cloudsc_gpu/dwarf_cloudsc_gpu.F90
@@ -285,13 +285,16 @@ PROGRAM DWARF_CLOUDSC
   ! Call the driver to perform the parallel loop over our kernel
   CALL CLOUDSC_DRIVER_GPU_SCC_FIELD( &
      & NUMOMP, NPROMA, GLOBAL_STATE%KLEV, NGPTOT, GLOBAL_STATE%NBLOCKS, NGPTOTG, &
-     & GLOBAL_STATE%KFLDX, GLOBAL_STATE%PTSPHY, GLOBAL_STATE &
+     & GLOBAL_STATE%KFLDX, GLOBAL_STATE%PTSPHY, GLOBAL_STATE, USE_PACKED &
      & )
 #endif
 
 
   ! Validate the output against serialized reference data
   CALL GLOBAL_STATE%VALIDATE(NPROMA, NGPTOT, NGPTOTG)
+#ifdef CLOUDSC_GPU_SCC_FIELD
+  CALL GLOBAL_STATE%FINALIZE(USE_PACKED)
+#endif
 
   ! Tear down MPI environment
   CALL CLOUDSC_MPI_END()
2 changes: 1 addition & 1 deletion src/common/CMakeLists.txt
@@ -94,7 +94,7 @@ ecbuild_add_library( TARGET cloudsc-common-lib
         $<${HAVE_MPI}:MPI::MPI_Fortran>
         $<${HAVE_HDF5}:hdf5::hdf5_fortran>
         $<${HAVE_SERIALBOX}:Serialbox::Serialbox_Fortran>
-        $<${HAVE_FIELD_API}:field_api>
+        $<${HAVE_FIELD_API}:field_api_${prec}>
 )
 
 if( HAVE_CUDA )
