Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

remove ALPAKA_ASSERT_OFFLOAD, introduce ALPAKA_ASSERT_ACC #2199

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions cmake/alpakaCommon.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,12 @@ if(alpaka_DISABLE_VENDOR_RNG)
target_compile_definitions(alpaka INTERFACE "ALPAKA_DISABLE_VENDOR_RNG")
endif()

# Device side assert
# When alpaka_ASSERT_ACC_ENABLE is OFF, the ALPAKA_DISABLE_ASSERT_ACC compile definition is propagated to every
# target that links against alpaka.
# Note: CMake's if() has no '!' operator; negation must be spelled NOT. With '!' the argument is treated as an
# (undefined) name, so the condition is always false and the definition would never be added.
option(alpaka_ASSERT_ACC_ENABLE "Enable device side asserts. In case value is OFF device side asserts will be disabled even if NDEBUG is not defined." ON)
if(NOT alpaka_ASSERT_ACC_ENABLE)
    target_compile_definitions(alpaka INTERFACE "ALPAKA_DISABLE_ASSERT_ACC")
endif()

#-------------------------------------------------------------------------------
# Debug output of common variables.
if(${alpaka_DEBUG} GREATER 1)
Expand Down Expand Up @@ -731,9 +737,6 @@ if(alpaka_ACC_SYCL_ENABLE)
endif()

target_compile_definitions(alpaka INTERFACE "ALPAKA_DEBUG=${alpaka_DEBUG}")
if(alpaka_DEBUG_OFFLOAD_ASSUME_HOST)
target_compile_definitions(alpaka INTERFACE "ALPAKA_DEBUG_OFFLOAD_ASSUME_HOST")
endif()

target_compile_definitions(alpaka INTERFACE "ALPAKA_BLOCK_SHARED_DYN_MEMBER_ALLOC_KIB=${alpaka_BLOCK_SHARED_DYN_MEMBER_ALLOC_KIB}")

Expand Down
2 changes: 1 addition & 1 deletion example/bufferCopy/src/bufferCopy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ struct TestBufferKernel
for(size_t z = idx[0]; z < data.extent(0); z += gridSize[0])
for(size_t y = idx[1]; y < data.extent(1); y += gridSize[1])
for(size_t x = idx[2]; x < data.extent(2); x += gridSize[2])
ALPAKA_ASSERT_OFFLOAD(
ALPAKA_ASSERT_ACC(
data(z, y, x)
== alpaka::mapIdx<1u>(Vec{z, y, x}, Vec{data.extent(0), data.extent(1), data.extent(2)})[0]);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ namespace alpaka
public:
BlockSharedMemDynMember(std::size_t sizeBytes) : m_dynPitch(getPitch(sizeBytes))
{
ALPAKA_ASSERT_OFFLOAD(static_cast<std::uint32_t>(sizeBytes) <= staticAllocBytes());
ALPAKA_ASSERT_ACC(static_cast<std::uint32_t>(sizeBytes) <= staticAllocBytes());
}

auto dynMemBegin() const -> uint8_t*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ namespace alpaka::detail
: m_mem(mem)
, m_capacity(static_cast<std::uint32_t>(capacity))
{
ALPAKA_ASSERT_OFFLOAD((m_mem == nullptr) == (m_capacity == 0u));
ALPAKA_ASSERT_ACC((m_mem == nullptr) == (m_capacity == 0u));
}
#else
BlockSharedMemStMemberImpl(std::uint8_t* mem, std::size_t) : m_mem(mem)
Expand All @@ -52,12 +52,12 @@ namespace alpaka::detail
{
// Add meta data chunk in front of the user data
m_allocdBytes = varChunkEnd<MetaData>(m_allocdBytes);
ALPAKA_ASSERT_OFFLOAD(m_allocdBytes <= m_capacity);
ALPAKA_ASSERT_ACC(m_allocdBytes <= m_capacity);
auto* meta = getLatestVarPtr<MetaData>();

// Allocate variable
m_allocdBytes = varChunkEnd<T>(m_allocdBytes);
ALPAKA_ASSERT_OFFLOAD(m_allocdBytes <= m_capacity);
ALPAKA_ASSERT_ACC(m_allocdBytes <= m_capacity);

// Update meta data with id and offset for the allocated variable.
meta->id = id;
Expand Down Expand Up @@ -87,7 +87,7 @@ namespace alpaka::detail
// Adjust offset to be aligned
std::uint32_t const alignedMetaDataOffset
= varChunkEnd<MetaData>(off) - static_cast<std::uint32_t>(sizeof(MetaData));
ALPAKA_ASSERT_OFFLOAD(
ALPAKA_ASSERT_ACC(
(alignedMetaDataOffset + static_cast<std::uint32_t>(sizeof(MetaData))) <= m_allocdBytes);
auto* metaDataPtr = reinterpret_cast<MetaData*>(m_mem + alignedMetaDataOffset);
off = metaDataPtr->offset;
Expand Down
55 changes: 40 additions & 15 deletions include/alpaka/core/Assert.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,22 +9,47 @@
#include <cassert>
#include <type_traits>

//! The assert can be explicitly disabled by defining NDEBUG
#define ALPAKA_ASSERT(...) assert(__VA_ARGS__)

#if defined(ALPAKA_DEBUG_OFFLOAD_ASSUME_HOST) || defined(SYCL_EXT_ONEAPI_ASSERT)
# define ALPAKA_ASSERT_OFFLOAD(EXPRESSION) ALPAKA_ASSERT(EXPRESSION)
#elif defined __AMDGCN__ && (!defined NDEBUG)
# define ALPAKA_ASSERT_OFFLOAD(EXPRESSION) \
do \
{ \
if(!(EXPRESSION)) \
__builtin_trap(); \
} while(false)
//! Macro which expands to a noop.
//! Macro enforces a semicolon after the call.
#define ALPAKA_NOOP(...) \
do \
{ \
} while(false)

//! ALPAKA_ASSERT_ACC_IMPL is the assert-like building block used to implement accelerator-side assertions.
//!
//! - If ALPAKA_DISABLE_ASSERT_ACC is defined, the macro forwards to ALPAKA_NOOP, i.e. it expands to a no-op that
//!   still requires a trailing semicolon.
//! - Otherwise the macro forwards to ALPAKA_ASSERT, which in turn can be disabled by defining NDEBUG.
#ifdef ALPAKA_DISABLE_ASSERT_ACC
#    define ALPAKA_ASSERT_ACC_IMPL(...) ALPAKA_NOOP(__VA_ARGS__)
#else
#    define ALPAKA_ASSERT_ACC_IMPL(...) ALPAKA_ASSERT(__VA_ARGS__)
#endif

//! ALPAKA_ASSERT_ACC is an assert-like macro.
//!
//! In device code for a GPU or SYCL backend it can be disabled by setting the ALPAKA_DISABLE_ASSERT_ACC preprocessor
//! symbol or the NDEBUG preprocessor symbol. In device code for a native C++ CPU backend and in host code, it is
//! equivalent to ALPAKA_ASSERT, and can be disabled by setting the NDEBUG preprocessor symbol.
#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) && defined(__CUDA_ARCH__)
// CUDA device code
# define ALPAKA_ASSERT_ACC(...) ALPAKA_ASSERT_ACC_IMPL(__VA_ARGS__)
#elif defined(ALPAKA_ACC_GPU_HIP_ENABLED) && defined(__HIP_DEVICE_COMPILE__)
// HIP/ROCm device code
# define ALPAKA_ASSERT_ACC(...) ALPAKA_ASSERT_ACC_IMPL(__VA_ARGS__)
#elif defined(ALPAKA_ACC_SYCL_ENABLED) && defined(__SYCL_DEVICE_ONLY__)
// SYCL/oneAPI device code
# if defined(SYCL_EXT_ONEAPI_ASSERT)
# define ALPAKA_ASSERT_ACC(...) ALPAKA_ASSERT_ACC_IMPL(__VA_ARGS__)
# else
# define ALPAKA_ASSERT_ACC(...) ALPAKA_NOOP(__VA_ARGS__)
# endif
// add here any other #elif conditions for non-CPU backends
// ...
#else
# define ALPAKA_ASSERT_OFFLOAD(EXPRESSION) \
do \
{ \
} while(false)
// CPU backend, or host code
# define ALPAKA_ASSERT_ACC(...) ALPAKA_ASSERT(__VA_ARGS__)
#endif

namespace alpaka::core
Expand All @@ -38,7 +63,7 @@ namespace alpaka::core
[[maybe_unused]] TArg const& arg)
{
if constexpr(std::is_signed_v<TArg>)
ALPAKA_ASSERT_OFFLOAD(arg >= 0);
ALPAKA_ASSERT_ACC(arg >= 0);

// Nothing to do for unsigned types.
}
Expand All @@ -63,7 +88,7 @@ namespace alpaka::core
[[maybe_unused]] TRhs const& rhs)
{
if constexpr(std::is_signed_v<TRhs> || (TLhs::value != 0u))
ALPAKA_ASSERT_OFFLOAD(TLhs::value > rhs);
ALPAKA_ASSERT_ACC(TLhs::value > rhs);

// Nothing to do for unsigned types comparing to zero.
}
Expand Down
2 changes: 1 addition & 1 deletion include/alpaka/idx/bt/IdxBtOmp.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ namespace alpaka
static auto getIdx(bt::IdxBtOmp<TDim, TIdx> const& /* idx */, TWorkDiv const& workDiv) -> Vec<TDim, TIdx>
{
// We assume that the thread id is positive.
ALPAKA_ASSERT_OFFLOAD(::omp_get_thread_num() >= 0);
ALPAKA_ASSERT_ACC(::omp_get_thread_num() >= 0);
// \TODO: Would it be faster to precompute the index and cache it inside an array?
return mapIdx<TDim::value>(
Vec<DimInt<1u>, TIdx>(static_cast<TIdx>(::omp_get_thread_num())),
Expand Down
4 changes: 2 additions & 2 deletions include/alpaka/warp/WarpGenericSycl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,8 @@ namespace alpaka::warp::trait
template<typename T>
static auto shfl(warp::WarpGenericSycl<TDim> const& warp, T value, std::int32_t srcLane, std::int32_t width)
{
ALPAKA_ASSERT_OFFLOAD(width > 0);
ALPAKA_ASSERT_OFFLOAD(srcLane >= 0);
ALPAKA_ASSERT_ACC(width > 0);
ALPAKA_ASSERT_ACC(srcLane >= 0);

/* If width < srcLane the sub-group needs to be split into assumed subdivisions. The first item of each
subdivision has the assumed index 0. The srcLane index is relative to the subdivisions.
Expand Down