Skip to content

Commit

Permalink
Replace cub::Traits by numeric_limits and deprecate
Browse files Browse the repository at this point in the history
* Consistently use ::cuda::std::numeric_limits in CUB

Fixes: NVIDIA#3381
  • Loading branch information
bernhardmgruber committed Jan 29, 2025
1 parent 86457ca commit 55c40a9
Show file tree
Hide file tree
Showing 45 changed files with 525 additions and 561 deletions.
27 changes: 2 additions & 25 deletions c2h/generators.cu
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
#include <thrust/scan.h>
#include <thrust/tabulate.h>

#include <cuda/std/type_traits>
#include <cuda/type_traits>

#include <cstdint>

Expand Down Expand Up @@ -118,30 +118,7 @@ private:
c2h::device_vector<float> m_distribution;
};

// TODO(bgruber): modelled after cub::Traits. We should generalize this somewhere into libcu++.
template <typename T>
struct is_floating_point : ::cuda::std::is_floating_point<T>
{};
#ifdef _CCCL_HAS_NVFP16
template <>
struct is_floating_point<__half> : ::cuda::std::true_type
{};
#endif // _CCCL_HAS_NVFP16
#ifdef _CCCL_HAS_NVBF16
template <>
struct is_floating_point<__nv_bfloat16> : ::cuda::std::true_type
{};
#endif // _CCCL_HAS_NVBF16
#ifdef __CUDA_FP8_TYPES_EXIST__
template <>
struct is_floating_point<__nv_fp8_e4m3> : ::cuda::std::true_type
{};
template <>
struct is_floating_point<__nv_fp8_e5m2> : ::cuda::std::true_type
{};
#endif // __CUDA_FP8_TYPES_EXIST__

template <typename T, bool = is_floating_point<T>::value>
template <typename T, bool = ::cuda::is_floating_point_v<T>>
struct random_to_item_t
{
float m_min;
Expand Down
43 changes: 30 additions & 13 deletions c2h/include/c2h/bfloat16.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,10 @@ struct bfloat16_t
}
};

#ifdef __GNUC__
# pragma GCC diagnostic pop
#endif

/******************************************************************************
* I/O stream overloads
******************************************************************************/
Expand All @@ -229,28 +233,41 @@ inline std::ostream& operator<<(std::ostream& out, const __nv_bfloat16& x)
}

/******************************************************************************
* Traits overloads
* traits and limits
******************************************************************************/

_LIBCUDACXX_BEGIN_NAMESPACE_STD
template <>
struct CUB_NS_QUALIFIER::FpLimits<bfloat16_t>
struct __is_extended_floating_point<bfloat16_t> : true_type
{};
#ifndef _CCCL_NO_INLINE_VARIABLES
template <>
_CCCL_INLINE_VAR constexpr bool __is_extended_floating_point_v<bfloat16_t> = true;
#endif // _CCCL_NO_INLINE_VARIABLES

template <>
class __numeric_limits_impl<bfloat16_t, __numeric_limits_type::__floating_point>
{
static __host__ __device__ __forceinline__ bfloat16_t Max()
public:
static __host__ __device__ __forceinline__ bfloat16_t max()
{
return bfloat16_t(numeric_limits<__nv_bfloat16>::max());
}

static __host__ __device__ __forceinline__ bfloat16_t min()
{
return bfloat16_t::max();
return bfloat16_t(numeric_limits<__nv_bfloat16>::min());
}

static __host__ __device__ __forceinline__ bfloat16_t Lowest()
static __host__ __device__ __forceinline__ bfloat16_t lowest()
{
return bfloat16_t::lowest();
return bfloat16_t(numeric_limits<__nv_bfloat16>::lowest());
}
};
_LIBCUDACXX_END_NAMESPACE_STD

template <>
struct CUB_NS_QUALIFIER::NumericTraits<bfloat16_t>
: CUB_NS_QUALIFIER::BaseTraits<FLOATING_POINT, true, false, unsigned short, bfloat16_t>
{};

#ifdef __GNUC__
# pragma GCC diagnostic pop
#endif
struct CUB_NS_QUALIFIER::detail::unsigned_bits<bfloat16_t, void>
{
using type = unsigned short;
};
13 changes: 7 additions & 6 deletions c2h/include/c2h/catch2_test_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@

#include <cuda/std/bit>
#include <cuda/std/cmath>
#include <cuda/std/limits>
#include <cuda/std/type_traits>
#include <cuda/std/utility>

Expand Down Expand Up @@ -283,10 +284,10 @@ inline std::size_t adjust_seed_count(std::size_t requested)
}
} // namespace detail

#define C2H_SEED(N) \
c2h::seed_t \
{ \
GENERATE_COPY(take( \
detail::adjust_seed_count(N), \
random(std::numeric_limits<unsigned long long int>::min(), std::numeric_limits<unsigned long long int>::max()))) \
#define C2H_SEED(N) \
c2h::seed_t \
{ \
GENERATE_COPY(take(detail::adjust_seed_count(N), \
random(::cuda::std::numeric_limits<unsigned long long int>::min(), \
::cuda::std::numeric_limits<unsigned long long int>::max()))) \
}
28 changes: 14 additions & 14 deletions c2h/include/c2h/custom_type.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@

#pragma once

#include <limits>
#include <cuda/std/limits>

#include <memory>
#include <ostream>

Expand Down Expand Up @@ -178,34 +179,33 @@ class accumulateable_t

} // namespace c2h

namespace std
{
_LIBCUDACXX_BEGIN_NAMESPACE_STD
template <template <typename> class... Policies>
class numeric_limits<c2h::custom_type_t<Policies...>>
class __numeric_limits_impl<c2h::custom_type_t<Policies...>, __numeric_limits_type::__other>
{
public:
static c2h::custom_type_t<Policies...> max()
static __host__ __device__ c2h::custom_type_t<Policies...> max()
{
c2h::custom_type_t<Policies...> val;
val.key = std::numeric_limits<std::size_t>::max();
val.val = std::numeric_limits<std::size_t>::max();
val.key = numeric_limits<std::size_t>::max();
val.val = numeric_limits<std::size_t>::max();
return val;
}

static c2h::custom_type_t<Policies...> min()
static __host__ __device__ c2h::custom_type_t<Policies...> min()
{
c2h::custom_type_t<Policies...> val;
val.key = std::numeric_limits<std::size_t>::min();
val.val = std::numeric_limits<std::size_t>::min();
val.key = numeric_limits<std::size_t>::min();
val.val = numeric_limits<std::size_t>::min();
return val;
}

static c2h::custom_type_t<Policies...> lowest()
static __host__ __device__ c2h::custom_type_t<Policies...> lowest()
{
c2h::custom_type_t<Policies...> val;
val.key = std::numeric_limits<std::size_t>::lowest();
val.val = std::numeric_limits<std::size_t>::lowest();
val.key = numeric_limits<std::size_t>::lowest();
val.val = numeric_limits<std::size_t>::lowest();
return val;
}
};
} // namespace std
_LIBCUDACXX_END_NAMESPACE_STD
45 changes: 5 additions & 40 deletions c2h/include/c2h/generators.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@

#include <thrust/detail/config/device_system.h>

#include <limits>
#include <cuda/std/limits>

#include <c2h/custom_type.h>
#include <c2h/vector.h>
Expand All @@ -52,41 +52,6 @@ _CCCL_DIAG_PUSH
_CCCL_DIAG_POP
# endif // _CCCL_CUDACC_AT_LEAST(11, 8)
# endif // _CCCL_HAS_NVBF16

# if defined(__CUDA_FP8_TYPES_EXIST__)
namespace std
{
template <>
class numeric_limits<__nv_fp8_e4m3>
{
public:
static __nv_fp8_e4m3 max()
{
return cub::Traits<__nv_fp8_e4m3>::Max();
}

static __nv_fp8_e4m3 lowest()
{
return cub::Traits<__nv_fp8_e4m3>::Lowest();
}
};

template <>
class numeric_limits<__nv_fp8_e5m2>
{
public:
static __nv_fp8_e5m2 max()
{
return cub::Traits<__nv_fp8_e5m2>::Max();
}

static __nv_fp8_e5m2 lowest()
{
return cub::Traits<__nv_fp8_e5m2>::Lowest();
}
};
} // namespace std
# endif // defined(__CUDA_FP8_TYPES_EXIST__)
#endif // THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA

namespace c2h
Expand Down Expand Up @@ -157,8 +122,8 @@ void init_key_segments(const c2h::device_vector<OffsetT>& segment_offsets, KeyT*
template <template <typename> class... Ps>
void gen(seed_t seed,
c2h::device_vector<c2h::custom_type_t<Ps...>>& data,
c2h::custom_type_t<Ps...> min = std::numeric_limits<c2h::custom_type_t<Ps...>>::lowest(),
c2h::custom_type_t<Ps...> max = std::numeric_limits<c2h::custom_type_t<Ps...>>::max())
c2h::custom_type_t<Ps...> min = ::cuda::std::numeric_limits<c2h::custom_type_t<Ps...>>::lowest(),
c2h::custom_type_t<Ps...> max = ::cuda::std::numeric_limits<c2h::custom_type_t<Ps...>>::max())
{
detail::gen(seed,
reinterpret_cast<char*>(thrust::raw_pointer_cast(data.data())),
Expand All @@ -171,8 +136,8 @@ void gen(seed_t seed,
template <typename T>
void gen(seed_t seed,
c2h::device_vector<T>& data,
T min = std::numeric_limits<T>::lowest(),
T max = std::numeric_limits<T>::max());
T min = ::cuda::std::numeric_limits<T>::lowest(),
T max = ::cuda::std::numeric_limits<T>::max());

template <typename T>
void gen(modulo_t mod, c2h::device_vector<T>& data);
Expand Down
44 changes: 31 additions & 13 deletions c2h/include/c2h/half.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@

#include <cub/util_type.cuh>

#include <cuda/std/limits>
#include <cuda/std/type_traits>

#include <cstdint>
Expand Down Expand Up @@ -306,6 +307,10 @@ struct half_t
}
};

#ifdef __GNUC__
# pragma GCC diagnostic pop
#endif

/******************************************************************************
* I/O stream overloads
******************************************************************************/
Expand All @@ -324,28 +329,41 @@ inline std::ostream& operator<<(std::ostream& out, const __half& x)
}

/******************************************************************************
* Traits overloads
* traits and limits
******************************************************************************/

_LIBCUDACXX_BEGIN_NAMESPACE_STD
template <>
struct CUB_NS_QUALIFIER::FpLimits<half_t>
struct __is_extended_floating_point<half_t> : true_type
{};
#ifndef _CCCL_NO_INLINE_VARIABLES
template <>
_CCCL_INLINE_VAR constexpr bool __is_extended_floating_point_v<half_t> = true;
#endif // _CCCL_NO_INLINE_VARIABLES

template <>
class __numeric_limits_impl<half_t, __numeric_limits_type::__floating_point>
{
static __host__ __device__ __forceinline__ half_t Max()
public:
static __host__ __device__ __forceinline__ half_t max()
{
return half_t(numeric_limits<__half>::max());
}

static __host__ __device__ __forceinline__ half_t min()
{
return (half_t::max)();
return half_t(numeric_limits<__half>::min());
}

static __host__ __device__ __forceinline__ half_t Lowest()
static __host__ __device__ __forceinline__ half_t lowest()
{
return half_t::lowest();
return half_t(numeric_limits<__half>::lowest());
}
};
_LIBCUDACXX_END_NAMESPACE_STD

template <>
struct CUB_NS_QUALIFIER::NumericTraits<half_t>
: CUB_NS_QUALIFIER::BaseTraits<FLOATING_POINT, true, false, unsigned short, half_t>
{};

#ifdef __GNUC__
# pragma GCC diagnostic pop
#endif
struct CUB_NS_QUALIFIER::detail::unsigned_bits<half_t, void>
{
using type = unsigned short;
};
Loading

0 comments on commit 55c40a9

Please sign in to comment.