Skip to content

Commit

Permalink
Merge pull request NVIDIA#1492 from allisonvacanti/cub_namespace_conv…
Browse files Browse the repository at this point in the history
…enience

Pull the fully qualified cub:: namespace into thrust::
  • Loading branch information
alliepiper authored Jul 28, 2021
2 parents 368266e + 231cd14 commit 29d46bf
Show file tree
Hide file tree
Showing 22 changed files with 423 additions and 391 deletions.
34 changes: 33 additions & 1 deletion thrust/system/cuda/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@

#include <thrust/detail/config.h>

// We don't directly include <cub/version.cuh> since it doesn't exist in
// older releases. This header will always pull in version info:
#include <cub/util_namespace.cuh>

#if defined(__CUDACC__) || defined(__NVCOMPILER_CUDA__)
# if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__>= 350 && defined(__CUDACC_RDC__))
# define __THRUST_HAS_CUDART__ 1
Expand Down Expand Up @@ -69,9 +73,37 @@
#endif

#ifndef THRUST_IGNORE_CUB_VERSION_CHECK

#include <thrust/version.h>
#include <cub/util_namespace.cuh> // This includes <cub/version.cuh> in newer releases.
#if THRUST_VERSION != CUB_VERSION
#error The version of CUB in your include path is not compatible with this release of Thrust. CUB is now included in the CUDA Toolkit, so you no longer need to use your own checkout of CUB. Define THRUST_IGNORE_CUB_VERSION_CHECK to ignore this.
#endif

// Make sure the CUB namespace has been declared using the modern macros:
CUB_NAMESPACE_BEGIN
CUB_NAMESPACE_END

#else // THRUST_IGNORE_CUB_VERSION_CHECK

// Make sure the CUB namespace has been declared. Use the old macros for compat
// with older CUB:
CUB_NS_PREFIX
namespace cub {}
CUB_NS_POSTFIX

// Older versions of CUB do not define this. Set it to a reasonable default if
// not provided.
#ifndef CUB_NS_QUALIFIER
#define CUB_NS_QUALIFIER ::cub
#endif

#endif // THRUST_IGNORE_CUB_VERSION_CHECK

// Pull the fully qualified cub:: namespace into the thrust:: namespace so we
// don't have to use CUB_NS_QUALIFIER as long as we're in thrust::.
THRUST_NAMESPACE_BEGIN
namespace cub
{
using namespace CUB_NS_QUALIFIER;
}
THRUST_NAMESPACE_END
36 changes: 18 additions & 18 deletions thrust/system/cuda/detail/adjacent_difference.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,9 @@ namespace __adjacent_difference {

template <int _BLOCK_THREADS,
int _ITEMS_PER_THREAD = 1,
CUB_NS_QUALIFIER::BlockLoadAlgorithm _LOAD_ALGORITHM = CUB_NS_QUALIFIER::BLOCK_LOAD_DIRECT,
CUB_NS_QUALIFIER::CacheLoadModifier _LOAD_MODIFIER = CUB_NS_QUALIFIER::LOAD_DEFAULT,
CUB_NS_QUALIFIER::BlockStoreAlgorithm _STORE_ALGORITHM = CUB_NS_QUALIFIER::BLOCK_STORE_DIRECT>
cub::BlockLoadAlgorithm _LOAD_ALGORITHM = cub::BLOCK_LOAD_DIRECT,
cub::CacheLoadModifier _LOAD_MODIFIER = cub::LOAD_DEFAULT,
cub::BlockStoreAlgorithm _STORE_ALGORITHM = cub::BLOCK_STORE_DIRECT>
struct PtxPolicy
{
enum
Expand All @@ -77,9 +77,9 @@ namespace __adjacent_difference {
ITEMS_PER_TILE = BLOCK_THREADS * ITEMS_PER_THREAD
};

static const CUB_NS_QUALIFIER::BlockLoadAlgorithm LOAD_ALGORITHM = _LOAD_ALGORITHM;
static const CUB_NS_QUALIFIER::CacheLoadModifier LOAD_MODIFIER = _LOAD_MODIFIER;
static const CUB_NS_QUALIFIER::BlockStoreAlgorithm STORE_ALGORITHM = _STORE_ALGORITHM;
static const cub::BlockLoadAlgorithm LOAD_ALGORITHM = _LOAD_ALGORITHM;
static const cub::CacheLoadModifier LOAD_MODIFIER = _LOAD_MODIFIER;
static const cub::BlockStoreAlgorithm STORE_ALGORITHM = _STORE_ALGORITHM;
};

template<int INPUT_SIZE, int NOMINAL_4B_ITEMS_PER_THREAD>
Expand Down Expand Up @@ -115,9 +115,9 @@ namespace __adjacent_difference {
};
typedef PtxPolicy<128,
ITEMS_PER_THREAD,
CUB_NS_QUALIFIER::BLOCK_LOAD_WARP_TRANSPOSE,
CUB_NS_QUALIFIER::LOAD_DEFAULT,
CUB_NS_QUALIFIER::BLOCK_STORE_WARP_TRANSPOSE>
cub::BLOCK_LOAD_WARP_TRANSPOSE,
cub::LOAD_DEFAULT,
cub::BLOCK_STORE_WARP_TRANSPOSE>
type;
};
template <class T>
Expand All @@ -131,9 +131,9 @@ namespace __adjacent_difference {
};
typedef PtxPolicy<128,
ITEMS_PER_THREAD,
CUB_NS_QUALIFIER::BLOCK_LOAD_WARP_TRANSPOSE,
CUB_NS_QUALIFIER::LOAD_LDG,
CUB_NS_QUALIFIER::BLOCK_STORE_WARP_TRANSPOSE>
cub::BLOCK_LOAD_WARP_TRANSPOSE,
cub::LOAD_LDG,
cub::BLOCK_STORE_WARP_TRANSPOSE>
type;
};

Expand All @@ -159,11 +159,11 @@ namespace __adjacent_difference {
typedef typename core::BlockStore<PtxPlan, OutputIt, input_type>::type
BlockStore;

typedef CUB_NS_QUALIFIER::BlockAdjacentDifference<input_type,
PtxPlan::BLOCK_THREADS,
1,
1,
Arch::ver>
typedef cub::BlockAdjacentDifference<input_type,
PtxPlan::BLOCK_THREADS,
1,
1,
Arch::ver>
BlockAdjacentDifference;

union TempStorage
Expand Down Expand Up @@ -396,7 +396,7 @@ namespace __adjacent_difference {


Size tile_size = difference_plan.items_per_tile;
Size num_tiles = CUB_NS_QUALIFIER::DivideAndRoundUp(num_items, tile_size);
Size num_tiles = cub::DivideAndRoundUp(num_items, tile_size);

size_t tmp1 = num_tiles * sizeof(input_type);
size_t vshmem_size = core::vshmem_size(difference_plan.shared_memory_size,
Expand Down
20 changes: 10 additions & 10 deletions thrust/system/cuda/detail/async/exclusive_scan.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,16 +74,16 @@ async_exclusive_scan_n(execution_policy<DerivedPolicy>& policy,
InitialValueType init,
BinaryOp op)
{
using Dispatch32 = CUB_NS_QUALIFIER::DispatchScan<ForwardIt,
OutputIt,
BinaryOp,
InitialValueType,
thrust::detail::int32_t>;
using Dispatch64 = CUB_NS_QUALIFIER::DispatchScan<ForwardIt,
OutputIt,
BinaryOp,
InitialValueType,
thrust::detail::int64_t>;
using Dispatch32 = cub::DispatchScan<ForwardIt,
OutputIt,
BinaryOp,
InitialValueType,
thrust::detail::int32_t>;
using Dispatch64 = cub::DispatchScan<ForwardIt,
OutputIt,
BinaryOp,
InitialValueType,
thrust::detail::int64_t>;

auto const device_alloc = get_async_device_allocator(policy);
unique_eager_event ev;
Expand Down
12 changes: 6 additions & 6 deletions thrust/system/cuda/detail/async/inclusive_scan.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,15 +72,15 @@ async_inclusive_scan_n(execution_policy<DerivedPolicy>& policy,
OutputIt out,
BinaryOp op)
{
using Dispatch32 = CUB_NS_QUALIFIER::DispatchScan<ForwardIt,
using Dispatch32 = cub::DispatchScan<ForwardIt,
OutputIt,
BinaryOp,
CUB_NS_QUALIFIER::NullType,
cub::NullType,
thrust::detail::int32_t>;
using Dispatch64 = CUB_NS_QUALIFIER::DispatchScan<ForwardIt,
using Dispatch64 = cub::DispatchScan<ForwardIt,
OutputIt,
BinaryOp,
CUB_NS_QUALIFIER::NullType,
cub::NullType,
thrust::detail::int64_t>;

auto const device_alloc = get_async_device_allocator(policy);
Expand All @@ -99,7 +99,7 @@ async_inclusive_scan_n(execution_policy<DerivedPolicy>& policy,
first,
out,
op,
CUB_NS_QUALIFIER::NullType{},
cub::NullType{},
n_fixed,
nullptr,
THRUST_DEBUG_SYNC_FLAG));
Expand Down Expand Up @@ -146,7 +146,7 @@ async_inclusive_scan_n(execution_policy<DerivedPolicy>& policy,
first,
out,
op,
CUB_NS_QUALIFIER::NullType{},
cub::NullType{},
n_fixed,
user_raw_stream,
THRUST_DEBUG_SYNC_FLAG));
Expand Down
8 changes: 4 additions & 4 deletions thrust/system/cuda/detail/async/reduce.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ auto async_reduce_n(

size_t tmp_size = 0;
thrust::cuda_cub::throw_on_error(
CUB_NS_QUALIFIER::DeviceReduce::Reduce(
cub::DeviceReduce::Reduce(
nullptr
, tmp_size
, first
Expand Down Expand Up @@ -162,7 +162,7 @@ auto async_reduce_n(
// Run reduction.

thrust::cuda_cub::throw_on_error(
CUB_NS_QUALIFIER::DeviceReduce::Reduce(
cub::DeviceReduce::Reduce(
tmp_ptr
, tmp_size
, first
Expand Down Expand Up @@ -233,7 +233,7 @@ auto async_reduce_into_n(

size_t tmp_size = 0;
thrust::cuda_cub::throw_on_error(
CUB_NS_QUALIFIER::DeviceReduce::Reduce(
cub::DeviceReduce::Reduce(
nullptr
, tmp_size
, first
Expand Down Expand Up @@ -297,7 +297,7 @@ auto async_reduce_into_n(
// Run reduction.

thrust::cuda_cub::throw_on_error(
CUB_NS_QUALIFIER::DeviceReduce::Reduce(
cub::DeviceReduce::Reduce(
tmp_ptr
, tmp_size
, first
Expand Down
10 changes: 5 additions & 5 deletions thrust/system/cuda/detail/async/sort.h
Original file line number Diff line number Diff line change
Expand Up @@ -293,12 +293,12 @@ invoke_radix_sort(
cudaStream_t stream
, void* tmp_ptr
, std::size_t& tmp_size
, CUB_NS_QUALIFIER::DoubleBuffer<T>& keys
, cub::DoubleBuffer<T>& keys
, Size& n
, StrictWeakOrdering
)
{
return CUB_NS_QUALIFIER::DeviceRadixSort::SortKeys(
return cub::DeviceRadixSort::SortKeys(
tmp_ptr
, tmp_size
, keys
Expand All @@ -319,12 +319,12 @@ invoke_radix_sort(
cudaStream_t stream
, void* tmp_ptr
, std::size_t& tmp_size
, CUB_NS_QUALIFIER::DoubleBuffer<T>& keys
, cub::DoubleBuffer<T>& keys
, Size& n
, StrictWeakOrdering
)
{
return CUB_NS_QUALIFIER::DeviceRadixSort::SortKeysDescending(
return cub::DeviceRadixSort::SortKeysDescending(
tmp_ptr
, tmp_size
, keys
Expand Down Expand Up @@ -366,7 +366,7 @@ auto async_stable_sort_n(

unique_eager_event e;

CUB_NS_QUALIFIER::DoubleBuffer<T> keys(
cub::DoubleBuffer<T> keys(
raw_pointer_cast(&*first), nullptr
);

Expand Down
24 changes: 12 additions & 12 deletions thrust/system/cuda/detail/binary_search.h
Original file line number Diff line number Diff line change
Expand Up @@ -187,9 +187,9 @@ namespace __binary_search {

template <int _BLOCK_THREADS,
int _ITEMS_PER_THREAD = 1,
CUB_NS_QUALIFIER::BlockLoadAlgorithm _LOAD_ALGORITHM = CUB_NS_QUALIFIER::BLOCK_LOAD_DIRECT,
CUB_NS_QUALIFIER::CacheLoadModifier _LOAD_MODIFIER = CUB_NS_QUALIFIER::LOAD_LDG,
CUB_NS_QUALIFIER::BlockStoreAlgorithm _STORE_ALGORITHM = CUB_NS_QUALIFIER::BLOCK_STORE_DIRECT>
cub::BlockLoadAlgorithm _LOAD_ALGORITHM = cub::BLOCK_LOAD_DIRECT,
cub::CacheLoadModifier _LOAD_MODIFIER = cub::LOAD_LDG,
cub::BlockStoreAlgorithm _STORE_ALGORITHM = cub::BLOCK_STORE_DIRECT>
struct PtxPolicy
{
enum
Expand All @@ -199,9 +199,9 @@ namespace __binary_search {
ITEMS_PER_TILE = _BLOCK_THREADS * _ITEMS_PER_THREAD
};

static const CUB_NS_QUALIFIER::BlockLoadAlgorithm LOAD_ALGORITHM = _LOAD_ALGORITHM;
static const CUB_NS_QUALIFIER::CacheLoadModifier LOAD_MODIFIER = _LOAD_MODIFIER;
static const CUB_NS_QUALIFIER::BlockStoreAlgorithm STORE_ALGORITHM = _STORE_ALGORITHM;
static const cub::BlockLoadAlgorithm LOAD_ALGORITHM = _LOAD_ALGORITHM;
static const cub::CacheLoadModifier LOAD_MODIFIER = _LOAD_MODIFIER;
static const cub::BlockStoreAlgorithm STORE_ALGORITHM = _STORE_ALGORITHM;
}; // PtxPolicy

template <class Arch, class T>
Expand All @@ -218,9 +218,9 @@ namespace __binary_search {

typedef PtxPolicy<128,
ITEMS_PER_THREAD,
CUB_NS_QUALIFIER::BLOCK_LOAD_WARP_TRANSPOSE,
CUB_NS_QUALIFIER::LOAD_LDG,
CUB_NS_QUALIFIER::BLOCK_STORE_TRANSPOSE>
cub::BLOCK_LOAD_WARP_TRANSPOSE,
cub::LOAD_LDG,
cub::BLOCK_STORE_TRANSPOSE>
type;
};

Expand All @@ -237,9 +237,9 @@ namespace __binary_search {

typedef PtxPolicy<128,
ITEMS_PER_THREAD,
CUB_NS_QUALIFIER::BLOCK_LOAD_WARP_TRANSPOSE,
CUB_NS_QUALIFIER::LOAD_LDG,
CUB_NS_QUALIFIER::BLOCK_STORE_WARP_TRANSPOSE>
cub::BLOCK_LOAD_WARP_TRANSPOSE,
cub::LOAD_LDG,
cub::BLOCK_STORE_WARP_TRANSPOSE>
type;
};

Expand Down
Loading

0 comments on commit 29d46bf

Please sign in to comment.