Skip to content

Commit

Permalink
Replace CUB_NS_QUALIFIER with cub:: where possible.
Browse files Browse the repository at this point in the history
This effectively reverts a large portion of
363c352 by relying on the namespace
alias added in the previous commit.
  • Loading branch information
alliepiper committed Jul 27, 2021
1 parent 59ca84f commit 231cd14
Show file tree
Hide file tree
Showing 21 changed files with 390 additions and 390 deletions.
36 changes: 18 additions & 18 deletions thrust/system/cuda/detail/adjacent_difference.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,9 @@ namespace __adjacent_difference {

template <int _BLOCK_THREADS,
int _ITEMS_PER_THREAD = 1,
CUB_NS_QUALIFIER::BlockLoadAlgorithm _LOAD_ALGORITHM = CUB_NS_QUALIFIER::BLOCK_LOAD_DIRECT,
CUB_NS_QUALIFIER::CacheLoadModifier _LOAD_MODIFIER = CUB_NS_QUALIFIER::LOAD_DEFAULT,
CUB_NS_QUALIFIER::BlockStoreAlgorithm _STORE_ALGORITHM = CUB_NS_QUALIFIER::BLOCK_STORE_DIRECT>
cub::BlockLoadAlgorithm _LOAD_ALGORITHM = cub::BLOCK_LOAD_DIRECT,
cub::CacheLoadModifier _LOAD_MODIFIER = cub::LOAD_DEFAULT,
cub::BlockStoreAlgorithm _STORE_ALGORITHM = cub::BLOCK_STORE_DIRECT>
struct PtxPolicy
{
enum
Expand All @@ -77,9 +77,9 @@ namespace __adjacent_difference {
ITEMS_PER_TILE = BLOCK_THREADS * ITEMS_PER_THREAD
};

static const CUB_NS_QUALIFIER::BlockLoadAlgorithm LOAD_ALGORITHM = _LOAD_ALGORITHM;
static const CUB_NS_QUALIFIER::CacheLoadModifier LOAD_MODIFIER = _LOAD_MODIFIER;
static const CUB_NS_QUALIFIER::BlockStoreAlgorithm STORE_ALGORITHM = _STORE_ALGORITHM;
static const cub::BlockLoadAlgorithm LOAD_ALGORITHM = _LOAD_ALGORITHM;
static const cub::CacheLoadModifier LOAD_MODIFIER = _LOAD_MODIFIER;
static const cub::BlockStoreAlgorithm STORE_ALGORITHM = _STORE_ALGORITHM;
};

template<int INPUT_SIZE, int NOMINAL_4B_ITEMS_PER_THREAD>
Expand Down Expand Up @@ -115,9 +115,9 @@ namespace __adjacent_difference {
};
typedef PtxPolicy<128,
ITEMS_PER_THREAD,
CUB_NS_QUALIFIER::BLOCK_LOAD_WARP_TRANSPOSE,
CUB_NS_QUALIFIER::LOAD_DEFAULT,
CUB_NS_QUALIFIER::BLOCK_STORE_WARP_TRANSPOSE>
cub::BLOCK_LOAD_WARP_TRANSPOSE,
cub::LOAD_DEFAULT,
cub::BLOCK_STORE_WARP_TRANSPOSE>
type;
};
template <class T>
Expand All @@ -131,9 +131,9 @@ namespace __adjacent_difference {
};
typedef PtxPolicy<128,
ITEMS_PER_THREAD,
CUB_NS_QUALIFIER::BLOCK_LOAD_WARP_TRANSPOSE,
CUB_NS_QUALIFIER::LOAD_LDG,
CUB_NS_QUALIFIER::BLOCK_STORE_WARP_TRANSPOSE>
cub::BLOCK_LOAD_WARP_TRANSPOSE,
cub::LOAD_LDG,
cub::BLOCK_STORE_WARP_TRANSPOSE>
type;
};

Expand All @@ -159,11 +159,11 @@ namespace __adjacent_difference {
typedef typename core::BlockStore<PtxPlan, OutputIt, input_type>::type
BlockStore;

typedef CUB_NS_QUALIFIER::BlockAdjacentDifference<input_type,
PtxPlan::BLOCK_THREADS,
1,
1,
Arch::ver>
typedef cub::BlockAdjacentDifference<input_type,
PtxPlan::BLOCK_THREADS,
1,
1,
Arch::ver>
BlockAdjacentDifference;

union TempStorage
Expand Down Expand Up @@ -396,7 +396,7 @@ namespace __adjacent_difference {


Size tile_size = difference_plan.items_per_tile;
Size num_tiles = CUB_NS_QUALIFIER::DivideAndRoundUp(num_items, tile_size);
Size num_tiles = cub::DivideAndRoundUp(num_items, tile_size);

size_t tmp1 = num_tiles * sizeof(input_type);
size_t vshmem_size = core::vshmem_size(difference_plan.shared_memory_size,
Expand Down
20 changes: 10 additions & 10 deletions thrust/system/cuda/detail/async/exclusive_scan.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,16 +74,16 @@ async_exclusive_scan_n(execution_policy<DerivedPolicy>& policy,
InitialValueType init,
BinaryOp op)
{
using Dispatch32 = CUB_NS_QUALIFIER::DispatchScan<ForwardIt,
OutputIt,
BinaryOp,
InitialValueType,
thrust::detail::int32_t>;
using Dispatch64 = CUB_NS_QUALIFIER::DispatchScan<ForwardIt,
OutputIt,
BinaryOp,
InitialValueType,
thrust::detail::int64_t>;
using Dispatch32 = cub::DispatchScan<ForwardIt,
OutputIt,
BinaryOp,
InitialValueType,
thrust::detail::int32_t>;
using Dispatch64 = cub::DispatchScan<ForwardIt,
OutputIt,
BinaryOp,
InitialValueType,
thrust::detail::int64_t>;

auto const device_alloc = get_async_device_allocator(policy);
unique_eager_event ev;
Expand Down
12 changes: 6 additions & 6 deletions thrust/system/cuda/detail/async/inclusive_scan.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,15 +72,15 @@ async_inclusive_scan_n(execution_policy<DerivedPolicy>& policy,
OutputIt out,
BinaryOp op)
{
using Dispatch32 = CUB_NS_QUALIFIER::DispatchScan<ForwardIt,
using Dispatch32 = cub::DispatchScan<ForwardIt,
OutputIt,
BinaryOp,
CUB_NS_QUALIFIER::NullType,
cub::NullType,
thrust::detail::int32_t>;
using Dispatch64 = CUB_NS_QUALIFIER::DispatchScan<ForwardIt,
using Dispatch64 = cub::DispatchScan<ForwardIt,
OutputIt,
BinaryOp,
CUB_NS_QUALIFIER::NullType,
cub::NullType,
thrust::detail::int64_t>;

auto const device_alloc = get_async_device_allocator(policy);
Expand All @@ -99,7 +99,7 @@ async_inclusive_scan_n(execution_policy<DerivedPolicy>& policy,
first,
out,
op,
CUB_NS_QUALIFIER::NullType{},
cub::NullType{},
n_fixed,
nullptr,
THRUST_DEBUG_SYNC_FLAG));
Expand Down Expand Up @@ -146,7 +146,7 @@ async_inclusive_scan_n(execution_policy<DerivedPolicy>& policy,
first,
out,
op,
CUB_NS_QUALIFIER::NullType{},
cub::NullType{},
n_fixed,
user_raw_stream,
THRUST_DEBUG_SYNC_FLAG));
Expand Down
8 changes: 4 additions & 4 deletions thrust/system/cuda/detail/async/reduce.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ auto async_reduce_n(

size_t tmp_size = 0;
thrust::cuda_cub::throw_on_error(
CUB_NS_QUALIFIER::DeviceReduce::Reduce(
cub::DeviceReduce::Reduce(
nullptr
, tmp_size
, first
Expand Down Expand Up @@ -162,7 +162,7 @@ auto async_reduce_n(
// Run reduction.

thrust::cuda_cub::throw_on_error(
CUB_NS_QUALIFIER::DeviceReduce::Reduce(
cub::DeviceReduce::Reduce(
tmp_ptr
, tmp_size
, first
Expand Down Expand Up @@ -233,7 +233,7 @@ auto async_reduce_into_n(

size_t tmp_size = 0;
thrust::cuda_cub::throw_on_error(
CUB_NS_QUALIFIER::DeviceReduce::Reduce(
cub::DeviceReduce::Reduce(
nullptr
, tmp_size
, first
Expand Down Expand Up @@ -297,7 +297,7 @@ auto async_reduce_into_n(
// Run reduction.

thrust::cuda_cub::throw_on_error(
CUB_NS_QUALIFIER::DeviceReduce::Reduce(
cub::DeviceReduce::Reduce(
tmp_ptr
, tmp_size
, first
Expand Down
10 changes: 5 additions & 5 deletions thrust/system/cuda/detail/async/sort.h
Original file line number Diff line number Diff line change
Expand Up @@ -293,12 +293,12 @@ invoke_radix_sort(
cudaStream_t stream
, void* tmp_ptr
, std::size_t& tmp_size
, CUB_NS_QUALIFIER::DoubleBuffer<T>& keys
, cub::DoubleBuffer<T>& keys
, Size& n
, StrictWeakOrdering
)
{
return CUB_NS_QUALIFIER::DeviceRadixSort::SortKeys(
return cub::DeviceRadixSort::SortKeys(
tmp_ptr
, tmp_size
, keys
Expand All @@ -319,12 +319,12 @@ invoke_radix_sort(
cudaStream_t stream
, void* tmp_ptr
, std::size_t& tmp_size
, CUB_NS_QUALIFIER::DoubleBuffer<T>& keys
, cub::DoubleBuffer<T>& keys
, Size& n
, StrictWeakOrdering
)
{
return CUB_NS_QUALIFIER::DeviceRadixSort::SortKeysDescending(
return cub::DeviceRadixSort::SortKeysDescending(
tmp_ptr
, tmp_size
, keys
Expand Down Expand Up @@ -366,7 +366,7 @@ auto async_stable_sort_n(

unique_eager_event e;

CUB_NS_QUALIFIER::DoubleBuffer<T> keys(
cub::DoubleBuffer<T> keys(
raw_pointer_cast(&*first), nullptr
);

Expand Down
24 changes: 12 additions & 12 deletions thrust/system/cuda/detail/binary_search.h
Original file line number Diff line number Diff line change
Expand Up @@ -187,9 +187,9 @@ namespace __binary_search {

template <int _BLOCK_THREADS,
int _ITEMS_PER_THREAD = 1,
CUB_NS_QUALIFIER::BlockLoadAlgorithm _LOAD_ALGORITHM = CUB_NS_QUALIFIER::BLOCK_LOAD_DIRECT,
CUB_NS_QUALIFIER::CacheLoadModifier _LOAD_MODIFIER = CUB_NS_QUALIFIER::LOAD_LDG,
CUB_NS_QUALIFIER::BlockStoreAlgorithm _STORE_ALGORITHM = CUB_NS_QUALIFIER::BLOCK_STORE_DIRECT>
cub::BlockLoadAlgorithm _LOAD_ALGORITHM = cub::BLOCK_LOAD_DIRECT,
cub::CacheLoadModifier _LOAD_MODIFIER = cub::LOAD_LDG,
cub::BlockStoreAlgorithm _STORE_ALGORITHM = cub::BLOCK_STORE_DIRECT>
struct PtxPolicy
{
enum
Expand All @@ -199,9 +199,9 @@ namespace __binary_search {
ITEMS_PER_TILE = _BLOCK_THREADS * _ITEMS_PER_THREAD
};

static const CUB_NS_QUALIFIER::BlockLoadAlgorithm LOAD_ALGORITHM = _LOAD_ALGORITHM;
static const CUB_NS_QUALIFIER::CacheLoadModifier LOAD_MODIFIER = _LOAD_MODIFIER;
static const CUB_NS_QUALIFIER::BlockStoreAlgorithm STORE_ALGORITHM = _STORE_ALGORITHM;
static const cub::BlockLoadAlgorithm LOAD_ALGORITHM = _LOAD_ALGORITHM;
static const cub::CacheLoadModifier LOAD_MODIFIER = _LOAD_MODIFIER;
static const cub::BlockStoreAlgorithm STORE_ALGORITHM = _STORE_ALGORITHM;
}; // PtxPolicy

template <class Arch, class T>
Expand All @@ -218,9 +218,9 @@ namespace __binary_search {

typedef PtxPolicy<128,
ITEMS_PER_THREAD,
CUB_NS_QUALIFIER::BLOCK_LOAD_WARP_TRANSPOSE,
CUB_NS_QUALIFIER::LOAD_LDG,
CUB_NS_QUALIFIER::BLOCK_STORE_TRANSPOSE>
cub::BLOCK_LOAD_WARP_TRANSPOSE,
cub::LOAD_LDG,
cub::BLOCK_STORE_TRANSPOSE>
type;
};

Expand All @@ -237,9 +237,9 @@ namespace __binary_search {

typedef PtxPolicy<128,
ITEMS_PER_THREAD,
CUB_NS_QUALIFIER::BLOCK_LOAD_WARP_TRANSPOSE,
CUB_NS_QUALIFIER::LOAD_LDG,
CUB_NS_QUALIFIER::BLOCK_STORE_WARP_TRANSPOSE>
cub::BLOCK_LOAD_WARP_TRANSPOSE,
cub::LOAD_LDG,
cub::BLOCK_STORE_WARP_TRANSPOSE>
type;
};

Expand Down
44 changes: 22 additions & 22 deletions thrust/system/cuda/detail/copy_if.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,9 @@ namespace __copy_if {

template <int _BLOCK_THREADS,
int _ITEMS_PER_THREAD = 1,
CUB_NS_QUALIFIER::BlockLoadAlgorithm _LOAD_ALGORITHM = CUB_NS_QUALIFIER::BLOCK_LOAD_DIRECT,
CUB_NS_QUALIFIER::CacheLoadModifier _LOAD_MODIFIER = CUB_NS_QUALIFIER::LOAD_LDG,
CUB_NS_QUALIFIER::BlockScanAlgorithm _SCAN_ALGORITHM = CUB_NS_QUALIFIER::BLOCK_SCAN_WARP_SCANS>
cub::BlockLoadAlgorithm _LOAD_ALGORITHM = cub::BLOCK_LOAD_DIRECT,
cub::CacheLoadModifier _LOAD_MODIFIER = cub::LOAD_LDG,
cub::BlockScanAlgorithm _SCAN_ALGORITHM = cub::BLOCK_SCAN_WARP_SCANS>
struct PtxPolicy
{
enum
Expand All @@ -83,9 +83,9 @@ namespace __copy_if {
ITEMS_PER_THREAD = _ITEMS_PER_THREAD,
ITEMS_PER_TILE = _BLOCK_THREADS * _ITEMS_PER_THREAD,
};
static const CUB_NS_QUALIFIER::BlockLoadAlgorithm LOAD_ALGORITHM = _LOAD_ALGORITHM;
static const CUB_NS_QUALIFIER::CacheLoadModifier LOAD_MODIFIER = _LOAD_MODIFIER;
static const CUB_NS_QUALIFIER::BlockScanAlgorithm SCAN_ALGORITHM = _SCAN_ALGORITHM;
static const cub::BlockLoadAlgorithm LOAD_ALGORITHM = _LOAD_ALGORITHM;
static const cub::CacheLoadModifier LOAD_MODIFIER = _LOAD_MODIFIER;
static const cub::BlockScanAlgorithm SCAN_ALGORITHM = _SCAN_ALGORITHM;
}; // struct PtxPolicy

template<class, class>
Expand All @@ -104,9 +104,9 @@ namespace __copy_if {

typedef PtxPolicy<128,
ITEMS_PER_THREAD,
CUB_NS_QUALIFIER::BLOCK_LOAD_WARP_TRANSPOSE,
CUB_NS_QUALIFIER::LOAD_LDG,
CUB_NS_QUALIFIER::BLOCK_SCAN_WARP_SCANS>
cub::BLOCK_LOAD_WARP_TRANSPOSE,
cub::LOAD_LDG,
cub::BLOCK_SCAN_WARP_SCANS>
type;
}; // Tuning<350>

Expand All @@ -124,9 +124,9 @@ namespace __copy_if {

typedef PtxPolicy<128,
ITEMS_PER_THREAD,
CUB_NS_QUALIFIER::BLOCK_LOAD_WARP_TRANSPOSE,
CUB_NS_QUALIFIER::LOAD_LDG,
CUB_NS_QUALIFIER::BLOCK_SCAN_WARP_SCANS>
cub::BLOCK_LOAD_WARP_TRANSPOSE,
cub::LOAD_LDG,
cub::BLOCK_SCAN_WARP_SCANS>
type;
}; // Tuning<350>

Expand All @@ -143,9 +143,9 @@ namespace __copy_if {

typedef PtxPolicy<128,
ITEMS_PER_THREAD,
CUB_NS_QUALIFIER::BLOCK_LOAD_WARP_TRANSPOSE,
CUB_NS_QUALIFIER::LOAD_DEFAULT,
CUB_NS_QUALIFIER::BLOCK_SCAN_WARP_SCANS>
cub::BLOCK_LOAD_WARP_TRANSPOSE,
cub::LOAD_DEFAULT,
cub::BLOCK_SCAN_WARP_SCANS>
type;
}; // Tuning<300>

Expand All @@ -162,7 +162,7 @@ namespace __copy_if {
typedef typename iterator_traits<ItemsIt>::value_type item_type;
typedef typename iterator_traits<StencilIt>::value_type stencil_type;

typedef CUB_NS_QUALIFIER::ScanTileState<Size> ScanTileState;
typedef cub::ScanTileState<Size> ScanTileState;

template <class Arch>
struct PtxPlan : Tuning<Arch, item_type>::type
Expand All @@ -175,13 +175,13 @@ namespace __copy_if {
typedef typename core::BlockLoad<PtxPlan, ItemsLoadIt>::type BlockLoadItems;
typedef typename core::BlockLoad<PtxPlan, StencilLoadIt>::type BlockLoadStencil;

typedef CUB_NS_QUALIFIER::TilePrefixCallbackOp<Size,
CUB_NS_QUALIFIER::Sum,
typedef cub::TilePrefixCallbackOp<Size,
cub::Sum,
ScanTileState,
Arch::ver>
TilePrefixCallback;

typedef CUB_NS_QUALIFIER::BlockScan<Size,
typedef cub::BlockScan<Size,
PtxPlan::BLOCK_THREADS,
PtxPlan::SCAN_ALGORITHM,
1,
Expand Down Expand Up @@ -445,7 +445,7 @@ namespace __copy_if {
{
TilePrefixCallback prefix_cb(tile_state,
storage.scan_storage.prefix,
CUB_NS_QUALIFIER::Sum(),
cub::Sum(),
tile_idx);
BlockScan(storage.scan_storage.scan)
.ExclusiveSum(selection_flags,
Expand Down Expand Up @@ -638,7 +638,7 @@ namespace __copy_if {
typename get_plan<copy_if_agent>::type copy_if_plan = copy_if_agent::get_plan(stream);

int tile_size = copy_if_plan.items_per_tile;
size_t num_tiles = CUB_NS_QUALIFIER::DivideAndRoundUp(num_items, tile_size);
size_t num_tiles = cub::DivideAndRoundUp(num_items, tile_size);

size_t vshmem_size = core::vshmem_size(copy_if_plan.shared_memory_size,
num_tiles);
Expand All @@ -653,7 +653,7 @@ namespace __copy_if {


void* allocations[2] = {NULL, NULL};
status = CUB_NS_QUALIFIER::AliasTemporaries(d_temp_storage,
status = cub::AliasTemporaries(d_temp_storage,
temp_storage_bytes,
allocations,
allocation_sizes);
Expand Down
2 changes: 1 addition & 1 deletion thrust/system/cuda/detail/core/agent_launcher.h
Original file line number Diff line number Diff line change
Expand Up @@ -536,7 +536,7 @@ namespace core {
max_blocks_per_sm_impl(K k, int block_threads)
{
int occ;
cudaError_t status = CUB_NS_QUALIFIER::MaxSmOccupancy(occ, k, block_threads);
cudaError_t status = cub::MaxSmOccupancy(occ, k, block_threads);
return cuda_optional<int>(status == cudaSuccess ? occ : -1, status);
}

Expand Down
Loading

0 comments on commit 231cd14

Please sign in to comment.