Skip to content
This repository has been archived by the owner on Mar 21, 2024. It is now read-only.

Commit

Permalink
Merge pull request #466 from senior-zero/enh-main/github/adj_diff_size_t
Browse files Browse the repository at this point in the history
Alternative approach to 64-bit indexing in adjacent difference
  • Loading branch information
gevtushenko authored May 5, 2022
2 parents 0b4e9eb + 91d7d6b commit c7aca46
Show file tree
Hide file tree
Showing 4 changed files with 162 additions and 112 deletions.
62 changes: 62 additions & 0 deletions cub/detail/choose_offset.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/******************************************************************************
* Copyright (c) 2011-2022, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
******************************************************************************/

#pragma once

#include <cub/config.cuh>

#include <cstdint>
#include <type_traits>

CUB_NAMESPACE_BEGIN

namespace detail
{

/**
 * Metafunction mapping the user-supplied `num_items` type (NumItemsT) to the
 * unsigned offset type used internally for global indexing.
 *
 * Rationale: inputs described by a type of at most 32 bits can always be
 * indexed with `std::uint32_t`; anything wider requires a 64-bit offset.
 * `unsigned long long` (rather than `std::uint64_t`) is used for the wide
 * case, matching the type accepted by CUDA device atomics.
 */
template <typename NumItemsT>
struct ChooseOffsetT
{
  // Reject non-integral types and bool (bool would be a meaningless count
  // and its 1-byte size could silently select a 32-bit offset).
  static_assert(
    std::is_integral<NumItemsT>::value &&
      !std::is_same<typename std::remove_cv<NumItemsT>::type, bool>::value,
    "NumItemsT must be an integral type, but not bool");

  // Wide (> 4 byte) counts get a 64-bit unsigned offset; everything else
  // fits in 32 bits.
  using Type = typename std::conditional<(sizeof(NumItemsT) > 4),
                                         unsigned long long,
                                         std::uint32_t>::type;
};

} // namespace detail

CUB_NAMESPACE_END

94 changes: 41 additions & 53 deletions cub/device/device_adjacent_difference.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,10 @@

#pragma once

#include "../config.cuh"
#include "../util_namespace.cuh"
#include "dispatch/dispatch_adjacent_difference.cuh"
#include <cub/config.cuh>
#include <cub/detail/choose_offset.cuh>
#include <cub/device/dispatch/dispatch_adjacent_difference.cuh>
#include <cub/util_namespace.cuh>

#include <thrust/detail/integer_traits.h>
#include <thrust/detail/cstdint.h>
Expand Down Expand Up @@ -98,9 +99,9 @@ CUB_NAMESPACE_BEGIN
struct DeviceAdjacentDifference
{
private:

template <bool in_place,
bool read_left,
typename NumItemsT,
typename InputIteratorT,
typename OutputIteratorT,
typename DifferenceOpT>
Expand All @@ -109,53 +110,28 @@ private:
std::size_t &temp_storage_bytes,
InputIteratorT d_input,
OutputIteratorT d_output,
std::size_t num_items,
NumItemsT num_items,
DifferenceOpT difference_op,
cudaStream_t stream,
bool debug_synchronous)
{
const auto uint64_threshold = static_cast<std::size_t>(
THRUST_NS_QUALIFIER::detail::integer_traits<
THRUST_NS_QUALIFIER::detail::int32_t>::const_max);

if (num_items <= uint64_threshold)
{
using OffsetT = std::uint32_t;
using DispatchT = DispatchAdjacentDifference<InputIteratorT,
OutputIteratorT,
DifferenceOpT,
OffsetT,
in_place,
read_left>;
using OffsetT = typename detail::ChooseOffsetT<NumItemsT>::Type;

return DispatchT::Dispatch(d_temp_storage,
temp_storage_bytes,
d_input,
d_output,
static_cast<OffsetT>(num_items),
difference_op,
stream,
debug_synchronous);
}
else
{
using OffsetT = std::uint64_t;
using DispatchT = DispatchAdjacentDifference<InputIteratorT,
OutputIteratorT,
DifferenceOpT,
OffsetT,
in_place,
read_left>;
using DispatchT = DispatchAdjacentDifference<InputIteratorT,
OutputIteratorT,
DifferenceOpT,
OffsetT,
in_place,
read_left>;

return DispatchT::Dispatch(d_temp_storage,
temp_storage_bytes,
d_input,
d_output,
static_cast<OffsetT>(num_items),
difference_op,
stream,
debug_synchronous);
}
return DispatchT::Dispatch(d_temp_storage,
temp_storage_bytes,
d_input,
d_output,
static_cast<OffsetT>(num_items),
difference_op,
stream,
debug_synchronous);
}

public:
Expand Down Expand Up @@ -234,6 +210,8 @@ public:
* Its `result_type` is convertible to a type in `OutputIteratorT`'s set of
* `value_types`.
*
* @tparam NumItemsT **[inferred]** Type of num_items
*
* @param[in] d_temp_storage
* Device-accessible allocation of temporary storage. When `nullptr`, the
* required allocation size is written to `temp_storage_bytes` and no work
Expand Down Expand Up @@ -265,13 +243,14 @@ public:
*/
template <typename InputIteratorT,
typename OutputIteratorT,
typename DifferenceOpT = cub::Difference>
typename DifferenceOpT = cub::Difference,
typename NumItemsT = std::uint32_t>
static CUB_RUNTIME_FUNCTION cudaError_t
SubtractLeftCopy(void *d_temp_storage,
std::size_t &temp_storage_bytes,
InputIteratorT d_input,
OutputIteratorT d_output,
std::size_t num_items,
NumItemsT num_items,
DifferenceOpT difference_op = {},
cudaStream_t stream = 0,
bool debug_synchronous = false)
Expand Down Expand Up @@ -353,6 +332,8 @@ public:
* Its `result_type` is convertible to a type in `RandomAccessIteratorT`'s
* set of `value_types`.
*
* @tparam NumItemsT **[inferred]** Type of num_items
*
* @param[in] d_temp_storage
* Device-accessible allocation of temporary storage. When `nullptr`, the
* required allocation size is written to `temp_storage_bytes` and no work
Expand Down Expand Up @@ -380,12 +361,13 @@ public:
* be printed to the console. Default is `false`.
*/
template <typename RandomAccessIteratorT,
typename DifferenceOpT = cub::Difference>
typename DifferenceOpT = cub::Difference,
typename NumItemsT = std::uint32_t>
static CUB_RUNTIME_FUNCTION cudaError_t
SubtractLeft(void *d_temp_storage,
std::size_t &temp_storage_bytes,
RandomAccessIteratorT d_input,
std::size_t num_items,
NumItemsT num_items,
DifferenceOpT difference_op = {},
cudaStream_t stream = 0,
bool debug_synchronous = false)
Expand Down Expand Up @@ -477,6 +459,8 @@ public:
* Its `result_type` is convertible to a type in `RandomAccessIteratorT`'s
* set of `value_types`.
*
* @tparam NumItemsT **[inferred]** Type of num_items
*
* @param[in] d_temp_storage
* Device-accessible allocation of temporary storage. When `nullptr`, the
* required allocation size is written to `temp_storage_bytes` and no work
Expand Down Expand Up @@ -508,13 +492,14 @@ public:
*/
template <typename InputIteratorT,
typename OutputIteratorT,
typename DifferenceOpT = cub::Difference>
typename DifferenceOpT = cub::Difference,
typename NumItemsT = std::uint32_t>
static CUB_RUNTIME_FUNCTION cudaError_t
SubtractRightCopy(void *d_temp_storage,
std::size_t &temp_storage_bytes,
InputIteratorT d_input,
OutputIteratorT d_output,
std::size_t num_items,
NumItemsT num_items,
DifferenceOpT difference_op = {},
cudaStream_t stream = 0,
bool debug_synchronous = false)
Expand Down Expand Up @@ -586,6 +571,8 @@ public:
* Its `result_type` is convertible to a type in `RandomAccessIteratorT`'s
* set of `value_types`.
*
* @tparam NumItemsT **[inferred]** Type of num_items
*
* @param[in] d_temp_storage
* Device-accessible allocation of temporary storage. When `nullptr`, the
* required allocation size is written to `temp_storage_bytes` and no work
Expand Down Expand Up @@ -613,12 +600,13 @@ public:
* printed to the console. Default is `false`.
*/
template <typename RandomAccessIteratorT,
typename DifferenceOpT = cub::Difference>
typename DifferenceOpT = cub::Difference,
typename NumItemsT = std::uint32_t>
static CUB_RUNTIME_FUNCTION cudaError_t
SubtractRight(void *d_temp_storage,
std::size_t &temp_storage_bytes,
RandomAccessIteratorT d_input,
std::size_t num_items,
NumItemsT num_items,
DifferenceOpT difference_op = {},
cudaStream_t stream = 0,
bool debug_synchronous = false)
Expand Down
26 changes: 3 additions & 23 deletions cub/device/device_radix_sort.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -34,32 +34,12 @@

#pragma once

#include <stdio.h>
#include <iterator>
#include <type_traits>

#include "dispatch/dispatch_radix_sort.cuh"
#include "../config.cuh"
#include <cub/config.cuh>
#include <cub/detail/choose_offset.cuh>
#include <cub/device/dispatch/dispatch_radix_sort.cuh>

CUB_NAMESPACE_BEGIN

namespace detail {
/** ChooseOffsetT checks NumItemsT, the type of the num_items parameter, and
* selects the offset type based on it. */
template <typename NumItemsT>
struct ChooseOffsetT
{
// NumItemsT must be an integral type (but not bool).
static_assert(std::is_integral<NumItemsT>::value &&
!std::is_same<typename std::remove_cv<NumItemsT>::type, bool>::value,
"NumItemsT must be an integral type, but not bool");

// Unsigned integer type for global offsets.
using Type = typename std::conditional<sizeof(NumItemsT) <= 4, uint32_t, unsigned long long>::type;
};

} // namespace detail

/**
* \brief DeviceRadixSort provides device-wide, parallel operations for computing a radix sort across a sequence of data items residing within device-accessible memory. ![](sorting_logo.png)
* \ingroup SingleModule
Expand Down
Loading

0 comments on commit c7aca46

Please sign in to comment.