Skip to content

Commit

Permalink
Add groupby product support (#7763)
Browse files Browse the repository at this point in the history
closes #4882

Added groupby.product support in both hash and sort groupby.

Authors:
  - Karthikeyan (https://github.com/karthikeyann)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Vyas Ramasubramani (https://github.com/vyasr)
  - Jake Hemstad (https://github.com/jrhemstad)
  - https://github.com/brandon-b-miller

URL: #7763
  • Loading branch information
karthikeyann authored Apr 21, 2021
1 parent f11bcd7 commit c0cf5e1
Show file tree
Hide file tree
Showing 13 changed files with 436 additions and 54 deletions.
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,7 @@ add_library(cudf
src/groupby/sort/group_min.cu
src/groupby/sort/group_nth_element.cu
src/groupby/sort/group_nunique.cu
src/groupby/sort/group_product.cu
src/groupby/sort/group_quantiles.cu
src/groupby/sort/group_std.cu
src/groupby/sort/group_sum.cu
Expand Down
38 changes: 24 additions & 14 deletions cpp/include/cudf/detail/aggregation/aggregation.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -314,29 +314,18 @@ struct update_target_element<dictionary32, aggregation::SUM, target_has_nulls, s
}
};

// This code will segfault in nvcc/ptxas 10.2 only
// https://nvbugswb.nvidia.com/NvBugs5/SWBug.aspx?bugid=3186317
// Enabling only for 2 types does not segfault. Using for unit tests.
#if (__CUDACC_VER_MAJOR__ == 10) and (__CUDACC_VER_MINOR__ == 2)
template <typename T>
constexpr bool is_SOS_supported()
{
return std::is_floating_point<T>::value;
}
#else
template <typename T>
constexpr bool is_SOS_supported()
constexpr bool is_product_supported()
{
return is_numeric<T>();
}
#endif

template <typename Source, bool target_has_nulls, bool source_has_nulls>
struct update_target_element<Source,
aggregation::SUM_OF_SQUARES,
target_has_nulls,
source_has_nulls,
std::enable_if_t<is_SOS_supported<Source>()>> {
std::enable_if_t<is_product_supported<Source>()>> {
__device__ void operator()(mutable_column_device_view target,
size_type target_index,
column_device_view source,
Expand All @@ -351,6 +340,26 @@ struct update_target_element<Source,
}
};

/**
 * @brief Specialization of `update_target_element` for the `PRODUCT` aggregation.
 *
 * Multiplies the target element by the source element using `atomicMul`. The source
 * value is first converted to `target_type_t<Source, aggregation::PRODUCT>`.
 * Only enabled for `Source` types for which `is_product_supported<Source>()` is true.
 *
 * @tparam Source Element type of the source column
 * @tparam target_has_nulls Whether the target's null state must be checked/updated
 * @tparam source_has_nulls Whether the source's null state must be checked
 */
template <typename Source, bool target_has_nulls, bool source_has_nulls>
struct update_target_element<Source,
                             aggregation::PRODUCT,
                             target_has_nulls,
                             source_has_nulls,
                             std::enable_if_t<is_product_supported<Source>()>> {
  __device__ void operator()(mutable_column_device_view target,
                             size_type target_index,
                             column_device_view source,
                             size_type source_index) const noexcept
  {
    // A null source element contributes nothing: leave the target untouched.
    if (source_has_nulls and source.is_null(source_index)) { return; }

    using Target = target_type_t<Source, aggregation::PRODUCT>;

    // Convert the source element to the accumulator type before multiplying.
    auto const multiplier = static_cast<Target>(source.element<Source>(source_index));
    atomicMul(&target.element<Target>(target_index), multiplier);

    // The target now holds a value, so make sure it is flagged as valid.
    if (target_has_nulls and target.is_null(target_index)) { target.set_valid(target_index); }
  }
};

template <typename Source, bool target_has_nulls, bool source_has_nulls>
struct update_target_element<
Source,
Expand Down Expand Up @@ -559,7 +568,8 @@ struct identity_initializer {
k == aggregation::COUNT_VALID or k == aggregation::COUNT_ALL or
k == aggregation::ARGMAX or k == aggregation::ARGMIN or
k == aggregation::SUM_OF_SQUARES or k == aggregation::STD or
k == aggregation::VARIANCE);
k == aggregation::VARIANCE or
(k == aggregation::PRODUCT and is_product_supported<T>()));
}

template <typename T, aggregation::Kind k>
Expand Down
22 changes: 22 additions & 0 deletions cpp/include/cudf/detail/utilities/device_atomics.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,28 @@ __forceinline__ __device__ T atomicAdd(T* address, T val)
return cudf::genericAtomicOperation(address, val, cudf::DeviceSum{});
}

/**
 * @brief Overload of `atomicMul`.
 *
 * Reads the value `old` located at `address` in global or shared memory,
 * computes `old * val`, and stores the product back to the same address.
 * The read, multiply and write are performed as one atomic transaction.
 *
 * The supported cudf types for `atomicMul` are:
 * int8_t, int16_t, int32_t, int64_t, float, double, and bool
 *
 * All types are implemented by `atomicCAS`.
 *
 * @tparam T Type of the value being updated
 * @param[in] address The address of the old value in global or shared memory
 * @param[in] val The value to multiply by
 *
 * @returns The old value at `address`
 */
template <typename T>
__forceinline__ __device__ T atomicMul(T* address, T val)
{
  // Delegate to the generic atomic helper with the product operator.
  T const previous = cudf::genericAtomicOperation(address, val, cudf::DeviceProduct{});
  return previous;
}

/**
* @brief Overloads for `atomicMin`
* reads the `old` located at the `address` in global or shared memory,
Expand Down
45 changes: 21 additions & 24 deletions cpp/src/groupby/hash/groupby.cu
Original file line number Diff line number Diff line change
Expand Up @@ -55,33 +55,37 @@ namespace groupby {
namespace detail {
namespace hash {
namespace {
// This is a temporary fix due to compiler bug and we can resort back to
// constexpr once cuda 10.2 becomes RAPIDS's minimum compiler version
#if 0

/**
* @brief List of aggregation operations that can be computed with a hash-based
* implementation.
*/
constexpr std::array<aggregation::Kind, 10> hash_aggregations{
aggregation::SUM, aggregation::MIN, aggregation::MAX,
aggregation::COUNT_VALID, aggregation::COUNT_ALL,
aggregation::ARGMIN, aggregation::ARGMAX,
aggregation::SUM_OF_SQUARES,
aggregation::MEAN, aggregation::STD, aggregation::VARIANCE};

//Could be hash: SUM, PRODUCT, MIN, MAX, COUNT_VALID, COUNT_ALL, ANY, ALL,
constexpr std::array<aggregation::Kind, 12> hash_aggregations{aggregation::SUM,
aggregation::PRODUCT,
aggregation::MIN,
aggregation::MAX,
aggregation::COUNT_VALID,
aggregation::COUNT_ALL,
aggregation::ARGMIN,
aggregation::ARGMAX,
aggregation::SUM_OF_SQUARES,
aggregation::MEAN,
aggregation::STD,
aggregation::VARIANCE};

// Could be hash: SUM, PRODUCT, MIN, MAX, COUNT_VALID, COUNT_ALL, ANY, ALL,
// Compound: MEAN(SUM, COUNT_VALID), VARIANCE, STD(MEAN (SUM, COUNT_VALID), COUNT_VALID),
// ARGMAX, ARGMIN
// FIXME(kn): adding SUM_OF_SQUARES causes ptxas compiler crash (<=CUDA 10.2) for more than 3 types!

// TODO replace with std::find in C++20 onwards.
template <class T, size_t N>
constexpr bool array_contains(std::array<T, N> const& haystack, T needle) {
for (auto i = 0u; i < N; ++i) {
if (haystack[i] == needle) return true;
constexpr bool array_contains(std::array<T, N> const& haystack, T needle)
{
for (auto const& val : haystack) {
if (val == needle) return true;
}
return false;
}
#endif

/**
* @brief Indicates whether the specified aggregation operation can be computed
Expand All @@ -93,14 +97,7 @@ constexpr bool array_contains(std::array<T, N> const& haystack, T needle) {
*/
bool constexpr is_hash_aggregation(aggregation::Kind t)
{
// this is a temporary fix due to compiler bug and we can resort back to
// constexpr once cuda 10.2 becomes RAPIDS's minimum compiler version
// return array_contains(hash_aggregations, t);
return (t == aggregation::SUM) or (t == aggregation::MIN) or (t == aggregation::MAX) or
(t == aggregation::COUNT_VALID) or (t == aggregation::COUNT_ALL) or
(t == aggregation::ARGMIN) or (t == aggregation::ARGMAX) or
(t == aggregation::SUM_OF_SQUARES) or (t == aggregation::MEAN) or
(t == aggregation::STD) or (t == aggregation::VARIANCE);
return array_contains(hash_aggregations, t);
}

template <typename Map>
Expand Down
12 changes: 12 additions & 0 deletions cpp/src/groupby/sort/aggregate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,18 @@ void aggregrate_result_functor::operator()<aggregation::SUM>(aggregation const&
get_grouped_values(), helper.num_groups(stream), helper.group_labels(stream), stream, mr));
};

/**
 * @brief Computes the `PRODUCT` aggregation for the current values column.
 *
 * If the cache does not already hold a result for (`col_idx`, `agg`), calls
 * `detail::group_product` on the grouped values and stores the returned column
 * in the cache. Otherwise does nothing.
 *
 * @param agg The aggregation whose result is requested
 */
template <>
void aggregrate_result_functor::operator()<aggregation::PRODUCT>(aggregation const& agg)
{
  // Only compute the product when no cached result exists for this column/aggregation.
  if (not cache.has_result(col_idx, agg)) {
    cache.add_result(col_idx,
                     agg,
                     detail::group_product(get_grouped_values(),
                                           helper.num_groups(stream),
                                           helper.group_labels(stream),
                                           stream,
                                           mr));
  }
};

template <>
void aggregrate_result_functor::operator()<aggregation::ARGMAX>(aggregation const& agg)
{
Expand Down
46 changes: 46 additions & 0 deletions cpp/src/groupby/sort/group_product.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <cudf/dictionary/dictionary_column_view.hpp>
#include <cudf/utilities/span.hpp>
#include <groupby/sort/group_single_pass_reduction_util.cuh>

#include <rmm/cuda_stream_view.hpp>

namespace cudf {
namespace groupby {
namespace detail {
/**
 * @brief Dispatches the sort-based groupby `PRODUCT` reduction on the values' type.
 *
 * Invokes `reduce_functor<aggregation::PRODUCT>` through `type_dispatcher`. For
 * dictionary columns the dispatch uses the type of the dictionary's keys rather
 * than the dictionary type itself.
 *
 * @param values Values to aggregate
 * @param num_groups Number of groups
 * @param group_labels Group labels forwarded to the reduction functor
 * @param stream CUDA stream forwarded to the reduction functor
 * @param mr Device memory resource forwarded to the reduction functor
 * @return The column produced by the dispatched reduction functor
 */
std::unique_ptr<column> group_product(column_view const& values,
                                      size_type num_groups,
                                      cudf::device_span<size_type const> group_labels,
                                      rmm::cuda_stream_view stream,
                                      rmm::mr::device_memory_resource* mr)
{
  // Start from the column's own type; dictionaries dispatch on their keys' type.
  auto dispatch_type = values.type();
  if (cudf::is_dictionary(dispatch_type)) {
    dispatch_type = dictionary_column_view(values).keys().type();
  }

  return type_dispatcher(dispatch_type,
                         reduce_functor<aggregation::PRODUCT>{},
                         values,
                         num_groups,
                         group_labels,
                         stream,
                         mr);
}

} // namespace detail
} // namespace groupby
} // namespace cudf
Loading

0 comments on commit c0cf5e1

Please sign in to comment.