diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index d6e7a18441c..89f0ebeb239 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -199,6 +199,7 @@ add_library(cudf src/groupby/sort/group_min.cu src/groupby/sort/group_nth_element.cu src/groupby/sort/group_nunique.cu + src/groupby/sort/group_product.cu src/groupby/sort/group_quantiles.cu src/groupby/sort/group_std.cu src/groupby/sort/group_sum.cu diff --git a/cpp/include/cudf/detail/aggregation/aggregation.cuh b/cpp/include/cudf/detail/aggregation/aggregation.cuh index 3f5f5a91632..4d78f5ef05a 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.cuh +++ b/cpp/include/cudf/detail/aggregation/aggregation.cuh @@ -314,29 +314,18 @@ struct update_target_element -constexpr bool is_SOS_supported() -{ - return std::is_floating_point::value; -} -#else template -constexpr bool is_SOS_supported() +constexpr bool is_product_supported() { return is_numeric(); } -#endif template struct update_target_element()>> { + std::enable_if_t()>> { __device__ void operator()(mutable_column_device_view target, size_type target_index, column_device_view source, @@ -351,6 +340,26 @@ struct update_target_element +struct update_target_element()>> { + __device__ void operator()(mutable_column_device_view target, + size_type target_index, + column_device_view source, + size_type source_index) const noexcept + { + if (source_has_nulls and source.is_null(source_index)) { return; } + + using Target = target_type_t; + atomicMul(&target.element(target_index), + static_cast(source.element(source_index))); + if (target_has_nulls and target.is_null(target_index)) { target.set_valid(target_index); } + } +}; + template struct update_target_element< Source, @@ -559,7 +568,8 @@ struct identity_initializer { k == aggregation::COUNT_VALID or k == aggregation::COUNT_ALL or k == aggregation::ARGMAX or k == aggregation::ARGMIN or k == aggregation::SUM_OF_SQUARES or k == aggregation::STD or - k == aggregation::VARIANCE); + k == 
aggregation::VARIANCE or + (k == aggregation::PRODUCT and is_product_supported())); } template diff --git a/cpp/include/cudf/detail/utilities/device_atomics.cuh b/cpp/include/cudf/detail/utilities/device_atomics.cuh index 246817a5cb5..00c9fc782e0 100644 --- a/cpp/include/cudf/detail/utilities/device_atomics.cuh +++ b/cpp/include/cudf/detail/utilities/device_atomics.cuh @@ -503,6 +503,28 @@ __forceinline__ __device__ T atomicAdd(T* address, T val) return cudf::genericAtomicOperation(address, val, cudf::DeviceSum{}); } +/** + * @brief Overloads for `atomicMul` + * reads the `old` located at the `address` in global or shared memory, + * computes (old * val), and stores the result back to memory at the same + * address. These three operations are performed in one atomic transaction. + * + * The supported cudf types for `atomicMul` are: + * int8_t, int16_t, int32_t, int64_t, float, double, and bool + * + * All types are implemented by `atomicCAS`. + * + * @param[in] address The address of old value in global or shared memory + * @param[in] val The value to be multiplied + * + * @returns The old value at `address` + */ +template +__forceinline__ __device__ T atomicMul(T* address, T val) +{ + return cudf::genericAtomicOperation(address, val, cudf::DeviceProduct{}); +} + /** * @brief Overloads for `atomicMin` * reads the `old` located at the `address` in global or shared memory, diff --git a/cpp/src/groupby/hash/groupby.cu b/cpp/src/groupby/hash/groupby.cu index 38aacbe59a7..aced432311a 100644 --- a/cpp/src/groupby/hash/groupby.cu +++ b/cpp/src/groupby/hash/groupby.cu @@ -55,33 +55,37 @@ namespace groupby { namespace detail { namespace hash { namespace { -// This is a temporary fix due to compiler bug and we can resort back to -// constexpr once cuda 10.2 becomes RAPIDS's minimum compiler version -#if 0 + /** * @brief List of aggregation operations that can be computed with a hash-based * implementation. 
*/ -constexpr std::array hash_aggregations{ - aggregation::SUM, aggregation::MIN, aggregation::MAX, - aggregation::COUNT_VALID, aggregation::COUNT_ALL, - aggregation::ARGMIN, aggregation::ARGMAX, - aggregation::SUM_OF_SQUARES, - aggregation::MEAN, aggregation::STD, aggregation::VARIANCE}; - -//Could be hash: SUM, PRODUCT, MIN, MAX, COUNT_VALID, COUNT_ALL, ANY, ALL, +constexpr std::array hash_aggregations{aggregation::SUM, + aggregation::PRODUCT, + aggregation::MIN, + aggregation::MAX, + aggregation::COUNT_VALID, + aggregation::COUNT_ALL, + aggregation::ARGMIN, + aggregation::ARGMAX, + aggregation::SUM_OF_SQUARES, + aggregation::MEAN, + aggregation::STD, + aggregation::VARIANCE}; + +// Could be hash: SUM, PRODUCT, MIN, MAX, COUNT_VALID, COUNT_ALL, ANY, ALL, // Compound: MEAN(SUM, COUNT_VALID), VARIANCE, STD(MEAN (SUM, COUNT_VALID), COUNT_VALID), // ARGMAX, ARGMIN -// FIXME(kn): adding SUM_OF_SQUARES causes ptxas compiler crash (<=CUDA 10.2) for more than 3 types! +// TODO replace with std::find in C++20 onwards. 
template -constexpr bool array_contains(std::array const& haystack, T needle) { - for (auto i = 0u; i < N; ++i) { - if (haystack[i] == needle) return true; +constexpr bool array_contains(std::array const& haystack, T needle) +{ + for (auto const& val : haystack) { + if (val == needle) return true; } return false; } -#endif /** * @brief Indicates whether the specified aggregation operation can be computed @@ -93,14 +97,7 @@ constexpr bool array_contains(std::array const& haystack, T needle) { */ bool constexpr is_hash_aggregation(aggregation::Kind t) { - // this is a temporary fix due to compiler bug and we can resort back to - // constexpr once cuda 10.2 becomes RAPIDS's minimum compiler version - // return array_contains(hash_aggregations, t); - return (t == aggregation::SUM) or (t == aggregation::MIN) or (t == aggregation::MAX) or - (t == aggregation::COUNT_VALID) or (t == aggregation::COUNT_ALL) or - (t == aggregation::ARGMIN) or (t == aggregation::ARGMAX) or - (t == aggregation::SUM_OF_SQUARES) or (t == aggregation::MEAN) or - (t == aggregation::STD) or (t == aggregation::VARIANCE); + return array_contains(hash_aggregations, t); } template diff --git a/cpp/src/groupby/sort/aggregate.cpp b/cpp/src/groupby/sort/aggregate.cpp index 46185e07600..12f157cd3d9 100644 --- a/cpp/src/groupby/sort/aggregate.cpp +++ b/cpp/src/groupby/sort/aggregate.cpp @@ -98,6 +98,18 @@ void aggregrate_result_functor::operator()(aggregation const& get_grouped_values(), helper.num_groups(stream), helper.group_labels(stream), stream, mr)); }; +template <> +void aggregrate_result_functor::operator()(aggregation const& agg) +{ + if (cache.has_result(col_idx, agg)) return; + + cache.add_result( + col_idx, + agg, + detail::group_product( + get_grouped_values(), helper.num_groups(stream), helper.group_labels(stream), stream, mr)); +}; + template <> void aggregrate_result_functor::operator()(aggregation const& agg) { diff --git a/cpp/src/groupby/sort/group_product.cu 
b/cpp/src/groupby/sort/group_product.cu new file mode 100644 index 00000000000..e9cf8611b58 --- /dev/null +++ b/cpp/src/groupby/sort/group_product.cu @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include + +namespace cudf { +namespace groupby { +namespace detail { +std::unique_ptr group_product(column_view const& values, + size_type num_groups, + cudf::device_span group_labels, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + auto values_type = cudf::is_dictionary(values.type()) + ? dictionary_column_view(values).keys().type() + : values.type(); + return type_dispatcher(values_type, + reduce_functor{}, + values, + num_groups, + group_labels, + stream, + mr); +} + +} // namespace detail +} // namespace groupby +} // namespace cudf diff --git a/cpp/src/groupby/sort/group_reductions.hpp b/cpp/src/groupby/sort/group_reductions.hpp index b69fe6a0291..71980082156 100644 --- a/cpp/src/groupby/sort/group_reductions.hpp +++ b/cpp/src/groupby/sort/group_reductions.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -24,12 +24,24 @@ #include +/** @internal @file Internal APIs in this file are mostly segmented reduction operations on columns, + * which are used in sort-based groupby aggregations. + * + */ namespace cudf { namespace groupby { namespace detail { /** * @brief Internal API to calculate groupwise sum * + * @code{.pseudo} + * values = [2, 1, 4, -1, -2, , 4, ] + * group_labels = [0, 0, 0, 1, 1, 2, 2, 3] + * num_groups = 4 + * + * group_sum = [7, -3, 4, ] + * @endcode + * * @param values Grouped values to get sum of * @param num_groups Number of groups * @param group_labels ID of group that the corresponding value belongs to @@ -42,9 +54,40 @@ std::unique_ptr group_sum(column_view const& values, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); +/** + * @brief Internal API to calculate groupwise product + * + * @code{.pseudo} + * values = [2, 1, 4, -1, -2, , 4, ] + * group_labels = [0, 0, 0, 1, 1, 2, 2, 3] + * num_groups = 4 + * + * group_product = [8, 2, 4, ] + * @endcode + * + * @param values Grouped values to get product of + * @param num_groups Number of groups + * @param group_labels ID of group that the corresponding value belongs to + * @param mr Device memory resource used to allocate the returned column's device memory + * @param stream CUDA stream used for device memory operations and kernel launches.
+ */ +std::unique_ptr group_product(column_view const& values, + size_type num_groups, + cudf::device_span group_labels, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); + /** * @brief Internal API to calculate groupwise minimum value * + * @code{.pseudo} + * values = [2, 1, 4, -1, -2, , 4, ] + * group_labels = [0, 0, 0, 1, 1, 2, 2, 3] + * num_groups = 4 + * + * group_min = [1, -2, 4, ] + * @endcode + * * @param values Grouped values to get minimum from * @param num_groups Number of groups * @param group_labels ID of group that the corresponding value belongs to @@ -60,6 +103,14 @@ std::unique_ptr group_min(column_view const& values, /** * @brief Internal API to calculate groupwise maximum value * + * @code{.pseudo} + * values = [2, 1, 4, -1, -2, , 4, ] + * group_labels = [0, 0, 0, 1, 1, 2, 2, 3] + * num_groups = 4 + * + * group_max = [4, -1, 4, ] + * @endcode + * * @param values Grouped values to get maximum from * @param num_groups Number of groups * @param group_labels ID of group that the corresponding value belongs to @@ -75,7 +126,15 @@ std::unique_ptr group_max(column_view const& values, /** * @brief Internal API to calculate group-wise indices of maximum values. * - * @param values Ungrouped values to get maximum value's index from + * @code{.pseudo} + * values = [2, 1, 4, -1, -2, , 4, ] + * group_labels = [0, 0, 0, 1, 1, 2, 2, 3] + * num_groups = 4 + * + * group_argmax = [2, 0, 0, ] + * @endcode + * + * @param values Grouped values to get maximum value's index from * @param num_groups Number of groups * @param group_labels ID of group that the corresponding value belongs to * @param key_sort_order Indices indicating sort order of groupby keys @@ -92,7 +151,15 @@ std::unique_ptr group_argmax(column_view const& values, /** * @brief Internal API to calculate group-wise indices of minimum values.
* - * @param values Ungrouped values to get minimum value's index from + * @code{.pseudo} + * values = [2, 1, 4, -1, -2, , 4, ] + * group_labels = [0, 0, 0, 1, 1, 2, 2, 3] + * num_groups = 4 + * + * group_argmin = [1, 1, 0, ] + * @endcode + * + * @param values Grouped values to get minimum value's index from * @param num_groups Number of groups * @param group_labels ID of group that the corresponding value belongs to * @param key_sort_order Indices indicating sort order of groupby keys @@ -110,6 +177,14 @@ std::unique_ptr group_argmin(column_view const& values, * @brief Internal API to calculate number of non-null values in each group of * @p values * + * @code{.pseudo} + * values = [2, 1, 4, -1, -2, , 4, ] + * group_labels = [0, 0, 0, 1, 1, 2, 2, 3] + * num_groups = 4 + * + * group_count_valid = [3, 2, 1, 0] + * @endcode + * * @param values Grouped values to get valid count of * @param group_labels ID of group that the corresponding value belongs to * @param num_groups Number of groups ( unique values in @p group_labels ) @@ -125,6 +200,13 @@ std::unique_ptr group_count_valid(column_view const& values, /** * @brief Internal API to calculate number of values in each group of @p values * + * @code{.pseudo} + * group_offsets = [0, 3, 5, 7, 8] + * num_groups = 4 + * + * group_count_all = [3, 2, 2, 1] + * @endcode + * * @param group_offsets Offsets of groups' starting points within @p values * @param num_groups Number of groups ( unique values in @p group_labels ) * @param mr Device memory resource used to allocate the returned column's device memory @@ -138,6 +220,16 @@ std::unique_ptr group_count_all(cudf::device_span group /** * @brief Internal API to calculate groupwise variance * + * @code{.pseudo} + * values = [2, 1, 4, -1, -2, , 4, ] + * group_labels = [0, 0, 0, 1, 1, 2, 2, 3] + * group_means = [2.333333, -1.5, 4.0, ] + * group_sizes = [3, 2, 2, 1] + * ddof = 1 + * + * group_var = [2.333333, 0.5, , ] + * @endcode + * * @param values Grouped values to get variance
of * @param group_means Pre-calculated groupwise MEAN * @param group_sizes Number of valid elements per group @@ -158,6 +250,16 @@ std::unique_ptr group_var(column_view const& values, /** * @brief Internal API to calculate groupwise quantiles * + * @code{.pseudo} + * values = [1, 2, 4, -2, -1, , 4, ] + * group_labels = [0, 0, 0, 1, 1, 2, 2, 3] + * group_sizes = [3, 2, 2, 1] + * num_groups = 4 + * quantiles = [0.25, 0.5] + * + * group_quantiles = [1.5, 2, -1.75, -1.5, 4, 4, , ] + * @endcode + * * @param values Grouped and sorted (within group) values to get quantiles from * @param group_sizes Number of valid elements per group * @param group_offsets Offsets of groups' starting points within @p values @@ -179,6 +281,16 @@ std::unique_ptr group_quantiles(column_view const& values, * @brief Internal API to calculate number of unique values in each group of * @p values * + * @code{.pseudo} + * values = [2, 4, 4, -1, -2, , 4, ] + * group_labels = [0, 0, 0, 1, 1, 2, 2, 3] + * group_offsets = [0, 3, 5, 7, 8] + * num_groups = 4 + * + * group_nunique(null_policy::EXCLUDE) = [2, 2, 1, 0] + * group_nunique(null_policy::INCLUDE) = [2, 2, 2, 1] + * @endcode + * * @param values Grouped and sorted (within group) values to get unique count of * @param group_labels ID of group that the corresponding value belongs to * @param num_groups Number of groups ( unique values in @p group_labels ) @@ -200,6 +312,17 @@ std::unique_ptr group_nunique(column_view const& values, /** * @brief Internal API to calculate nth values in each group of @p values * + * @code{.pseudo} + * values = [2, 1, 4, -1, -2, , 4, ] + * group_sizes = [3, 2, 2, 1] + * group_labels = [0, 0, 0, 1, 1, 2, 2, 3] + * group_offsets = [0, 3, 5, 7, 8] + * num_groups = 4 + * + * group_nth_element(n=0, null_policy::EXCLUDE) = [2, -1, 4, ] + * group_nth_element(n=0, null_policy::INCLUDE) = [2, -1, , ] + * @endcode + * * @param values Grouped values to get nth value of * @param group_sizes Number of elements per group * @param 
group_labels ID of group that the corresponding value belongs to @@ -223,6 +346,14 @@ std::unique_ptr group_nth_element(column_view const& values, /** * @brief Internal API to collect grouped values into a lists column * + * @code{.pseudo} + * values = [2, 1, 4, -1, -2, , 4, ] + * group_offsets = [0, 3, 5, 7, 8] + * num_groups = 4 + * + * group_collect = [[2, 1, 4], [-1, -2], [, 4], []] + * @endcode + * * @param values Grouped values to collect * @param group_offsets Offsets of groups' starting points within @p values * @param num_groups Number of groups @@ -235,6 +366,9 @@ std::unique_ptr group_collect(column_view const& values, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); +/** @endinternal + * + */ } // namespace detail } // namespace groupby } // namespace cudf diff --git a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh index 63a68974d6b..e5e93bbef47 100644 --- a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh +++ b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh @@ -40,14 +40,17 @@ struct reduce_functor { template static constexpr bool is_supported() { - if (K == aggregation::SUM) - return cudf::is_numeric() || cudf::is_duration() || cudf::is_fixed_point(); - else if (K == aggregation::MIN or K == aggregation::MAX) - return cudf::is_fixed_width() and is_relationally_comparable(); - else if (K == aggregation::ARGMIN or K == aggregation::ARGMAX) - return is_relationally_comparable(); - else - return false; + switch (K) { + case aggregation::SUM: + return cudf::is_numeric() || cudf::is_duration() || cudf::is_fixed_point(); + case aggregation::PRODUCT: return cudf::detail::is_product_supported(); + case aggregation::MIN: + case aggregation::MAX: + return cudf::is_fixed_width() and is_relationally_comparable(); + case aggregation::ARGMIN: + case aggregation::ARGMAX: return is_relationally_comparable(); + default: return false; + } } template diff --git
a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index c8b7ac51615..9dbd4a881a6 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -63,6 +63,7 @@ ConfigureTest(GROUPBY_TEST groupby/group_sum_test.cpp groupby/group_min_test.cpp groupby/group_max_test.cpp + groupby/group_product_test.cpp groupby/group_sum_of_squares_test.cpp groupby/group_mean_test.cpp groupby/group_var_test.cpp diff --git a/cpp/tests/groupby/group_product_test.cpp b/cpp/tests/groupby/group_product_test.cpp new file mode 100644 index 00000000000..5af27585bee --- /dev/null +++ b/cpp/tests/groupby/group_product_test.cpp @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include +#include +#include + +#include + +namespace cudf { +namespace test { +template +struct groupby_product_test : public cudf::test::BaseFixture { +}; + +using K = int32_t; +using supported_types = cudf::test::Types; + +TYPED_TEST_CASE(groupby_product_test, supported_types); + +TYPED_TEST(groupby_product_test, basic) +{ + using V = TypeParam; + using R = cudf::detail::target_type_t; + + // clang-format off + fixed_width_column_wrapper keys { 1, 2, 3, 1, 2, 2, 1, 3, 3, 2}; + fixed_width_column_wrapper vals { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + + // { 1, 1, 1, 2, 2, 2, 2, 3, 3, 3} + fixed_width_column_wrapper expect_keys { 1, 2, 3 }; + // { 0, 3, 6, 1, 4, 5, 9, 2, 7, 8} + fixed_width_column_wrapper expect_vals({ 0., 180., 112. }, all_valid()); + // clang-format on + + test_single_agg(keys, vals, expect_keys, expect_vals, cudf::make_product_aggregation()); +} + +TYPED_TEST(groupby_product_test, empty_cols) +{ + using V = TypeParam; + using R = cudf::detail::target_type_t; + + fixed_width_column_wrapper keys{}; + fixed_width_column_wrapper vals{}; + + fixed_width_column_wrapper expect_keys{}; + fixed_width_column_wrapper expect_vals{}; + + test_single_agg(keys, vals, expect_keys, expect_vals, cudf::make_product_aggregation()); +} + +TYPED_TEST(groupby_product_test, zero_valid_keys) +{ + using V = TypeParam; + using R = cudf::detail::target_type_t; + + fixed_width_column_wrapper keys({1, 2, 3}, all_null()); + fixed_width_column_wrapper vals{3, 4, 5}; + + fixed_width_column_wrapper expect_keys{}; + fixed_width_column_wrapper expect_vals{}; + + test_single_agg(keys, vals, expect_keys, expect_vals, cudf::make_product_aggregation()); +} + +TYPED_TEST(groupby_product_test, zero_valid_values) +{ + using V = TypeParam; + using R = cudf::detail::target_type_t; + + fixed_width_column_wrapper keys{1, 1, 1}; + fixed_width_column_wrapper vals({3, 4, 5}, all_null()); + + fixed_width_column_wrapper expect_keys{1}; + fixed_width_column_wrapper expect_vals({0}, 
all_null()); + + test_single_agg(keys, vals, expect_keys, expect_vals, cudf::make_product_aggregation()); +} + +TYPED_TEST(groupby_product_test, null_keys_and_values) +{ + using V = TypeParam; + using R = cudf::detail::target_type_t; + + // clang-format off + fixed_width_column_wrapper keys( { 1, 2, 3, 1, 2, 2, 1, 3, 3, 2, 4}, + { 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1}); + fixed_width_column_wrapper vals( { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 3}, + { 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0}); + + // { 1, 1, 2, 2, 2, 3, 3, 4} + fixed_width_column_wrapper expect_keys({ 1, 2, 3, 4}, all_valid()); + // { _, 3, 6, 1, 4, 9, 2, 8, _} + fixed_width_column_wrapper expect_vals({ 18., 36., 16., 3.}, + { 1, 1, 1, 0}); + // clang-format on + + test_single_agg(keys, vals, expect_keys, expect_vals, cudf::make_product_aggregation()); +} + +// This test will not work until the following ptxas bug is fixed in 10.2 +// https://nvbugswb.nvidia.com/NvBugs5/SWBug.aspx?bugid=3186317&cp= +TYPED_TEST(groupby_product_test, DISABLED_dictionary) +{ + using V = TypeParam; + using R = cudf::detail::target_type_t; + + // clang-format off + fixed_width_column_wrapper keys{ 1, 2, 3, 1, 2, 2, 1, 3, 3, 2}; + dictionary_column_wrapper vals{ 0, 2, 2, 3, 4, 5, 6, 7, 8, 9}; + + // { 1, 1, 1, 2, 2, 2, 2, 3, 3, 3} + fixed_width_column_wrapper expect_keys({ 1, 2, 3 }); + // { 0, 3, 6, 1, 4, 5, 9, 2, 7, 8} + fixed_width_column_wrapper expect_vals({ 0., 180., 112. 
}, all_valid()); + // clang-format on + + test_single_agg(keys, vals, expect_keys, expect_vals, cudf::make_product_aggregation()); +} + +} // namespace test +} // namespace cudf diff --git a/python/cudf/cudf/_lib/aggregation.pyx b/python/cudf/cudf/_lib/aggregation.pyx index 682d8cbf329..bed1581ad95 100644 --- a/python/cudf/cudf/_lib/aggregation.pyx +++ b/python/cudf/cudf/_lib/aggregation.pyx @@ -163,6 +163,7 @@ cdef class Aggregation: cdef Aggregation agg = cls() agg.c_obj = move(libcudf_aggregation.make_product_aggregation()) return agg + prod = product @classmethod def sum_of_squares(cls): diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index 8d32c99b5b0..b643bc7f7fd 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -587,6 +587,10 @@ def sum(self): """Compute the column-wise sum of the values in each group.""" return self.agg("sum") + def prod(self): + """Compute the column-wise product of the values in each group.""" + return self.agg("prod") + def idxmin(self): """Get the column-wise index of the minimum value in each group.""" return self.agg("idxmin") diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py index 868387b100e..37840be8922 100644 --- a/python/cudf/cudf/tests/test_groupby.py +++ b/python/cudf/cudf/tests/test_groupby.py @@ -314,7 +314,18 @@ def emulate(df): @pytest.mark.parametrize("nelem", [2, 3, 100, 500, 1000]) @pytest.mark.parametrize( "func", - ["mean", "std", "var", "min", "max", "idxmin", "idxmax", "count", "sum"], + [ + "mean", + "std", + "var", + "min", + "max", + "idxmin", + "idxmax", + "count", + "sum", + "prod", + ], ) def test_groupby_2keys_agg(nelem, func): # gdf (Note: lack of multiIndex) @@ -390,7 +401,7 @@ def test_groupby_agg_decimal(num_groups, nelem_per_group, func): @pytest.mark.parametrize( - "agg", ["min", "max", "idxmin", "idxmax", "count", "sum", "mean"] + "agg", ["min", "max", 
"idxmin", "idxmax", "count", "sum", "prod", "mean"] ) def test_series_groupby(agg): s = pd.Series([1, 2, 3]) @@ -404,7 +415,7 @@ def test_series_groupby(agg): @pytest.mark.parametrize( - "agg", ["min", "max", "idxmin", "idxmax", "count", "sum", "mean"] + "agg", ["min", "max", "idxmin", "idxmax", "count", "sum", "prod", "mean"] ) def test_series_groupby_agg(agg): s = pd.Series([1, 2, 3]) @@ -422,6 +433,7 @@ def test_series_groupby_agg(agg): "max", "count", "sum", + "prod", "mean", pytest.param( "idxmin", @@ -451,6 +463,7 @@ def test_groupby_level_zero(agg): "max", "count", "sum", + "prod", "mean", pytest.param( "idxmin", @@ -815,7 +828,7 @@ def test_groupby_multi_agg_hash_groupby(agg): @pytest.mark.parametrize( - "agg", ["min", "max", "idxmax", "idxmax", "sum", "count", "mean"] + "agg", ["min", "max", "idxmax", "idxmax", "sum", "prod", "count", "mean"] ) def test_groupby_nulls_basic(agg): check_dtype = False if agg in _index_type_aggs else True @@ -855,7 +868,7 @@ def test_groupby_nulls_basic(agg): # Pandas' null semantics. Should we change it? assert_groupby_results_equal( getattr(pdf.groupby("a"), agg)().fillna(0), - getattr(gdf.groupby("a"), agg)().fillna(0), + getattr(gdf.groupby("a"), agg)().fillna(0 if agg != "prod" else 1), check_dtype=check_dtype, )