diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index d6e7a18441c..89f0ebeb239 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -199,6 +199,7 @@ add_library(cudf
     src/groupby/sort/group_min.cu
     src/groupby/sort/group_nth_element.cu
     src/groupby/sort/group_nunique.cu
+    src/groupby/sort/group_product.cu
     src/groupby/sort/group_quantiles.cu
     src/groupby/sort/group_std.cu
     src/groupby/sort/group_sum.cu
diff --git a/cpp/include/cudf/detail/aggregation/aggregation.cuh b/cpp/include/cudf/detail/aggregation/aggregation.cuh
index 3f5f5a91632..4d78f5ef05a 100644
--- a/cpp/include/cudf/detail/aggregation/aggregation.cuh
+++ b/cpp/include/cudf/detail/aggregation/aggregation.cuh
@@ -314,29 +314,18 @@ struct update_target_element<dictionary32, aggregation::SUM, target_has_nulls, s
   }
 };
 
-// This code will segfault in nvcc/ptxas 10.2 only
-// https://nvbugswb.nvidia.com/NvBugs5/SWBug.aspx?bugid=3186317
-// Enabling only for 2 types does not segfault. Using for unit tests.
-#if (__CUDACC_VER_MAJOR__ == 10) and (__CUDACC_VER_MINOR__ == 2)
-template <typename T>
-constexpr bool is_SOS_supported()
-{
-  return std::is_floating_point<T>::value;
-}
-#else
 template <typename T>
-constexpr bool is_SOS_supported()
+constexpr bool is_product_supported()
 {
   return is_numeric<T>();
 }
-#endif
 
 template <typename Source, bool target_has_nulls, bool source_has_nulls>
 struct update_target_element<Source,
                              aggregation::SUM_OF_SQUARES,
                              target_has_nulls,
                              source_has_nulls,
-                             std::enable_if_t<is_SOS_supported<Source>()>> {
+                             std::enable_if_t<is_product_supported<Source>()>> {
   __device__ void operator()(mutable_column_device_view target,
                              size_type target_index,
                              column_device_view source,
@@ -351,6 +340,26 @@ struct update_target_element<Source,
   }
 };
 
+template <typename Source, bool target_has_nulls, bool source_has_nulls>
+struct update_target_element<Source,
+                             aggregation::PRODUCT,
+                             target_has_nulls,
+                             source_has_nulls,
+                             std::enable_if_t<is_product_supported<Source>()>> {
+  __device__ void operator()(mutable_column_device_view target,
+                             size_type target_index,
+                             column_device_view source,
+                             size_type source_index) const noexcept
+  {
+    if (source_has_nulls and source.is_null(source_index)) { return; }
+
+    using Target = target_type_t<Source, aggregation::PRODUCT>;
+    atomicMul(&target.element<Target>(target_index),
+              static_cast<Target>(source.element<Source>(source_index)));
+    if (target_has_nulls and target.is_null(target_index)) { target.set_valid(target_index); }
+  }
+};
+
 template <typename Source, bool target_has_nulls, bool source_has_nulls>
 struct update_target_element<
   Source,
@@ -559,7 +568,8 @@ struct identity_initializer {
             k == aggregation::COUNT_VALID or k == aggregation::COUNT_ALL or
             k == aggregation::ARGMAX or k == aggregation::ARGMIN or
             k == aggregation::SUM_OF_SQUARES or k == aggregation::STD or
-            k == aggregation::VARIANCE);
+            k == aggregation::VARIANCE or
+            (k == aggregation::PRODUCT and is_product_supported<T>()));
   }
 
   template <typename T, aggregation::Kind k>
diff --git a/cpp/include/cudf/detail/utilities/device_atomics.cuh b/cpp/include/cudf/detail/utilities/device_atomics.cuh
index 246817a5cb5..00c9fc782e0 100644
--- a/cpp/include/cudf/detail/utilities/device_atomics.cuh
+++ b/cpp/include/cudf/detail/utilities/device_atomics.cuh
@@ -503,6 +503,28 @@ __forceinline__ __device__ T atomicAdd(T* address, T val)
   return cudf::genericAtomicOperation(address, val, cudf::DeviceSum{});
 }
 
+/**
+ * @brief Overloads for `atomicMul`
+ * reads the `old` located at the `address` in global or shared memory,
+ * computes (old * val), and stores the result back to memory at the same
+ * address. These three operations are performed in one atomic transaction.
+ *
+ * The supported cudf types for `atomicMul` are:
+ * int8_t, int16_t, int32_t, int64_t, float, double, and bool
+ *
+ * All types are implemented by `atomicCAS`.
+ *
+ * @param[in] address The address of old value in global or shared memory
+ * @param[in] val The value to be multiplied
+ *
+ * @returns The old value at `address`
+ */
+template <typename T>
+__forceinline__ __device__ T atomicMul(T* address, T val)
+{
+  return cudf::genericAtomicOperation(address, val, cudf::DeviceProduct{});
+}
+
 /**
  * @brief Overloads for `atomicMin`
  * reads the `old` located at the `address` in global or shared memory,
diff --git a/cpp/src/groupby/hash/groupby.cu b/cpp/src/groupby/hash/groupby.cu
index 38aacbe59a7..aced432311a 100644
--- a/cpp/src/groupby/hash/groupby.cu
+++ b/cpp/src/groupby/hash/groupby.cu
@@ -55,33 +55,37 @@ namespace groupby {
 namespace detail {
 namespace hash {
 namespace {
-// This is a temporary fix due to compiler bug and we can resort back to
-// constexpr once cuda 10.2 becomes RAPIDS's minimum compiler version
-#if 0
+
 /**
  * @brief List of aggregation operations that can be computed with a hash-based
  * implementation.
  */
-constexpr std::array<aggregation::Kind, 10> hash_aggregations{
-    aggregation::SUM, aggregation::MIN, aggregation::MAX,
-    aggregation::COUNT_VALID, aggregation::COUNT_ALL,
-    aggregation::ARGMIN, aggregation::ARGMAX,
-    aggregation::SUM_OF_SQUARES,
-    aggregation::MEAN, aggregation::STD, aggregation::VARIANCE};
-
-//Could be hash: SUM, PRODUCT, MIN, MAX, COUNT_VALID, COUNT_ALL, ANY, ALL,
+constexpr std::array<aggregation::Kind, 12> hash_aggregations{aggregation::SUM,
+                                                              aggregation::PRODUCT,
+                                                              aggregation::MIN,
+                                                              aggregation::MAX,
+                                                              aggregation::COUNT_VALID,
+                                                              aggregation::COUNT_ALL,
+                                                              aggregation::ARGMIN,
+                                                              aggregation::ARGMAX,
+                                                              aggregation::SUM_OF_SQUARES,
+                                                              aggregation::MEAN,
+                                                              aggregation::STD,
+                                                              aggregation::VARIANCE};
+
+// Could be hash: SUM, PRODUCT, MIN, MAX, COUNT_VALID, COUNT_ALL, ANY, ALL,
 // Compound: MEAN(SUM, COUNT_VALID), VARIANCE, STD(MEAN (SUM, COUNT_VALID), COUNT_VALID),
 // ARGMAX, ARGMIN
-// FIXME(kn): adding SUM_OF_SQUARES causes ptxas compiler crash (<=CUDA 10.2) for more than 3 types!
 
+// TODO replace with std::find in C++20 onwards.
 template <class T, size_t N>
-constexpr bool array_contains(std::array<T, N> const& haystack, T needle) {
-  for (auto i = 0u; i < N; ++i) {
-    if (haystack[i] == needle) return true;
+constexpr bool array_contains(std::array<T, N> const& haystack, T needle)
+{
+  for (auto const& val : haystack) {
+    if (val == needle) return true;
   }
   return false;
 }
-#endif
 
 /**
  * @brief Indicates whether the specified aggregation operation can be computed
@@ -93,14 +97,7 @@ constexpr bool array_contains(std::array<T, N> const& haystack, T needle) {
  */
 bool constexpr is_hash_aggregation(aggregation::Kind t)
 {
-  // this is a temporary fix due to compiler bug and we can resort back to
-  // constexpr once cuda 10.2 becomes RAPIDS's minimum compiler version
-  // return array_contains(hash_aggregations, t);
-  return (t == aggregation::SUM) or (t == aggregation::MIN) or (t == aggregation::MAX) or
-         (t == aggregation::COUNT_VALID) or (t == aggregation::COUNT_ALL) or
-         (t == aggregation::ARGMIN) or (t == aggregation::ARGMAX) or
-         (t == aggregation::SUM_OF_SQUARES) or (t == aggregation::MEAN) or
-         (t == aggregation::STD) or (t == aggregation::VARIANCE);
+  return array_contains(hash_aggregations, t);
 }
 
 template <typename Map>
diff --git a/cpp/src/groupby/sort/aggregate.cpp b/cpp/src/groupby/sort/aggregate.cpp
index 46185e07600..12f157cd3d9 100644
--- a/cpp/src/groupby/sort/aggregate.cpp
+++ b/cpp/src/groupby/sort/aggregate.cpp
@@ -98,6 +98,18 @@ void aggregrate_result_functor::operator()<aggregation::SUM>(aggregation const&
       get_grouped_values(), helper.num_groups(stream), helper.group_labels(stream), stream, mr));
 };
 
+template <>
+void aggregrate_result_functor::operator()<aggregation::PRODUCT>(aggregation const& agg)
+{
+  if (cache.has_result(col_idx, agg)) return;
+
+  cache.add_result(
+    col_idx,
+    agg,
+    detail::group_product(
+      get_grouped_values(), helper.num_groups(stream), helper.group_labels(stream), stream, mr));
+};
+
 template <>
 void aggregrate_result_functor::operator()<aggregation::ARGMAX>(aggregation const& agg)
 {
diff --git a/cpp/src/groupby/sort/group_product.cu b/cpp/src/groupby/sort/group_product.cu
new file mode 100644
index 00000000000..e9cf8611b58
--- /dev/null
+++ b/cpp/src/groupby/sort/group_product.cu
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cudf/dictionary/dictionary_column_view.hpp>
+#include <cudf/utilities/span.hpp>
+#include <groupby/sort/group_single_pass_reduction_util.cuh>
+
+#include <rmm/cuda_stream_view.hpp>
+
+namespace cudf {
+namespace groupby {
+namespace detail {
+std::unique_ptr<column> group_product(column_view const& values,
+                                      size_type num_groups,
+                                      cudf::device_span<size_type const> group_labels,
+                                      rmm::cuda_stream_view stream,
+                                      rmm::mr::device_memory_resource* mr)
+{
+  auto values_type = cudf::is_dictionary(values.type())
+                       ? dictionary_column_view(values).keys().type()
+                       : values.type();
+  return type_dispatcher(values_type,
+                         reduce_functor<aggregation::PRODUCT>{},
+                         values,
+                         num_groups,
+                         group_labels,
+                         stream,
+                         mr);
+}
+
+}  // namespace detail
+}  // namespace groupby
+}  // namespace cudf
diff --git a/cpp/src/groupby/sort/group_reductions.hpp b/cpp/src/groupby/sort/group_reductions.hpp
index b69fe6a0291..71980082156 100644
--- a/cpp/src/groupby/sort/group_reductions.hpp
+++ b/cpp/src/groupby/sort/group_reductions.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -24,12 +24,24 @@
 
 #include <memory>
 
+/** @internal @file The internal APIs in this file are mostly segmented reduction operations on
+ * columns, which are used in sort-based groupby aggregations.
+ *
+ */
 namespace cudf {
 namespace groupby {
 namespace detail {
 /**
  * @brief Internal API to calculate groupwise sum
  *
+ * @code{.pseudo}
+ * values       = [2, 1, 4, -1, -2, <NA>, 4, <NA>]
+ * group_labels = [0, 0, 0,  1,  1,    2, 2,    3]
+ * num_groups   = 4
+ *
+ * group_sum    = [7, -3, 4, <NA>]
+ * @endcode
+ *
  * @param values Grouped values to get sum of
  * @param num_groups Number of groups
  * @param group_labels ID of group that the corresponding value belongs to
@@ -42,9 +54,40 @@ std::unique_ptr<column> group_sum(column_view const& values,
                                   rmm::cuda_stream_view stream,
                                   rmm::mr::device_memory_resource* mr);
 
+/**
+ * @brief Internal API to calculate groupwise product
+ *
+ * @code{.pseudo}
+ * values        = [2, 1, 4, -1, -2, <NA>, 4, <NA>]
+ * group_labels  = [0, 0, 0,  1,  1,    2, 2,    3]
+ * num_groups    = 4
+ *
+ * group_product = [8, 2, 4, <NA>]
+ * @endcode
+ *
+ * @param values Grouped values to get product of
+ * @param num_groups Number of groups
+ * @param group_labels ID of group that the corresponding value belongs to
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ * @param stream CUDA stream used for device memory operations and kernel launches.
+ */
+std::unique_ptr<column> group_product(column_view const& values,
+                                      size_type num_groups,
+                                      cudf::device_span<size_type const> group_labels,
+                                      rmm::cuda_stream_view stream,
+                                      rmm::mr::device_memory_resource* mr);
+
 /**
  * @brief Internal API to calculate groupwise minimum value
  *
+ * @code{.pseudo}
+ * values       = [2, 1, 4, -1, -2, <NA>, 4, <NA>]
+ * group_labels = [0, 0, 0,  1,  1,    2, 2,    3]
+ * num_groups   = 4
+ *
+ * group_min    = [1, -2, 4, <NA>]
+ * @endcode
+ *
  * @param values Grouped values to get minimum from
  * @param num_groups Number of groups
  * @param group_labels ID of group that the corresponding value belongs to
@@ -60,6 +103,14 @@ std::unique_ptr<column> group_min(column_view const& values,
 /**
  * @brief Internal API to calculate groupwise maximum value
  *
+ * @code{.pseudo}
+ * values       = [2, 1, 4, -1, -2, <NA>, 4, <NA>]
+ * group_labels = [0, 0, 0,  1,  1,    2, 2,    3]
+ * num_groups   = 4
+ *
+ * group_max    = [4, -1, 4, <NA>]
+ * @endcode
+ *
  * @param values Grouped values to get maximum from
  * @param num_groups Number of groups
  * @param group_labels ID of group that the corresponding value belongs to
@@ -75,7 +126,15 @@ std::unique_ptr<column> group_max(column_view const& values,
 /**
  * @brief Internal API to calculate group-wise indices of maximum values.
  *
- * @param values Ungrouped values to get maximum value's index from
+ * @code{.pseudo}
+ * values       = [2, 1, 4, -1, -2, <NA>, 4, <NA>]
+ * group_labels = [0, 0, 0,  1,  1,    2, 2,    3]
+ * num_groups   = 4
+ *
+ * group_argmax = [2, 0, 0, <NA>]
+ * @endcode
+ *
+ * @param values Grouped values to get maximum value's index from
  * @param num_groups Number of groups
  * @param group_labels ID of group that the corresponding value belongs to
  * @param key_sort_order Indices indicating sort order of groupby keys
@@ -92,7 +151,15 @@ std::unique_ptr<column> group_argmax(column_view const& values,
 /**
  * @brief Internal API to calculate group-wise indices of minimum values.
  *
- * @param values Ungrouped values to get minimum value's index from
+ * @code{.pseudo}
+ * values       = [2, 1, 4, -1, -2, <NA>, 4, <NA>]
+ * group_labels = [0, 0, 0,  1,  1,    2, 2,    3]
+ * num_groups   = 4
+ *
+ * group_argmin = [1, 1, 0, <NA>]
+ * @endcode
+ *
+ * @param values Grouped values to get minimum value's index from
  * @param num_groups Number of groups
  * @param group_labels ID of group that the corresponding value belongs to
  * @param key_sort_order Indices indicating sort order of groupby keys
@@ -110,6 +177,14 @@ std::unique_ptr<column> group_argmin(column_view const& values,
  * @brief Internal API to calculate number of non-null values in each group of
  *  @p values
  *
+ * @code{.pseudo}
+ * values            = [2, 1, 4, -1, -2, <NA>, 4, <NA>]
+ * group_labels      = [0, 0, 0,  1,  1,    2, 2,    3]
+ * num_groups        = 4
+ *
+ * group_count_valid = [3, 2, 1, 0]
+ * @endcode
+ *
  * @param values Grouped values to get valid count of
  * @param group_labels ID of group that the corresponding value belongs to
  * @param num_groups Number of groups ( unique values in @p group_labels )
@@ -125,6 +200,13 @@ std::unique_ptr<column> group_count_valid(column_view const& values,
 /**
  * @brief Internal API to calculate number of values in each group of @p values
  *
+ * @code{.pseudo}
+ * group_offsets = [0, 3, 5, 7, 8]
+ * num_groups    = 4
+ *
+ * group_count_all = [3, 2, 2, 1]
+ * @endcode
+ *
  * @param group_offsets Offsets of groups' starting points within @p values
  * @param num_groups Number of groups ( unique values in @p group_labels )
  * @param mr Device memory resource used to allocate the returned column's device memory
@@ -138,6 +220,16 @@ std::unique_ptr<column> group_count_all(cudf::device_span<size_type const> group
 /**
  * @brief Internal API to calculate groupwise variance
  *
+ * @code{.pseudo}
+ * values       = [2, 1, 4, -1, -2, <NA>, 4, <NA>]
+ * group_labels = [0, 0, 0,  1,  1,    2, 2,    3]
+ * group_means  = [2.333333, -1.5, 4.0, <NA>]
+ * group_sizes  = [3, 2, 1, 0]
+ * ddof         = 1
+ *
+ * group_var    = [2.333333, 0.5, <NA>, <NA>]
+ * @endcode
+ *
  * @param values Grouped values to get variance of
  * @param group_means Pre-calculated groupwise MEAN
  * @param group_sizes Number of valid elements per group
@@ -158,6 +250,16 @@ std::unique_ptr<column> group_var(column_view const& values,
 /**
  * @brief Internal API to calculate groupwise quantiles
  *
+ * @code{.pseudo}
+ * values       = [1, 2, 4, -2, -1, <NA>, 4, <NA>]
+ * group_labels = [0, 0, 0,  1,  1,    2, 2,    3]
+ * group_sizes  = [3, 2, 1, 0]
+ * num_groups   = 4
+ * quantiles    = [0.25, 0.5]
+ *
+ * group_quantiles = [1.5, 2, -1.75, -1.5,  4,  4, <NA>, <NA>]
+ * @endcode
+ *
  * @param values Grouped and sorted (within group) values to get quantiles from
  * @param group_sizes Number of valid elements per group
  * @param group_offsets Offsets of groups' starting points within @p values
@@ -179,6 +281,16 @@ std::unique_ptr<column> group_quantiles(column_view const& values,
  * @brief Internal API to calculate number of unique values in each group of
  *  @p values
  *
+ * @code{.pseudo}
+ * values        = [2, 4, 4, -1, -2, <NA>, 4, <NA>]
+ * group_labels  = [0, 0, 0,  1,  1,    2, 2,    3]
+ * group_offsets = [0,        3,        5,       7, 8]
+ * num_groups    = 4
+ *
+ * group_nunique(null_policy::EXCLUDE) = [2, 2, 1, 0]
+ * group_nunique(null_policy::INCLUDE) = [2, 2, 2, 1]
+ * @endcode
+ *
  * @param values Grouped and sorted (within group) values to get unique count of
  * @param group_labels ID of group that the corresponding value belongs to
  * @param num_groups Number of groups ( unique values in @p group_labels )
@@ -200,6 +312,17 @@ std::unique_ptr<column> group_nunique(column_view const& values,
 /**
  * @brief Internal API to calculate nth values in each group of  @p values
  *
+ * @code{.pseudo}
+ * values        = [2, 1, 4, -1, -2, <NA>, 4, <NA>]
+ * group_sizes   = [3,        2,        2,       1]
+ * group_labels  = [0, 0, 0,  1,  1,    2, 2,    3]
+ * group_offsets = [0,        3,        5,       7, 8]
+ * num_groups    = 4
+ *
+ * group_nth_element(n=0, null_policy::EXCLUDE) = [2, -1, 4, <NA>]
+ * group_nth_element(n=0, null_policy::INCLUDE) = [2, -1, <NA>, <NA>]
+ * @endcode
+ *
  * @param values Grouped values to get nth value of
  * @param group_sizes Number of elements per group
  * @param group_labels ID of group that the corresponding value belongs to
@@ -223,6 +346,14 @@ std::unique_ptr<column> group_nth_element(column_view const& values,
 /**
  * @brief Internal API to collect grouped values into a lists column
  *
+ * @code{.pseudo}
+ * values        = [2, 1, 4, -1, -2, <NA>, 4, <NA>]
+ * group_offsets = [0,        3,        5,   7, 8]
+ * num_groups    = 4
+ *
+ * group_collect = [[2, 1, 4], [-1, -2], [<NA>, 4], [<NA>]]
+ * @endcode
+ *
  * @param values Grouped values to collect
  * @param group_offsets Offsets of groups' starting points within @p values
  * @param num_groups Number of groups
@@ -235,6 +366,9 @@ std::unique_ptr<column> group_collect(column_view const& values,
                                       rmm::cuda_stream_view stream,
                                       rmm::mr::device_memory_resource* mr);
 
+/** @endinternal
+ *
+ */
 }  // namespace detail
 }  // namespace groupby
 }  // namespace cudf
diff --git a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh
index 63a68974d6b..e5e93bbef47 100644
--- a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh
+++ b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh
@@ -40,14 +40,17 @@ struct reduce_functor {
   template <typename T>
   static constexpr bool is_supported()
   {
-    if (K == aggregation::SUM)
-      return cudf::is_numeric<T>() || cudf::is_duration<T>() || cudf::is_fixed_point<T>();
-    else if (K == aggregation::MIN or K == aggregation::MAX)
-      return cudf::is_fixed_width<T>() and is_relationally_comparable<T, T>();
-    else if (K == aggregation::ARGMIN or K == aggregation::ARGMAX)
-      return is_relationally_comparable<T, T>();
-    else
-      return false;
+    switch (K) {
+      case aggregation::SUM:
+        return cudf::is_numeric<T>() || cudf::is_duration<T>() || cudf::is_fixed_point<T>();
+      case aggregation::PRODUCT: return cudf::detail::is_product_supported<T>();
+      case aggregation::MIN:
+      case aggregation::MAX:
+        return cudf::is_fixed_width<T>() and is_relationally_comparable<T, T>();
+      case aggregation::ARGMIN:
+      case aggregation::ARGMAX: return is_relationally_comparable<T, T>();
+      default: return false;
+    }
   }
 
   template <typename T>
diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index c8b7ac51615..9dbd4a881a6 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -63,6 +63,7 @@ ConfigureTest(GROUPBY_TEST
     groupby/group_sum_test.cpp
     groupby/group_min_test.cpp
     groupby/group_max_test.cpp
+    groupby/group_product_test.cpp
     groupby/group_sum_of_squares_test.cpp
     groupby/group_mean_test.cpp
     groupby/group_var_test.cpp
diff --git a/cpp/tests/groupby/group_product_test.cpp b/cpp/tests/groupby/group_product_test.cpp
new file mode 100644
index 00000000000..5af27585bee
--- /dev/null
+++ b/cpp/tests/groupby/group_product_test.cpp
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <tests/groupby/groupby_test_util.hpp>
+
+#include <cudf_test/base_fixture.hpp>
+#include <cudf_test/column_wrapper.hpp>
+#include <cudf_test/type_lists.hpp>
+
+#include <cudf/detail/aggregation/aggregation.hpp>
+
+namespace cudf {
+namespace test {
+template <typename V>
+struct groupby_product_test : public cudf::test::BaseFixture {
+};
+
+using K               = int32_t;
+using supported_types = cudf::test::Types<int8_t, int16_t, int32_t, int64_t, float, double>;
+
+TYPED_TEST_CASE(groupby_product_test, supported_types);
+
+TYPED_TEST(groupby_product_test, basic)
+{
+  using V = TypeParam;
+  using R = cudf::detail::target_type_t<V, aggregation::PRODUCT>;
+
+  // clang-format off
+  fixed_width_column_wrapper<K> keys        { 1, 2, 3, 1, 2, 2, 1, 3, 3, 2};
+  fixed_width_column_wrapper<V> vals        { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+
+                                        //  { 1, 1, 1,  2, 2, 2, 2,  3, 3, 3}
+  fixed_width_column_wrapper<K> expect_keys { 1,        2,           3      };
+                                        //  { 0, 3, 6,  1, 4, 5, 9,  2, 7, 8}
+  fixed_width_column_wrapper<R> expect_vals({   0.,       180.,      112. }, all_valid());
+  // clang-format on
+
+  test_single_agg(keys, vals, expect_keys, expect_vals, cudf::make_product_aggregation());
+}
+
+TYPED_TEST(groupby_product_test, empty_cols)
+{
+  using V = TypeParam;
+  using R = cudf::detail::target_type_t<V, aggregation::PRODUCT>;
+
+  fixed_width_column_wrapper<K> keys{};
+  fixed_width_column_wrapper<V> vals{};
+
+  fixed_width_column_wrapper<K> expect_keys{};
+  fixed_width_column_wrapper<R> expect_vals{};
+
+  test_single_agg(keys, vals, expect_keys, expect_vals, cudf::make_product_aggregation());
+}
+
+TYPED_TEST(groupby_product_test, zero_valid_keys)
+{
+  using V = TypeParam;
+  using R = cudf::detail::target_type_t<V, aggregation::PRODUCT>;
+
+  fixed_width_column_wrapper<K> keys({1, 2, 3}, all_null());
+  fixed_width_column_wrapper<V> vals{3, 4, 5};
+
+  fixed_width_column_wrapper<K> expect_keys{};
+  fixed_width_column_wrapper<R> expect_vals{};
+
+  test_single_agg(keys, vals, expect_keys, expect_vals, cudf::make_product_aggregation());
+}
+
+TYPED_TEST(groupby_product_test, zero_valid_values)
+{
+  using V = TypeParam;
+  using R = cudf::detail::target_type_t<V, aggregation::PRODUCT>;
+
+  fixed_width_column_wrapper<K> keys{1, 1, 1};
+  fixed_width_column_wrapper<V> vals({3, 4, 5}, all_null());
+
+  fixed_width_column_wrapper<K> expect_keys{1};
+  fixed_width_column_wrapper<R> expect_vals({0}, all_null());
+
+  test_single_agg(keys, vals, expect_keys, expect_vals, cudf::make_product_aggregation());
+}
+
+TYPED_TEST(groupby_product_test, null_keys_and_values)
+{
+  using V = TypeParam;
+  using R = cudf::detail::target_type_t<V, aggregation::PRODUCT>;
+
+  // clang-format off
+  fixed_width_column_wrapper<K> keys(       { 1, 2, 3, 1, 2, 2, 1, 3, 3, 2, 4},
+                                            { 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1});
+  fixed_width_column_wrapper<V> vals(       { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 3},
+                                            { 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0});
+
+                                        //  { 1, 1,     2, 2, 2,   3, 3,    4}
+  fixed_width_column_wrapper<K> expect_keys({ 1,        2,         3,       4}, all_valid());
+                                        //  { _, 3, 6,  1, 4, 9,   2, 8,    _}
+  fixed_width_column_wrapper<R> expect_vals({ 18.,      36.,       16.,     3.},
+                                            { 1,        1,         1,       0});
+  // clang-format on
+
+  test_single_agg(keys, vals, expect_keys, expect_vals, cudf::make_product_aggregation());
+}
+
+// This test will not work until the following ptxas bug is fixed in 10.2
+// https://nvbugswb.nvidia.com/NvBugs5/SWBug.aspx?bugid=3186317&cp=
+TYPED_TEST(groupby_product_test, DISABLED_dictionary)
+{
+  using V = TypeParam;
+  using R = cudf::detail::target_type_t<V, aggregation::PRODUCT>;
+
+  // clang-format off
+  fixed_width_column_wrapper<K> keys{ 1, 2, 3, 1, 2, 2, 1, 3, 3, 2};
+  dictionary_column_wrapper<V>  vals{ 0, 2, 2, 3, 4, 5, 6, 7, 8, 9};
+
+                                        //  { 1, 1, 1,  2, 2, 2, 2,  3, 3, 3}
+  fixed_width_column_wrapper<K> expect_keys({ 1,        2,           3      });
+                                        //  { 0, 3, 6,  2, 4, 5, 9,  2, 7, 8}
+  fixed_width_column_wrapper<R> expect_vals({  0.,     360.,        112. }, all_valid());
+  // clang-format on
+
+  test_single_agg(keys, vals, expect_keys, expect_vals, cudf::make_product_aggregation());
+}
+
+}  // namespace test
+}  // namespace cudf
diff --git a/python/cudf/cudf/_lib/aggregation.pyx b/python/cudf/cudf/_lib/aggregation.pyx
index 682d8cbf329..bed1581ad95 100644
--- a/python/cudf/cudf/_lib/aggregation.pyx
+++ b/python/cudf/cudf/_lib/aggregation.pyx
@@ -163,6 +163,7 @@ cdef class Aggregation:
         cdef Aggregation agg = cls()
         agg.c_obj = move(libcudf_aggregation.make_product_aggregation())
         return agg
+    prod = product
 
     @classmethod
     def sum_of_squares(cls):
diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py
index 8d32c99b5b0..b643bc7f7fd 100644
--- a/python/cudf/cudf/core/groupby/groupby.py
+++ b/python/cudf/cudf/core/groupby/groupby.py
@@ -587,6 +587,10 @@ def sum(self):
         """Compute the column-wise sum of the values in each group."""
         return self.agg("sum")
 
+    def prod(self):
+        """Compute the column-wise product of the values in each group."""
+        return self.agg("prod")
+
     def idxmin(self):
         """Get the column-wise index of the minimum value in each group."""
         return self.agg("idxmin")
diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py
index 868387b100e..37840be8922 100644
--- a/python/cudf/cudf/tests/test_groupby.py
+++ b/python/cudf/cudf/tests/test_groupby.py
@@ -314,7 +314,18 @@ def emulate(df):
 @pytest.mark.parametrize("nelem", [2, 3, 100, 500, 1000])
 @pytest.mark.parametrize(
     "func",
-    ["mean", "std", "var", "min", "max", "idxmin", "idxmax", "count", "sum"],
+    [
+        "mean",
+        "std",
+        "var",
+        "min",
+        "max",
+        "idxmin",
+        "idxmax",
+        "count",
+        "sum",
+        "prod",
+    ],
 )
 def test_groupby_2keys_agg(nelem, func):
     # gdf (Note: lack of multiIndex)
@@ -390,7 +401,7 @@ def test_groupby_agg_decimal(num_groups, nelem_per_group, func):
 
 
 @pytest.mark.parametrize(
-    "agg", ["min", "max", "idxmin", "idxmax", "count", "sum", "mean"]
+    "agg", ["min", "max", "idxmin", "idxmax", "count", "sum", "prod", "mean"]
 )
 def test_series_groupby(agg):
     s = pd.Series([1, 2, 3])
@@ -404,7 +415,7 @@ def test_series_groupby(agg):
 
 
 @pytest.mark.parametrize(
-    "agg", ["min", "max", "idxmin", "idxmax", "count", "sum", "mean"]
+    "agg", ["min", "max", "idxmin", "idxmax", "count", "sum", "prod", "mean"]
 )
 def test_series_groupby_agg(agg):
     s = pd.Series([1, 2, 3])
@@ -422,6 +433,7 @@ def test_series_groupby_agg(agg):
         "max",
         "count",
         "sum",
+        "prod",
         "mean",
         pytest.param(
             "idxmin",
@@ -451,6 +463,7 @@ def test_groupby_level_zero(agg):
         "max",
         "count",
         "sum",
+        "prod",
         "mean",
         pytest.param(
             "idxmin",
@@ -815,7 +828,7 @@ def test_groupby_multi_agg_hash_groupby(agg):
 
 
 @pytest.mark.parametrize(
-    "agg", ["min", "max", "idxmax", "idxmax", "sum", "count", "mean"]
+    "agg", ["min", "max", "idxmax", "idxmax", "sum", "prod", "count", "mean"]
 )
 def test_groupby_nulls_basic(agg):
     check_dtype = False if agg in _index_type_aggs else True
@@ -855,7 +868,7 @@ def test_groupby_nulls_basic(agg):
     # Pandas' null semantics. Should we change it?
     assert_groupby_results_equal(
         getattr(pdf.groupby("a"), agg)().fillna(0),
-        getattr(gdf.groupby("a"), agg)().fillna(0),
+        getattr(gdf.groupby("a"), agg)().fillna(0 if agg != "prod" else 1),
         check_dtype=check_dtype,
     )