From 46c0fa09f89fd6b327e85eecd65d1113d28e75f6 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Tue, 12 Mar 2024 15:52:00 -0700 Subject: [PATCH 01/15] Remove redundant header file --- cpp/src/stream_compaction/distinct_count.cu | 3 +- cpp/src/stream_compaction/distinct_helpers.cu | 4 +-- .../stream_compaction/distinct_helpers.hpp | 5 ++- .../stream_compaction_common.cuh | 5 ++- .../stream_compaction_common.hpp | 35 ------------------- cpp/src/stream_compaction/unique.cu | 3 +- 6 files changed, 8 insertions(+), 47 deletions(-) delete mode 100644 cpp/src/stream_compaction/stream_compaction_common.hpp diff --git a/cpp/src/stream_compaction/distinct_count.cu b/cpp/src/stream_compaction/distinct_count.cu index b7aadbe14fa..819103ab7ed 100644 --- a/cpp/src/stream_compaction/distinct_count.cu +++ b/cpp/src/stream_compaction/distinct_count.cu @@ -15,16 +15,17 @@ */ #include "stream_compaction_common.cuh" -#include "stream_compaction_common.hpp" #include #include #include +#include #include #include #include #include #include +#include #include #include #include diff --git a/cpp/src/stream_compaction/distinct_helpers.cu b/cpp/src/stream_compaction/distinct_helpers.cu index 8f36ec98f4a..92a4fd8047f 100644 --- a/cpp/src/stream_compaction/distinct_helpers.cu +++ b/cpp/src/stream_compaction/distinct_helpers.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,8 +16,6 @@ #include "distinct_helpers.hpp" -#include - namespace cudf::detail { namespace { diff --git a/cpp/src/stream_compaction/distinct_helpers.hpp b/cpp/src/stream_compaction/distinct_helpers.hpp index b667d0b04f0..28f1ca250c9 100644 --- a/cpp/src/stream_compaction/distinct_helpers.hpp +++ b/cpp/src/stream_compaction/distinct_helpers.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,8 +14,7 @@ * limitations under the License. */ -#include "stream_compaction_common.hpp" - +#include #include #include #include diff --git a/cpp/src/stream_compaction/stream_compaction_common.cuh b/cpp/src/stream_compaction/stream_compaction_common.cuh index 839672d6a56..0f9bc18e258 100644 --- a/cpp/src/stream_compaction/stream_compaction_common.cuh +++ b/cpp/src/stream_compaction/stream_compaction_common.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,9 +15,8 @@ */ #pragma once -#include "stream_compaction_common.hpp" - #include +#include #include #include diff --git a/cpp/src/stream_compaction/stream_compaction_common.hpp b/cpp/src/stream_compaction/stream_compaction_common.hpp deleted file mode 100644 index 13795f49781..00000000000 --- a/cpp/src/stream_compaction/stream_compaction_common.hpp +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include -#include -#include -#include - -#include -#include - -#include - -namespace cudf { -namespace detail { - -using hash_map_type = cuco::legacy:: - static_map; - -} // namespace detail -} // namespace cudf diff --git a/cpp/src/stream_compaction/unique.cu b/cpp/src/stream_compaction/unique.cu index db67daaa324..e1136ab1a74 100644 --- a/cpp/src/stream_compaction/unique.cu +++ b/cpp/src/stream_compaction/unique.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,7 +15,6 @@ */ #include "stream_compaction_common.cuh" -#include "stream_compaction_common.hpp" #include #include From 7b4d0076c89ec95025a0417e1ad67daaf615466c Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Tue, 12 Mar 2024 17:42:33 -0700 Subject: [PATCH 02/15] Migrate to set-based algorithms --- cpp/src/stream_compaction/distinct.cu | 154 +++++++++++++------------- 1 file changed, 77 insertions(+), 77 deletions(-) diff --git a/cpp/src/stream_compaction/distinct.cu b/cpp/src/stream_compaction/distinct.cu index 11e2e77c253..992690c9967 100644 --- a/cpp/src/stream_compaction/distinct.cu +++ b/cpp/src/stream_compaction/distinct.cu @@ -27,6 +27,7 @@ #include +#include #include #include #include @@ -38,6 +39,54 @@ namespace cudf { namespace detail { +namespace { +/** + * @brief Invokes the given `func` with desired the row equality and probing method + * + * @tparam HasNested Flag indicating whether there are nested columns in the input + * @tparam Hasher Type of device hash function + * @tparam Func Type of the helper function doing `distinct` check + * + * @param compare_nulls Control whether nulls should be compared as equal or not + * @param compare_nans Control whether floating-point NaNs values should be compared as equal or not + * @param has_nulls Flag indicating whether the input has nulls or not + * @param equal Self table comparator + * @param d_hash Device hash functor + * @param func The input functor to invoke + */ +template +rmm::device_uvector dispatch_hash_set( + null_equality compare_nulls, + nan_equality compare_nans, + bool has_nulls, + cudf::experimental::row::equality::self_comparator row_equal, + Hasher const& d_hash, + Func&& func) +{ + // Distinguish probing scheme CG sizes between nested and flat types for better performance + auto const probing_scheme = [&]() { + if constexpr (HasNested) { + return cuco::linear_probing<4, Hasher>{d_hash}; + } else { + return cuco::linear_probing<1, Hasher>{d_hash}; + } + }(); + + if (compare_nans == nan_equality::ALL_EQUAL) { + auto const d_equal = row_equal.equal_to( + nullate::DYNAMIC{has_nulls}, + compare_nulls, + cudf::experimental::row::equality::nan_equal_physical_equality_comparator{}); + return func(d_equal, probing_scheme); + } else { + auto const d_equal = row_equal.equal_to( + nullate::DYNAMIC{has_nulls}, + compare_nulls, + cudf::experimental::row::equality::physical_equality_comparator{}); + return func(d_equal, probing_scheme); + } +} +} // namespace rmm::device_uvector distinct_indices(table_view const& input, duplicate_keep_option keep, @@ -50,93 +99,44 @@ rmm::device_uvector distinct_indices(table_view const& input, return rmm::device_uvector(0, stream, mr); } - auto map = hash_map_type{compute_hash_table_size(input.num_rows()), - cuco::empty_key{-1}, - cuco::empty_value{std::numeric_limits::min()}, - cudf::detail::cuco_allocator{stream}, - stream.value()}; - auto const preprocessed_input = cudf::experimental::row::hash::preprocessed_table::create(input, stream); auto const has_nulls = nullate::DYNAMIC{cudf::has_nested_nulls(input)}; auto const has_nested_columns = cudf::detail::has_nested_columns(input); - auto const row_hasher = cudf::experimental::row::hash::row_hasher(preprocessed_input); - auto const key_hasher = row_hasher.device_hasher(has_nulls); - - auto const row_comp = cudf::experimental::row::equality::self_comparator(preprocessed_input); - - auto const pair_iter = cudf::detail::make_counting_transform_iterator( - size_type{0}, - cuda::proclaim_return_type>( - [] __device__(size_type const i) { return cuco::make_pair(i, i); })); - - auto const insert_keys = [&](auto const value_comp) { - if (has_nested_columns) { - auto const key_equal = row_comp.equal_to(has_nulls, nulls_equal, value_comp); - map.insert(pair_iter, pair_iter + input.num_rows(), key_hasher, key_equal, stream.value()); - } else { - auto const key_equal = row_comp.equal_to(has_nulls, nulls_equal, value_comp); - map.insert(pair_iter, pair_iter + input.num_rows(), key_hasher, key_equal, stream.value()); + auto const row_hash = cudf::experimental::row::hash::row_hasher(preprocessed_input); + auto const d_hash = row_hash.device_hasher(has_nulls); + + auto const row_equal = cudf::experimental::row::equality::self_comparator(preprocessed_input); + + auto const helper_func = [&](auto const& d_equal, auto const& probing_scheme) { + auto set = cuco::static_set{input.num_rows(), + 0.5, // desired load factor + cuco::empty_key{cudf::detail::CUDF_SIZE_TYPE_SENTINEL}, + d_equal, + probing_scheme, + {}, + {}, + cudf::detail::cuco_allocator{stream}, + stream.value()}; + auto const iter = thrust::counting_iterator{0}; + auto const size = set.insert(iter, iter + input.num_rows(), stream.value()); + + auto output_indices = rmm::device_uvector(size, stream, mr); + // If we don't care about order, just gather indices of distinct keys taken from map. + if (keep == duplicate_keep_option::KEEP_ANY) { + set.retrieve_all(output_indices.begin(), stream.value()); } + return output_indices; }; - if (nans_equal == nan_equality::ALL_EQUAL) { - using nan_equal_comparator = - cudf::experimental::row::equality::nan_equal_physical_equality_comparator; - insert_keys(nan_equal_comparator{}); + if (cudf::detail::has_nested_columns(input)) { + return dispatch_hash_set( + nulls_equal, nans_equal, has_nulls, row_equal, d_hash, helper_func); } else { - using nan_unequal_comparator = cudf::experimental::row::equality::physical_equality_comparator; - insert_keys(nan_unequal_comparator{}); + return dispatch_hash_set( + nulls_equal, nans_equal, has_nulls, row_equal, d_hash, helper_func); } - - auto output_indices = rmm::device_uvector(map.get_size(), stream, mr); - - // If we don't care about order, just gather indices of distinct keys taken from map. - if (keep == duplicate_keep_option::KEEP_ANY) { - map.retrieve_all(output_indices.begin(), thrust::make_discard_iterator(), stream.value()); - return output_indices; - } - - // For other keep options, reduce by row on rows that compare equal. - auto const reduction_results = reduce_by_row(map, - std::move(preprocessed_input), - input.num_rows(), - has_nulls, - has_nested_columns, - keep, - nulls_equal, - nans_equal, - stream, - rmm::mr::get_current_device_resource()); - - // Extract the desired output indices from reduction results. - auto const map_end = [&] { - if (keep == duplicate_keep_option::KEEP_NONE) { - // Reduction results with `KEEP_NONE` are either group sizes of equal rows, or `0`. - // Thus, we only output index of the rows in the groups having group size of `1`. - return thrust::copy_if(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(input.num_rows()), - output_indices.begin(), - [reduction_results = reduction_results.begin()] __device__( - auto const idx) { return reduction_results[idx] == size_type{1}; }); - } - - // Reduction results with `KEEP_FIRST` and `KEEP_LAST` are row indices of the first/last row in - // each group of equal rows (which are the desired output indices), or the value given by - // `reduction_init_value()`. - return thrust::copy_if(rmm::exec_policy(stream), - reduction_results.begin(), - reduction_results.end(), - output_indices.begin(), - [init_value = reduction_init_value(keep)] __device__(auto const idx) { - return idx != init_value; - }); - }(); - - output_indices.resize(thrust::distance(output_indices.begin(), map_end), stream); - return output_indices; } std::unique_ptr distinct(table_view const& input, From 9da0daf18f40ae4649af91cfc9555e7e84e2da89 Mon Sep 17 00:00:00 2001 From: Srinivas Yadav Singanaboina Date: Tue, 11 Jun 2024 22:54:35 +0000 Subject: [PATCH 03/15] add support for KEEP FIRST,LAST and NONE --- cpp/src/stream_compaction/distinct.cu | 87 +++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) diff --git a/cpp/src/stream_compaction/distinct.cu b/cpp/src/stream_compaction/distinct.cu index 992690c9967..4dfb07ec8bf 100644 --- a/cpp/src/stream_compaction/distinct.cu +++ b/cpp/src/stream_compaction/distinct.cu @@ -88,6 +88,89 @@ rmm::device_uvector dispatch_hash_set( } } // namespace +template +void distinct_first_last_none(SetRef set, + rmm::device_uvector& output_indices, + size_type num_rows, + duplicate_keep_option keep, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + auto reduction_results = rmm::device_uvector(num_rows, stream, mr); + thrust::uninitialized_fill(rmm::exec_policy(stream), + reduction_results.begin(), + reduction_results.end(), + reduction_init_value(keep)); + + static auto constexpr cg_size = SetRef::cg_size; + + thrust::for_each( + rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(num_rows * cg_size), + [set, keep, reduction_results = reduction_results.begin()] __device__( + size_type const idx) mutable { + size_type cg_idx = idx / cg_size; + + auto [out_ptr, inserted] = [&]() { + if constexpr (cg_size == 1) { + return set.insert_and_find(idx); + } else { + auto const tile = + cooperative_groups::tiled_partition(cooperative_groups::this_thread_block()); + return set.insert_and_find(tile, cg_idx); + } + }(); + + auto const tile = + cooperative_groups::tiled_partition(cooperative_groups::this_thread_block()); + if (keep == duplicate_keep_option::KEEP_FIRST and tile.thread_rank() == 0) { + // Store the smallest index of all rows that are equal. + auto ref = + cuda::atomic_ref{reduction_results[*out_ptr]}; + ref.fetch_min(cg_idx, cuda::memory_order_relaxed); + } + if (keep == duplicate_keep_option::KEEP_LAST and tile.thread_rank() == 0) { + // Store the greatest index of all rows that are equal. + auto ref = + cuda::atomic_ref{reduction_results[*out_ptr]}; + ref.fetch_max(cg_idx, cuda::memory_order_relaxed); + } + if (keep == duplicate_keep_option::KEEP_NONE and tile.thread_rank() == 0) { + // Count the number of rows in each group of rows that are compared equal. + auto ref = + cuda::atomic_ref{reduction_results[*out_ptr]}; + ref.fetch_add(size_type{1}, cuda::memory_order_relaxed); + } + }); + + auto const map_end = [&] { + if (keep == duplicate_keep_option::KEEP_NONE) { + // Reduction results with `KEEP_NONE` are either group sizes of equal rows, or `0`. + // Thus, we only output index of the rows in the groups having group size of `1`. + return thrust::copy_if(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(num_rows), + output_indices.begin(), + [reduction_results = reduction_results.begin()] __device__( + auto const idx) { return reduction_results[idx] == size_type{1}; }); + } + + // Reduction results with `KEEP_FIRST` and `KEEP_LAST` are row indices of the first/last row in + // each group of equal rows (which are the desired output indices), or the value given by + // `reduction_init_value()`. + return thrust::copy_if(rmm::exec_policy(stream), + reduction_results.begin(), + reduction_results.end(), + output_indices.begin(), + [init_value = reduction_init_value(keep)] __device__(auto const idx) { + return idx != init_value; + }); + }(); + + output_indices.resize(thrust::distance(output_indices.begin(), map_end), stream); +} + rmm::device_uvector distinct_indices(table_view const& input, duplicate_keep_option keep, null_equality nulls_equal, @@ -126,7 +209,11 @@ rmm::device_uvector distinct_indices(table_view const& input, // If we don't care about order, just gather indices of distinct keys taken from map. if (keep == duplicate_keep_option::KEEP_ANY) { set.retrieve_all(output_indices.begin(), stream.value()); + return output_indices; } + + distinct_first_last_none( + set.ref(cuco::op::insert_and_find), output_indices, input.num_rows(), keep, stream, mr); return output_indices; }; From d1f7b80ae0fd553b2bce58640349e9ec11c2f771 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Tue, 12 Mar 2024 15:52:00 -0700 Subject: [PATCH 04/15] Remove redundant header file --- cpp/src/stream_compaction/distinct_count.cu | 3 +- .../stream_compaction/distinct_helpers.hpp | 3 +- .../stream_compaction_common.cuh | 5 ++- .../stream_compaction_common.hpp | 35 ------------------- cpp/src/stream_compaction/unique.cu | 1 - 5 files changed, 5 insertions(+), 42 deletions(-) delete mode 100644 cpp/src/stream_compaction/stream_compaction_common.hpp diff --git a/cpp/src/stream_compaction/distinct_count.cu b/cpp/src/stream_compaction/distinct_count.cu index 99ca89cc021..9843bb889f4 100644 --- a/cpp/src/stream_compaction/distinct_count.cu +++ b/cpp/src/stream_compaction/distinct_count.cu @@ -15,16 +15,17 @@ */ #include "stream_compaction_common.cuh" -#include "stream_compaction_common.hpp" #include #include #include +#include #include #include #include #include #include +#include #include #include #include diff --git a/cpp/src/stream_compaction/distinct_helpers.hpp b/cpp/src/stream_compaction/distinct_helpers.hpp index 40f97e00ce5..127822e4103 100644 --- a/cpp/src/stream_compaction/distinct_helpers.hpp +++ b/cpp/src/stream_compaction/distinct_helpers.hpp @@ -14,8 +14,7 @@ * limitations under the License. */ -#include "stream_compaction_common.hpp" - +#include #include #include #include diff --git a/cpp/src/stream_compaction/stream_compaction_common.cuh b/cpp/src/stream_compaction/stream_compaction_common.cuh index 839672d6a56..0f9bc18e258 100644 --- a/cpp/src/stream_compaction/stream_compaction_common.cuh +++ b/cpp/src/stream_compaction/stream_compaction_common.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,9 +15,8 @@ */ #pragma once -#include "stream_compaction_common.hpp" - #include +#include #include #include diff --git a/cpp/src/stream_compaction/stream_compaction_common.hpp b/cpp/src/stream_compaction/stream_compaction_common.hpp deleted file mode 100644 index 13795f49781..00000000000 --- a/cpp/src/stream_compaction/stream_compaction_common.hpp +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include -#include -#include -#include - -#include -#include - -#include - -namespace cudf { -namespace detail { - -using hash_map_type = cuco::legacy:: - static_map; - -} // namespace detail -} // namespace cudf diff --git a/cpp/src/stream_compaction/unique.cu b/cpp/src/stream_compaction/unique.cu index c1f8b17938c..edb47984d13 100644 --- a/cpp/src/stream_compaction/unique.cu +++ b/cpp/src/stream_compaction/unique.cu @@ -15,7 +15,6 @@ */ #include "stream_compaction_common.cuh" -#include "stream_compaction_common.hpp" #include #include From a5af297fa826042491cfcd436cf1a2e1f976a208 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Tue, 12 Mar 2024 17:42:33 -0700 Subject: [PATCH 05/15] Migrate to set-based algorithms --- cpp/src/stream_compaction/distinct.cu | 154 +++++++++++++------------- 1 file changed, 77 insertions(+), 77 deletions(-) diff --git a/cpp/src/stream_compaction/distinct.cu b/cpp/src/stream_compaction/distinct.cu index a6f15cc49ec..20dd3c9ab51 100644 --- a/cpp/src/stream_compaction/distinct.cu +++ b/cpp/src/stream_compaction/distinct.cu @@ -28,6 +28,7 @@ #include #include +#include #include #include #include @@ -39,6 +40,54 @@ namespace cudf { namespace detail { +namespace { +/** + * @brief Invokes the given `func` with desired the row equality and probing method + * + * @tparam HasNested Flag indicating whether there are nested columns in the input + * @tparam Hasher Type of device hash function + * @tparam Func Type of the helper function doing `distinct` check + * + * @param compare_nulls Control whether nulls should be compared as equal or not + * @param compare_nans Control whether floating-point NaNs values should be compared as equal or not + * @param has_nulls Flag indicating whether the input has nulls or not + * @param equal Self table comparator + * @param d_hash Device hash functor + * @param func The input functor to invoke + */ +template +rmm::device_uvector dispatch_hash_set( + null_equality compare_nulls, + nan_equality compare_nans, + bool has_nulls, + cudf::experimental::row::equality::self_comparator row_equal, + Hasher const& d_hash, + Func&& func) +{ + // Distinguish probing scheme CG sizes between nested and flat types for better performance + auto const probing_scheme = [&]() { + if constexpr (HasNested) { + return cuco::linear_probing<4, Hasher>{d_hash}; + } else { + return cuco::linear_probing<1, Hasher>{d_hash}; + } + }(); + + if (compare_nans == nan_equality::ALL_EQUAL) { + auto const d_equal = row_equal.equal_to( + nullate::DYNAMIC{has_nulls}, + compare_nulls, + cudf::experimental::row::equality::nan_equal_physical_equality_comparator{}); + return func(d_equal, probing_scheme); + } else { + auto const d_equal = row_equal.equal_to( + nullate::DYNAMIC{has_nulls}, + compare_nulls, + cudf::experimental::row::equality::physical_equality_comparator{}); + return func(d_equal, probing_scheme); + } +} +} // namespace rmm::device_uvector distinct_indices(table_view const& input, duplicate_keep_option keep, @@ -51,93 +100,44 @@ rmm::device_uvector distinct_indices(table_view const& input, return rmm::device_uvector(0, stream, mr); } - auto map = hash_map_type{compute_hash_table_size(input.num_rows()), - cuco::empty_key{-1}, - cuco::empty_value{std::numeric_limits::min()}, - cudf::detail::cuco_allocator{stream}, - stream.value()}; - auto const preprocessed_input = cudf::experimental::row::hash::preprocessed_table::create(input, stream); auto const has_nulls = nullate::DYNAMIC{cudf::has_nested_nulls(input)}; auto const has_nested_columns = cudf::detail::has_nested_columns(input); - auto const row_hasher = cudf::experimental::row::hash::row_hasher(preprocessed_input); - auto const key_hasher = row_hasher.device_hasher(has_nulls); - - auto const row_comp = cudf::experimental::row::equality::self_comparator(preprocessed_input); - - auto const pair_iter = cudf::detail::make_counting_transform_iterator( - size_type{0}, - cuda::proclaim_return_type>( - [] __device__(size_type const i) { return cuco::make_pair(i, i); })); - - auto const insert_keys = [&](auto const value_comp) { - if (has_nested_columns) { - auto const key_equal = row_comp.equal_to(has_nulls, nulls_equal, value_comp); - map.insert(pair_iter, pair_iter + input.num_rows(), key_hasher, key_equal, stream.value()); - } else { - auto const key_equal = row_comp.equal_to(has_nulls, nulls_equal, value_comp); - map.insert(pair_iter, pair_iter + input.num_rows(), key_hasher, key_equal, stream.value()); + auto const row_hash = cudf::experimental::row::hash::row_hasher(preprocessed_input); + auto const d_hash = row_hash.device_hasher(has_nulls); + + auto const row_equal = cudf::experimental::row::equality::self_comparator(preprocessed_input); + + auto const helper_func = [&](auto const& d_equal, auto const& probing_scheme) { + auto set = cuco::static_set{input.num_rows(), + 0.5, // desired load factor + cuco::empty_key{cudf::detail::CUDF_SIZE_TYPE_SENTINEL}, + d_equal, + probing_scheme, + {}, + {}, + cudf::detail::cuco_allocator{stream}, + stream.value()}; + auto const iter = thrust::counting_iterator{0}; + auto const size = set.insert(iter, iter + input.num_rows(), stream.value()); + + auto output_indices = rmm::device_uvector(size, stream, mr); + // If we don't care about order, just gather indices of distinct keys taken from map. + if (keep == duplicate_keep_option::KEEP_ANY) { + set.retrieve_all(output_indices.begin(), stream.value()); } + return output_indices; }; - if (nans_equal == nan_equality::ALL_EQUAL) { - using nan_equal_comparator = - cudf::experimental::row::equality::nan_equal_physical_equality_comparator; - insert_keys(nan_equal_comparator{}); + if (cudf::detail::has_nested_columns(input)) { + return dispatch_hash_set( + nulls_equal, nans_equal, has_nulls, row_equal, d_hash, helper_func); } else { - using nan_unequal_comparator = cudf::experimental::row::equality::physical_equality_comparator; - insert_keys(nan_unequal_comparator{}); + return dispatch_hash_set( + nulls_equal, nans_equal, has_nulls, row_equal, d_hash, helper_func); } - - auto output_indices = rmm::device_uvector(map.get_size(), stream, mr); - - // If we don't care about order, just gather indices of distinct keys taken from map. - if (keep == duplicate_keep_option::KEEP_ANY) { - map.retrieve_all(output_indices.begin(), thrust::make_discard_iterator(), stream.value()); - return output_indices; - } - - // For other keep options, reduce by row on rows that compare equal. - auto const reduction_results = reduce_by_row(map, - std::move(preprocessed_input), - input.num_rows(), - has_nulls, - has_nested_columns, - keep, - nulls_equal, - nans_equal, - stream, - rmm::mr::get_current_device_resource()); - - // Extract the desired output indices from reduction results. - auto const map_end = [&] { - if (keep == duplicate_keep_option::KEEP_NONE) { - // Reduction results with `KEEP_NONE` are either group sizes of equal rows, or `0`. - // Thus, we only output index of the rows in the groups having group size of `1`. - return thrust::copy_if(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(input.num_rows()), - output_indices.begin(), - [reduction_results = reduction_results.begin()] __device__( - auto const idx) { return reduction_results[idx] == size_type{1}; }); - } - - // Reduction results with `KEEP_FIRST` and `KEEP_LAST` are row indices of the first/last row in - // each group of equal rows (which are the desired output indices), or the value given by - // `reduction_init_value()`. - return thrust::copy_if(rmm::exec_policy(stream), - reduction_results.begin(), - reduction_results.end(), - output_indices.begin(), - [init_value = reduction_init_value(keep)] __device__(auto const idx) { - return idx != init_value; - }); - }(); - - output_indices.resize(thrust::distance(output_indices.begin(), map_end), stream); - return output_indices; } std::unique_ptr
distinct(table_view const& input, From 0e2a5cdbf30d9ab6b73093a82125f9d5868c9409 Mon Sep 17 00:00:00 2001 From: Srinivas Yadav Singanaboina Date: Tue, 11 Jun 2024 22:54:35 +0000 Subject: [PATCH 06/15] add support for KEEP FIRST,LAST and NONE --- cpp/src/stream_compaction/distinct.cu | 87 +++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) diff --git a/cpp/src/stream_compaction/distinct.cu b/cpp/src/stream_compaction/distinct.cu index 20dd3c9ab51..07f1133b9c6 100644 --- a/cpp/src/stream_compaction/distinct.cu +++ b/cpp/src/stream_compaction/distinct.cu @@ -89,6 +89,89 @@ rmm::device_uvector dispatch_hash_set( } } // namespace +template +void distinct_first_last_none(SetRef set, + rmm::device_uvector& output_indices, + size_type num_rows, + duplicate_keep_option keep, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + auto reduction_results = rmm::device_uvector(num_rows, stream, mr); + thrust::uninitialized_fill(rmm::exec_policy(stream), + reduction_results.begin(), + reduction_results.end(), + reduction_init_value(keep)); + + static auto constexpr cg_size = SetRef::cg_size; + + thrust::for_each( + rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(num_rows * cg_size), + [set, keep, reduction_results = reduction_results.begin()] __device__( + size_type const idx) mutable { + size_type cg_idx = idx / cg_size; + + auto [out_ptr, inserted] = [&]() { + if constexpr (cg_size == 1) { + return set.insert_and_find(idx); + } else { + auto const tile = + cooperative_groups::tiled_partition(cooperative_groups::this_thread_block()); + return set.insert_and_find(tile, cg_idx); + } + }(); + + auto const tile = + cooperative_groups::tiled_partition(cooperative_groups::this_thread_block()); + if (keep == duplicate_keep_option::KEEP_FIRST and tile.thread_rank() == 0) { + // Store the smallest index of all rows that are equal. + auto ref = + cuda::atomic_ref{reduction_results[*out_ptr]}; + ref.fetch_min(cg_idx, cuda::memory_order_relaxed); + } + if (keep == duplicate_keep_option::KEEP_LAST and tile.thread_rank() == 0) { + // Store the greatest index of all rows that are equal. + auto ref = + cuda::atomic_ref{reduction_results[*out_ptr]}; + ref.fetch_max(cg_idx, cuda::memory_order_relaxed); + } + if (keep == duplicate_keep_option::KEEP_NONE and tile.thread_rank() == 0) { + // Count the number of rows in each group of rows that are compared equal. + auto ref = + cuda::atomic_ref{reduction_results[*out_ptr]}; + ref.fetch_add(size_type{1}, cuda::memory_order_relaxed); + } + }); + + auto const map_end = [&] { + if (keep == duplicate_keep_option::KEEP_NONE) { + // Reduction results with `KEEP_NONE` are either group sizes of equal rows, or `0`. + // Thus, we only output index of the rows in the groups having group size of `1`. + return thrust::copy_if(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(num_rows), + output_indices.begin(), + [reduction_results = reduction_results.begin()] __device__( + auto const idx) { return reduction_results[idx] == size_type{1}; }); + } + + // Reduction results with `KEEP_FIRST` and `KEEP_LAST` are row indices of the first/last row in + // each group of equal rows (which are the desired output indices), or the value given by + // `reduction_init_value()`. + return thrust::copy_if(rmm::exec_policy(stream), + reduction_results.begin(), + reduction_results.end(), + output_indices.begin(), + [init_value = reduction_init_value(keep)] __device__(auto const idx) { + return idx != init_value; + }); + }(); + + output_indices.resize(thrust::distance(output_indices.begin(), map_end), stream); +} + rmm::device_uvector distinct_indices(table_view const& input, duplicate_keep_option keep, null_equality nulls_equal, @@ -127,7 +210,11 @@ rmm::device_uvector distinct_indices(table_view const& input, // If we don't care about order, just gather indices of distinct keys taken from map. if (keep == duplicate_keep_option::KEEP_ANY) { set.retrieve_all(output_indices.begin(), stream.value()); + return output_indices; } + + distinct_first_last_none( + set.ref(cuco::op::insert_and_find), output_indices, input.num_rows(), keep, stream, mr); return output_indices; }; From 250998a2a8b347df45a473310426470732649507 Mon Sep 17 00:00:00 2001 From: Srinivas Yadav Singanaboina Date: Wed, 12 Jun 2024 17:07:53 +0000 Subject: [PATCH 07/15] cleanup --- cpp/src/stream_compaction/distinct.cu | 91 +++++++++++++-------------- 1 file changed, 45 insertions(+), 46 deletions(-) diff --git a/cpp/src/stream_compaction/distinct.cu b/cpp/src/stream_compaction/distinct.cu index 07f1133b9c6..971ad8d6f48 100644 --- a/cpp/src/stream_compaction/distinct.cu +++ b/cpp/src/stream_compaction/distinct.cu @@ -89,59 +89,65 @@ rmm::device_uvector dispatch_hash_set( } } // namespace -template -void distinct_first_last_none(SetRef set, - rmm::device_uvector& output_indices, - size_type num_rows, - duplicate_keep_option keep, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +template +rmm::device_uvector process_keep_option(Set& set, + size_type set_size, + size_type num_rows, + duplicate_keep_option keep, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { + auto output_indices = rmm::device_uvector(set_size, stream, mr); + + // If we don't care about order, just gather indices of distinct keys taken from set. + if (keep == duplicate_keep_option::KEEP_ANY) { + set.retrieve_all(output_indices.begin(), stream.value()); + return output_indices; + } + auto reduction_results = rmm::device_uvector(num_rows, stream, mr); thrust::uninitialized_fill(rmm::exec_policy(stream), reduction_results.begin(), reduction_results.end(), reduction_init_value(keep)); - static auto constexpr cg_size = SetRef::cg_size; + static auto constexpr cg_size = Set::cg_size; + + auto set_ref = set.ref(cuco::op::insert_and_find); thrust::for_each( rmm::exec_policy(stream), thrust::make_counting_iterator(0), thrust::make_counting_iterator(num_rows * cg_size), - [set, keep, reduction_results = reduction_results.begin()] __device__( - size_type const idx) mutable { - size_type cg_idx = idx / cg_size; + [set_ref, keep, reduction_results = reduction_results.begin()] __device__( + size_type const index) mutable { + auto const idx = index / cg_size; + auto const tile = + cooperative_groups::tiled_partition(cooperative_groups::this_thread_block()); auto [out_ptr, inserted] = [&]() { if constexpr (cg_size == 1) { - return set.insert_and_find(idx); + return set_ref.insert_and_find(idx); } else { - auto const tile = - cooperative_groups::tiled_partition(cooperative_groups::this_thread_block()); - return set.insert_and_find(tile, cg_idx); + return set_ref.insert_and_find(tile, idx); } }(); - auto const tile = - cooperative_groups::tiled_partition(cooperative_groups::this_thread_block()); - if (keep == duplicate_keep_option::KEEP_FIRST and tile.thread_rank() == 0) { - // Store the smallest index of all rows that are equal. - auto ref = - cuda::atomic_ref{reduction_results[*out_ptr]}; - ref.fetch_min(cg_idx, cuda::memory_order_relaxed); - } - if (keep == duplicate_keep_option::KEEP_LAST and tile.thread_rank() == 0) { - // Store the greatest index of all rows that are equal. + if (tile.thread_rank() == 0) { auto ref = cuda::atomic_ref{reduction_results[*out_ptr]}; - ref.fetch_max(cg_idx, cuda::memory_order_relaxed); - } - if (keep == duplicate_keep_option::KEEP_NONE and tile.thread_rank() == 0) { - // Count the number of rows in each group of rows that are compared equal. - auto ref = - cuda::atomic_ref{reduction_results[*out_ptr]}; - ref.fetch_add(size_type{1}, cuda::memory_order_relaxed); + if (keep == duplicate_keep_option::KEEP_FIRST) { + // Store the smallest index of all rows that are equal. + ref.fetch_min(idx, cuda::memory_order_relaxed); + } + if (keep == duplicate_keep_option::KEEP_LAST) { + // Store the greatest index of all rows that are equal. + ref.fetch_max(idx, cuda::memory_order_relaxed); + } + if (keep == duplicate_keep_option::KEEP_NONE) { + // Count the number of rows in each group of rows that are compared equal. + ref.fetch_add(size_type{1}, cuda::memory_order_relaxed); + } } }); @@ -170,6 +176,7 @@ void distinct_first_last_none(SetRef set, }(); output_indices.resize(thrust::distance(output_indices.begin(), map_end), stream); + return output_indices; } rmm::device_uvector distinct_indices(table_view const& input, @@ -179,7 +186,9 @@ rmm::device_uvector distinct_indices(table_view const& input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - if (input.num_rows() == 0 or input.num_columns() == 0) { + auto const num_rows = input.num_rows(); + + if (num_rows == 0 or input.num_columns() == 0) { return rmm::device_uvector(0, stream, mr); } @@ -194,7 +203,7 @@ rmm::device_uvector distinct_indices(table_view const& input, auto const row_equal = cudf::experimental::row::equality::self_comparator(preprocessed_input); auto const helper_func = [&](auto const& d_equal, auto const& probing_scheme) { - auto set = cuco::static_set{input.num_rows(), + auto set = cuco::static_set{num_rows, 0.5, // desired load factor cuco::empty_key{cudf::detail::CUDF_SIZE_TYPE_SENTINEL}, d_equal, @@ -204,18 +213,8 @@ rmm::device_uvector distinct_indices(table_view const& input, cudf::detail::cuco_allocator{stream}, stream.value()}; auto const iter = thrust::counting_iterator{0}; - auto const size = set.insert(iter, iter + input.num_rows(), stream.value()); - - auto output_indices = rmm::device_uvector(size, stream, mr); - // If we don't care about order, just gather indices of distinct keys taken from map. - if (keep == duplicate_keep_option::KEEP_ANY) { - set.retrieve_all(output_indices.begin(), stream.value()); - return output_indices; - } - - distinct_first_last_none( - set.ref(cuco::op::insert_and_find), output_indices, input.num_rows(), keep, stream, mr); - return output_indices; + auto const size = set.insert(iter, iter + num_rows, stream.value()); + return process_keep_option(set, size, num_rows, keep, stream, mr); }; if (cudf::detail::has_nested_columns(input)) { From a00b0db5a4cced266900e0b05f3a2ed49194c562 Mon Sep 17 00:00:00 2001 From: Srinivas Yadav Singanaboina Date: Mon, 17 Jun 2024 16:06:20 +0000 Subject: [PATCH 08/15] Move process_keep_option to distinct_helpers --- cpp/src/stream_compaction/distinct.cu | 144 +++----------- cpp/src/stream_compaction/distinct_helpers.cu | 187 ++++++++++-------- .../stream_compaction/distinct_helpers.hpp | 53 ++--- 3 files changed, 162 insertions(+), 222 deletions(-) diff --git a/cpp/src/stream_compaction/distinct.cu b/cpp/src/stream_compaction/distinct.cu index 971ad8d6f48..64d7fd3153b 100644 --- a/cpp/src/stream_compaction/distinct.cu +++ b/cpp/src/stream_compaction/distinct.cu @@ -17,14 +17,18 @@ #include "distinct_helpers.hpp" #include +#include #include #include #include #include +#include #include #include #include +#include +#include #include #include @@ -35,6 +39,7 @@ #include #include +#include #include #include @@ -45,140 +50,38 @@ namespace { * @brief Invokes the given `func` with desired the row equality and probing method * * @tparam HasNested Flag indicating whether there are nested columns in the input - * @tparam Hasher Type of device hash function * @tparam Func Type of the helper function doing `distinct` check * * @param compare_nulls Control whether nulls should be compared as equal or not * @param compare_nans Control whether floating-point NaNs values should be compared as equal or not * @param has_nulls Flag indicating whether the input has nulls or not - * @param equal Self table comparator - * @param d_hash Device hash functor + * @param row_equal Self table comparator * @param func The input functor to invoke */ -template +template rmm::device_uvector dispatch_hash_set( null_equality compare_nulls, nan_equality compare_nans, bool has_nulls, cudf::experimental::row::equality::self_comparator row_equal, - Hasher const& d_hash, Func&& func) { - // Distinguish probing scheme CG sizes between nested and flat types for better performance - auto const probing_scheme = [&]() { - if constexpr (HasNested) { - return cuco::linear_probing<4, Hasher>{d_hash}; - } else { - return cuco::linear_probing<1, Hasher>{d_hash}; - } - }(); - if (compare_nans == nan_equality::ALL_EQUAL) { auto const d_equal = row_equal.equal_to( nullate::DYNAMIC{has_nulls}, compare_nulls, cudf::experimental::row::equality::nan_equal_physical_equality_comparator{}); - return func(d_equal, probing_scheme); + return func(d_equal); } else { auto const d_equal = row_equal.equal_to( nullate::DYNAMIC{has_nulls}, compare_nulls, cudf::experimental::row::equality::physical_equality_comparator{}); - return func(d_equal, probing_scheme); + return func(d_equal); } } } // namespace -template -rmm::device_uvector process_keep_option(Set& set, - size_type set_size, - size_type num_rows, - duplicate_keep_option keep, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - auto output_indices = rmm::device_uvector(set_size, stream, mr); - - // If we don't care about order, just gather indices of distinct keys taken from set. - if (keep == duplicate_keep_option::KEEP_ANY) { - set.retrieve_all(output_indices.begin(), stream.value()); - return output_indices; - } - - auto reduction_results = rmm::device_uvector(num_rows, stream, mr); - thrust::uninitialized_fill(rmm::exec_policy(stream), - reduction_results.begin(), - reduction_results.end(), - reduction_init_value(keep)); - - static auto constexpr cg_size = Set::cg_size; - - auto set_ref = set.ref(cuco::op::insert_and_find); - - thrust::for_each( - rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(num_rows * cg_size), - [set_ref, keep, reduction_results = reduction_results.begin()] __device__( - size_type const index) mutable { - auto const idx = index / cg_size; - auto const tile = - cooperative_groups::tiled_partition(cooperative_groups::this_thread_block()); - - auto [out_ptr, inserted] = [&]() { - if constexpr (cg_size == 1) { - return set_ref.insert_and_find(idx); - } else { - return set_ref.insert_and_find(tile, idx); - } - }(); - - if (tile.thread_rank() == 0) { - auto ref = - cuda::atomic_ref{reduction_results[*out_ptr]}; - if (keep == duplicate_keep_option::KEEP_FIRST) { - // Store the smallest index of all rows that are equal. - ref.fetch_min(idx, cuda::memory_order_relaxed); - } - if (keep == duplicate_keep_option::KEEP_LAST) { - // Store the greatest index of all rows that are equal. - ref.fetch_max(idx, cuda::memory_order_relaxed); - } - if (keep == duplicate_keep_option::KEEP_NONE) { - // Count the number of rows in each group of rows that are compared equal. - ref.fetch_add(size_type{1}, cuda::memory_order_relaxed); - } - } - }); - - auto const map_end = [&] { - if (keep == duplicate_keep_option::KEEP_NONE) { - // Reduction results with `KEEP_NONE` are either group sizes of equal rows, or `0`. - // Thus, we only output index of the rows in the groups having group size of `1`. - return thrust::copy_if(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(num_rows), - output_indices.begin(), - [reduction_results = reduction_results.begin()] __device__( - auto const idx) { return reduction_results[idx] == size_type{1}; }); - } - - // Reduction results with `KEEP_FIRST` and `KEEP_LAST` are row indices of the first/last row in - // each group of equal rows (which are the desired output indices), or the value given by - // `reduction_init_value()`. - return thrust::copy_if(rmm::exec_policy(stream), - reduction_results.begin(), - reduction_results.end(), - output_indices.begin(), - [init_value = reduction_init_value(keep)] __device__(auto const idx) { - return idx != init_value; - }); - }(); - - output_indices.resize(thrust::distance(output_indices.begin(), map_end), stream); - return output_indices; -} - rmm::device_uvector distinct_indices(table_view const& input, duplicate_keep_option keep, null_equality nulls_equal, @@ -202,27 +105,26 @@ rmm::device_uvector distinct_indices(table_view const& input, auto const row_equal = cudf::experimental::row::equality::self_comparator(preprocessed_input); - auto const helper_func = [&](auto const& d_equal, auto const& probing_scheme) { - auto set = cuco::static_set{num_rows, - 0.5, // desired load factor - cuco::empty_key{cudf::detail::CUDF_SIZE_TYPE_SENTINEL}, - d_equal, - probing_scheme, - {}, - {}, - cudf::detail::cuco_allocator{stream}, - stream.value()}; + auto const helper_func = [&](auto const& d_equal) { + using RowHasher = std::decay_t; + auto set = hash_set_type{num_rows, + 0.5, // desired load factor + cuco::empty_key{cudf::detail::CUDF_SIZE_TYPE_SENTINEL}, + d_equal, + {d_hash}, + {}, + {}, + cudf::detail::cuco_allocator{stream}, + stream.value()}; auto const iter = thrust::counting_iterator{0}; auto const size = set.insert(iter, iter + num_rows, stream.value()); - return process_keep_option(set, size, num_rows, keep, stream, mr); + return detail::process_keep_option(set, size, num_rows, keep, stream, mr); }; if (cudf::detail::has_nested_columns(input)) { - return dispatch_hash_set( - nulls_equal, nans_equal, has_nulls, row_equal, d_hash, helper_func); + return dispatch_hash_set(nulls_equal, nans_equal, has_nulls, row_equal, helper_func); } else { - return dispatch_hash_set( - nulls_equal, nans_equal, has_nulls, row_equal, d_hash, helper_func); + return dispatch_hash_set(nulls_equal, nans_equal, has_nulls, row_equal, helper_func); } } diff --git a/cpp/src/stream_compaction/distinct_helpers.cu b/cpp/src/stream_compaction/distinct_helpers.cu index 13e89b15bb7..ee5fcd5c2a4 100644 --- a/cpp/src/stream_compaction/distinct_helpers.cu +++ b/cpp/src/stream_compaction/distinct_helpers.cu @@ -16,96 +16,127 @@ #include "distinct_helpers.hpp" -#include - #include namespace cudf::detail { -namespace { -/** - * @brief The functor to find the first/last/all duplicate row for rows that compared equal. - */ -template -struct reduce_fn : reduce_by_row_fn_base { - duplicate_keep_option const keep; - - reduce_fn(MapView const& d_map, - KeyHasher const& d_hasher, - KeyEqual const& d_equal, - duplicate_keep_option const keep, - size_type* const d_output) - : reduce_by_row_fn_base{d_map, - d_hasher, - d_equal, - d_output}, - keep{keep} - { +template +rmm::device_uvector process_keep_option(hash_set_type& set, + size_type set_size, + size_type num_rows, + duplicate_keep_option keep, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + auto output_indices = rmm::device_uvector(set_size, stream, mr); + + // If we don't care about order, just gather indices of distinct keys taken from set. + if (keep == duplicate_keep_option::KEEP_ANY) { + set.retrieve_all(output_indices.begin(), stream.value()); + return output_indices; } - __device__ void operator()(size_type const idx) const - { - auto const out_ptr = this->get_output_ptr(idx); - - if (keep == duplicate_keep_option::KEEP_FIRST) { - // Store the smallest index of all rows that are equal. - atomicMin(out_ptr, idx); - } else if (keep == duplicate_keep_option::KEEP_LAST) { - // Store the greatest index of all rows that are equal. - atomicMax(out_ptr, idx); - } else { - // Count the number of rows in each group of rows that are compared equal. - atomicAdd(out_ptr, size_type{1}); + auto reduction_results = rmm::device_uvector(num_rows, stream, mr); + thrust::uninitialized_fill(rmm::exec_policy(stream), + reduction_results.begin(), + reduction_results.end(), + reduction_init_value(keep)); + + auto set_ref = set.ref(cuco::op::insert_and_find); + + thrust::for_each(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(num_rows), + [set_ref, keep, reduction_results = reduction_results.begin()] __device__( + size_type const idx) mutable { + auto [out_ptr, inserted] = set_ref.insert_and_find(idx); + + auto ref = cuda::atomic_ref{ + reduction_results[*out_ptr]}; + if (keep == duplicate_keep_option::KEEP_FIRST) { + // Store the smallest index of all rows that are equal. + ref.fetch_min(idx, cuda::memory_order_relaxed); + } + if (keep == duplicate_keep_option::KEEP_LAST) { + // Store the greatest index of all rows that are equal. + ref.fetch_max(idx, cuda::memory_order_relaxed); + } + if (keep == duplicate_keep_option::KEEP_NONE) { + // Count the number of rows in each group of rows that are compared equal. + ref.fetch_add(size_type{1}, cuda::memory_order_relaxed); + } + }); + + auto const map_end = [&] { + if (keep == duplicate_keep_option::KEEP_NONE) { + // Reduction results with `KEEP_NONE` are either group sizes of equal rows, or `0`. + // Thus, we only output index of the rows in the groups having group size of `1`. + return thrust::copy_if(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(num_rows), + output_indices.begin(), + [reduction_results = reduction_results.begin()] __device__( + auto const idx) { return reduction_results[idx] == size_type{1}; }); } - } -}; -/** - * @brief The builder to construct an instance of `reduce_fn` functor base on the given - * value of the `duplicate_keep_option` member variable. - */ -struct reduce_func_builder { - duplicate_keep_option const keep; - - template - auto build(MapView const& d_map, - KeyHasher const& d_hasher, - KeyEqual const& d_equal, - size_type* const d_output) - { - return reduce_fn{d_map, d_hasher, d_equal, keep, d_output}; - } -}; + // Reduction results with `KEEP_FIRST` and `KEEP_LAST` are row indices of the first/last row in + // each group of equal rows (which are the desired output indices), or the value given by + // `reduction_init_value()`. + return thrust::copy_if(rmm::exec_policy(stream), + reduction_results.begin(), + reduction_results.end(), + output_indices.begin(), + [init_value = reduction_init_value(keep)] __device__(auto const idx) { + return idx != init_value; + }); + }(); -} // namespace + output_indices.resize(thrust::distance(output_indices.begin(), map_end), stream); + return output_indices; +} -// This function is split from `distinct.cu` to improve compile time. -rmm::device_uvector reduce_by_row( - hash_map_type const& map, - std::shared_ptr const preprocessed_input, +template rmm::device_uvector process_keep_option( + hash_set_type>& set, + size_type set_size, size_type num_rows, - cudf::nullate::DYNAMIC has_nulls, - bool has_nested_columns, duplicate_keep_option keep, - null_equality nulls_equal, - nan_equality nans_equal, rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - CUDF_EXPECTS(keep != duplicate_keep_option::KEEP_ANY, - "This function should not be called with KEEP_ANY"); - - return hash_reduce_by_row(map, - preprocessed_input, - num_rows, - has_nulls, - has_nested_columns, - nulls_equal, - nans_equal, - reduce_func_builder{keep}, - reduction_init_value(keep), - stream, - mr); -} + rmm::device_async_resource_ref mr); + +template rmm::device_uvector process_keep_option( + hash_set_type>& set, + size_type set_size, + size_type num_rows, + duplicate_keep_option keep, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); + +template rmm::device_uvector process_keep_option( + hash_set_type>& set, + size_type set_size, + size_type num_rows, + duplicate_keep_option keep, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); + +template rmm::device_uvector process_keep_option( + hash_set_type>& set, + size_type set_size, + size_type num_rows, + duplicate_keep_option keep, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); } // namespace cudf::detail diff --git a/cpp/src/stream_compaction/distinct_helpers.hpp b/cpp/src/stream_compaction/distinct_helpers.hpp index 127822e4103..f70f1543bb3 100644 --- a/cpp/src/stream_compaction/distinct_helpers.hpp +++ b/cpp/src/stream_compaction/distinct_helpers.hpp @@ -14,6 +14,7 @@ * limitations under the License. */ +#include #include #include #include @@ -23,6 +24,8 @@ #include #include +#include + namespace cudf::detail { /** @@ -41,13 +44,28 @@ auto constexpr reduction_init_value(duplicate_keep_option keep) } } +template +using hash_set_type = + cuco::static_set>, + cudf::detail::cuco_allocator, + cuco::storage<1>>; + /** - * @brief Perform a reduction on groups of rows that are compared equal. + * @brief Perform a reduction on groups of rows that are compared equal and returns output indices + * of the occurrences of the distinct elements based on `keep` parameter. * * This is essentially a reduce-by-key operation with keys are non-contiguous rows and are compared - * equal. A hash table is used to find groups of equal rows. + * equal. A hash set is used to find groups of equal rows. * * Depending on the `keep` parameter, the reduction operation for each row group is: + * - If `keep == KEEP_ANY` : order does not matter. * - If `keep == KEEP_FIRST`: min of row indices in the group. * - If `keep == KEEP_LAST`: max of row indices in the group. * - If `keep == KEEP_NONE`: count of equivalent rows (group size). @@ -58,30 +76,19 @@ auto constexpr reduction_init_value(duplicate_keep_option keep) * the `reduction_init_value()` function. Then, the reduction result for each row group is written * into the output array at the index of an unspecified row in the group. * - * @param map The auxiliary map to perform reduction - * @param preprocessed_input The preprocessed of the input rows for computing row hashing and row - * comparisons + * @param set The auxiliary set to perform reduction + * @param set_size The number of elements in set * @param num_rows The number of all input rows - * @param has_nulls Indicate whether the input rows has any nulls at any nested levels - * @param has_nested_columns Indicates whether the input table has any nested columns * @param keep The parameter to determine what type of reduction to perform - * @param nulls_equal Flag to specify whether null elements should be considered as equal - * @param nans_equal Flag to specify whether NaN values in floating point column should be - * considered equal. * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned vector - * @return A device_uvector containing the reduction results + * @return A device_uvector containing the output indices */ -rmm::device_uvector reduce_by_row( - hash_map_type const& map, - std::shared_ptr const preprocessed_input, - size_type num_rows, - cudf::nullate::DYNAMIC has_nulls, - bool has_nested_columns, - duplicate_keep_option keep, - null_equality nulls_equal, - nan_equality nans_equal, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr); - +template +rmm::device_uvector process_keep_option(hash_set_type& set, + size_type set_size, + size_type num_rows, + duplicate_keep_option keep, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); } // namespace cudf::detail From 64554115e25f32916742a40e59b7c54b6e51d8cc Mon Sep 17 00:00:00 2001 From: Srinivas Yadav Singanaboina Date: Mon, 17 Jun 2024 16:38:45 +0000 Subject: [PATCH 09/15] header include cleanup --- cpp/src/stream_compaction/distinct_helpers.cu | 2 -- cpp/src/stream_compaction/distinct_helpers.hpp | 1 - 2 files changed, 3 deletions(-) diff --git a/cpp/src/stream_compaction/distinct_helpers.cu b/cpp/src/stream_compaction/distinct_helpers.cu index ee5fcd5c2a4..1de4244347c 100644 --- a/cpp/src/stream_compaction/distinct_helpers.cu +++ b/cpp/src/stream_compaction/distinct_helpers.cu @@ -16,8 +16,6 @@ #include "distinct_helpers.hpp" -#include - namespace cudf::detail { template diff --git a/cpp/src/stream_compaction/distinct_helpers.hpp b/cpp/src/stream_compaction/distinct_helpers.hpp index 8defc3747c2..37ad48ceeda 100644 --- a/cpp/src/stream_compaction/distinct_helpers.hpp +++ b/cpp/src/stream_compaction/distinct_helpers.hpp @@ -14,7 +14,6 @@ * limitations under the License. */ -#include #include #include #include From 5f36f4049a63b51498b74308873dcbcc2036c85d Mon Sep 17 00:00:00 2001 From: Srinivas Yadav Singanaboina Date: Mon, 17 Jun 2024 23:46:55 +0000 Subject: [PATCH 10/15] clean up and rename process_keep_option to reduce_by_row --- cpp/src/stream_compaction/distinct.cu | 16 ++--- cpp/src/stream_compaction/distinct_helpers.cu | 65 ++++++++++--------- .../stream_compaction/distinct_helpers.hpp | 13 ++-- 3 files changed, 48 insertions(+), 46 deletions(-) diff --git a/cpp/src/stream_compaction/distinct.cu b/cpp/src/stream_compaction/distinct.cu index 64d7fd3153b..fc8f45fd515 100644 --- a/cpp/src/stream_compaction/distinct.cu +++ b/cpp/src/stream_compaction/distinct.cu @@ -47,7 +47,7 @@ namespace cudf { namespace detail { namespace { /** - * @brief Invokes the given `func` with desired the row equality and probing method + * @brief Invokes the given `func` with desired the row equality * * @tparam HasNested Flag indicating whether there are nested columns in the input * @tparam Func Type of the helper function doing `distinct` check @@ -59,7 +59,7 @@ namespace { * @param func The input functor to invoke */ template -rmm::device_uvector dispatch_hash_set( +rmm::device_uvector dipatch_row_equal( null_equality compare_nulls, nan_equality compare_nans, bool has_nulls, @@ -100,9 +100,7 @@ rmm::device_uvector distinct_indices(table_view const& input, auto const has_nulls = nullate::DYNAMIC{cudf::has_nested_nulls(input)}; auto const has_nested_columns = cudf::detail::has_nested_columns(input); - auto const row_hash = cudf::experimental::row::hash::row_hasher(preprocessed_input); - auto const d_hash = row_hash.device_hasher(has_nulls); - + auto const row_hash = cudf::experimental::row::hash::row_hasher(preprocessed_input); auto const row_equal = cudf::experimental::row::equality::self_comparator(preprocessed_input); auto const helper_func = [&](auto const& d_equal) { @@ -111,20 +109,20 @@ rmm::device_uvector distinct_indices(table_view const& input, 0.5, // desired load factor cuco::empty_key{cudf::detail::CUDF_SIZE_TYPE_SENTINEL}, d_equal, - {d_hash}, + {row_hash.device_hasher(has_nulls)}, {}, {}, cudf::detail::cuco_allocator{stream}, stream.value()}; auto const iter = thrust::counting_iterator{0}; auto const size = set.insert(iter, iter + num_rows, stream.value()); - return detail::process_keep_option(set, size, num_rows, keep, stream, mr); + return detail::reduce_by_row(set, size, num_rows, keep, stream, mr); }; if (cudf::detail::has_nested_columns(input)) { - return dispatch_hash_set(nulls_equal, nans_equal, has_nulls, row_equal, helper_func); + return dipatch_row_equal(nulls_equal, nans_equal, has_nulls, row_equal, helper_func); } else { - return dispatch_hash_set(nulls_equal, nans_equal, has_nulls, row_equal, helper_func); + return dipatch_row_equal(nulls_equal, nans_equal, has_nulls, row_equal, helper_func); } } diff --git a/cpp/src/stream_compaction/distinct_helpers.cu b/cpp/src/stream_compaction/distinct_helpers.cu index 1de4244347c..09d3f0bb9f6 100644 --- a/cpp/src/stream_compaction/distinct_helpers.cu +++ b/cpp/src/stream_compaction/distinct_helpers.cu @@ -16,15 +16,17 @@ #include "distinct_helpers.hpp" +#include + namespace cudf::detail { template -rmm::device_uvector process_keep_option(hash_set_type& set, - size_type set_size, - size_type num_rows, - duplicate_keep_option keep, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) +rmm::device_uvector reduce_by_row(hash_set_type& set, + size_type set_size, + size_type num_rows, + duplicate_keep_option keep, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) { auto output_indices = rmm::device_uvector(set_size, stream, mr); @@ -35,31 +37,29 @@ rmm::device_uvector process_keep_option(hash_set_type& set } auto reduction_results = rmm::device_uvector(num_rows, stream, mr); - thrust::uninitialized_fill(rmm::exec_policy(stream), + thrust::uninitialized_fill(rmm::exec_policy_nosync(stream), reduction_results.begin(), reduction_results.end(), reduction_init_value(keep)); auto set_ref = set.ref(cuco::op::insert_and_find); - thrust::for_each(rmm::exec_policy(stream), + thrust::for_each(rmm::exec_policy_nosync(stream), thrust::make_counting_iterator(0), thrust::make_counting_iterator(num_rows), [set_ref, keep, reduction_results = reduction_results.begin()] __device__( size_type const idx) mutable { - auto [out_ptr, inserted] = set_ref.insert_and_find(idx); + auto const [inserted_idx_ptr, _] = set_ref.insert_and_find(idx); auto ref = cuda::atomic_ref{ - reduction_results[*out_ptr]}; + reduction_results[*inserted_idx_ptr]}; if (keep == duplicate_keep_option::KEEP_FIRST) { // Store the smallest index of all rows that are equal. ref.fetch_min(idx, cuda::memory_order_relaxed); - } - if (keep == duplicate_keep_option::KEEP_LAST) { + } else if (keep == duplicate_keep_option::KEEP_LAST) { // Store the greatest index of all rows that are equal. ref.fetch_max(idx, cuda::memory_order_relaxed); - } - if (keep == duplicate_keep_option::KEEP_NONE) { + } else { // Count the number of rows in each group of rows that are compared equal. ref.fetch_add(size_type{1}, cuda::memory_order_relaxed); } @@ -69,31 +69,34 @@ rmm::device_uvector process_keep_option(hash_set_type& set if (keep == duplicate_keep_option::KEEP_NONE) { // Reduction results with `KEEP_NONE` are either group sizes of equal rows, or `0`. // Thus, we only output index of the rows in the groups having group size of `1`. - return thrust::copy_if(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(num_rows), - output_indices.begin(), - [reduction_results = reduction_results.begin()] __device__( - auto const idx) { return reduction_results[idx] == size_type{1}; }); + return thrust::copy_if( + rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(num_rows), + output_indices.begin(), + cuda::proclaim_return_type( + [reduction_results = reduction_results.begin()] __device__(auto const idx) { + return reduction_results[idx] == size_type{1}; + })); } // Reduction results with `KEEP_FIRST` and `KEEP_LAST` are row indices of the first/last row in // each group of equal rows (which are the desired output indices), or the value given by // `reduction_init_value()`. - return thrust::copy_if(rmm::exec_policy(stream), - reduction_results.begin(), - reduction_results.end(), - output_indices.begin(), - [init_value = reduction_init_value(keep)] __device__(auto const idx) { - return idx != init_value; - }); + return thrust::copy_if( + rmm::exec_policy(stream), + reduction_results.begin(), + reduction_results.end(), + output_indices.begin(), + cuda::proclaim_return_type([init_value = reduction_init_value(keep)] __device__( + auto const idx) { return idx != init_value; })); }(); output_indices.resize(thrust::distance(output_indices.begin(), map_end), stream); return output_indices; } -template rmm::device_uvector process_keep_option( +template rmm::device_uvector reduce_by_row( hash_set_type process_keep_option( rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); -template rmm::device_uvector process_keep_option( +template rmm::device_uvector reduce_by_row( hash_set_type process_keep_option( rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); -template rmm::device_uvector process_keep_option( +template rmm::device_uvector reduce_by_row( hash_set_type process_keep_option( rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); -template rmm::device_uvector process_keep_option( +template rmm::device_uvector reduce_by_row( hash_set_type #include #include #include @@ -83,10 +84,10 @@ using hash_set_type = * @return A device_uvector containing the output indices */ template -rmm::device_uvector process_keep_option(hash_set_type& set, - size_type set_size, - size_type num_rows, - duplicate_keep_option keep, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr); +rmm::device_uvector reduce_by_row(hash_set_type& set, + size_type set_size, + size_type num_rows, + duplicate_keep_option keep, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); } // namespace cudf::detail From 341d892b96fa791372132a9d0acc140787bd3d9f Mon Sep 17 00:00:00 2001 From: Srinivas Yadav Singanaboina Date: Tue, 18 Jun 2024 15:24:29 +0000 Subject: [PATCH 11/15] handle hash_set overflow using int64_t --- bench.md | 49 + .../stream_compaction/distinct_helpers.hpp | 2 +- static_map.json | 3130 +++++++++++++++++ static_set.json | 3130 +++++++++++++++++ static_set_new.json | 3130 +++++++++++++++++ 5 files changed, 9440 insertions(+), 1 deletion(-) create mode 100644 bench.md create mode 100644 static_map.json create mode 100644 static_set.json create mode 100644 static_set_new.json diff --git a/bench.md b/bench.md new file mode 100644 index 00000000000..627b42a1941 --- /dev/null +++ b/bench.md @@ -0,0 +1,49 @@ +['static_map.json', 'static_set.json'] +# distinct + +## [0] Tesla T4 + +| Type | NumRows | Ref Time | Ref Noise | Cmp Time | Cmp Noise | Diff | %Diff | Status | +|--------------------|-----------|------------|-------------|------------|-------------|--------------|---------|----------| +| bool | 10000 | 210.785 us | 1.30% | 198.679 us | 25.30% | -12.106 us | -5.74% | FAIL | +| bool | 100000 | 253.103 us | 7.79% | 222.088 us | 26.73% | -31.015 us | -12.25% | FAIL | +| bool | 1000000 | 687.758 us | 18.16% | 342.133 us | 26.54% | -345.625 us | -50.25% | FAIL | +| bool | 10000000 | 5.538 ms | 1.36% | 1.669 ms | 4.68% | -3869.028 us | -69.87% | FAIL | +| I8 | 10000 | 174.922 us | 2.65% | 175.165 us | 1.58% | 0.243 us | 0.14% | PASS | +| I8 | 100000 | 220.113 us | 1.65% | 191.545 us | 9.57% | -28.567 us | -12.98% | FAIL | +| I8 | 1000000 | 678.456 us | 0.97% | 327.453 us | 1.63% | -351.003 us | -51.74% | FAIL | +| I8 | 10000000 | 5.514 ms | 0.79% | 1.670 ms | 0.93% | -3843.922 us | -69.71% | FAIL | +| I32 | 10000 | 174.878 us | 3.58% | 175.502 us | 1.31% | 0.624 us | 0.36% | PASS | +| I32 | 100000 | 217.380 us | 1.86% | 188.653 us | 3.68% | -28.727 us | -13.22% | FAIL | +| I32 | 1000000 | 685.808 us | 1.09% | 328.207 us | 1.16% | -357.601 us | -52.14% | FAIL | +| I32 | 10000000 | 5.636 ms | 0.76% | 1.722 ms | 1.19% | -3914.285 us | -69.45% | FAIL | +| I64 | 10000 | 175.257 us | 2.47% | 175.281 us | 1.25% | 0.024 us | 0.01% | PASS | +| I64 | 100000 | 217.467 us | 2.04% | 188.937 us | 2.14% | -28.530 us | -13.12% | FAIL | +| I64 | 1000000 | 711.008 us | 1.20% | 335.716 us | 2.40% | -375.292 us | -52.78% | FAIL | +| I64 | 10000000 | 5.868 ms | 0.96% | 1.805 ms | 1.22% | -4062.509 us | -69.23% | FAIL | +| F32 | 10000 | 178.242 us | 3.19% | 174.956 us | 1.34% | -3.287 us | -1.84% | FAIL | +| F32 | 100000 | 238.333 us | 0.71% | 199.978 us | 0.85% | -38.355 us | -16.09% | FAIL | +| F32 | 1000000 | 1.099 ms | 1.84% | 573.145 us | 1.12% | -525.641 us | -47.84% | FAIL | +| F32 | 10000000 | 13.033 ms | 0.56% | 7.656 ms | 0.43% | -5376.663 us | -41.25% | FAIL | +| cudf::timestamp_ms | 10000 | 175.692 us | 2.43% | 175.332 us | 10.54% | -0.360 us | -0.21% | PASS | +| cudf::timestamp_ms | 100000 | 222.362 us | 2.93% | 188.929 us | 2.17% | -33.433 us | -15.04% | FAIL | +| cudf::timestamp_ms | 1000000 | 723.055 us | 1.77% | 333.130 us | 1.34% | -389.925 us | -53.93% | FAIL | +| cudf::timestamp_ms | 10000000 | 5.952 ms | 0.67% | 1.835 ms | 1.27% | -4116.537 us | -69.16% | FAIL | + +# distinct_list + +## [0] Tesla T4 + +| Type | null_probability | ColumnSize | Ref Time | Ref Noise | Cmp Time | Cmp Noise | Diff | %Diff | Status | +|-----------------|--------------------|--------------|------------|-------------|------------|-------------|---------------|---------|----------| +| I32 | 0 | 100000000 | 12.206 ms | 0.58% | 3.692 ms | 0.45% | -8513.937 us | -69.75% | FAIL | +| I32 | 0.1 | 100000000 | 13.764 ms | 0.58% | 4.242 ms | 0.45% | -9522.336 us | -69.18% | FAIL | +| cudf::list_view | 0 | 100000000 | 13.145 ms | 0.67% | 3.567 ms | 1.02% | -9578.041 us | -72.86% | FAIL | +| cudf::list_view | 0.1 | 100000000 | 15.093 ms | 0.64% | 4.173 ms | 0.73% | -10920.317 us | -72.35% | FAIL | + +# Summary + +- Total Matches: 28 + - Pass (diff <= min_noise): 4 + - Unknown (infinite noise): 0 + - Failure (diff > min_noise): 24 diff --git a/cpp/src/stream_compaction/distinct_helpers.hpp b/cpp/src/stream_compaction/distinct_helpers.hpp index 661c4390f46..3e293f8b5e2 100644 --- a/cpp/src/stream_compaction/distinct_helpers.hpp +++ b/cpp/src/stream_compaction/distinct_helpers.hpp @@ -46,7 +46,7 @@ auto constexpr reduction_init_value(duplicate_keep_option keep) template using hash_set_type = cuco::static_set, cuda::thread_scope_device, RowHasher, cuco::linear_probing<1, diff --git a/static_map.json b/static_map.json new file mode 100644 index 00000000000..00d79037e92 --- /dev/null +++ b/static_map.json @@ -0,0 +1,3130 @@ +{ + "meta": { + "argv": [ + "./STREAM_COMPACTION_NVBENCH", + "-b", + "0", + "-b", + "1", + "--json", + "/home/coder/old.json" + ], + "version": { + "json": { + "major": 1, + "minor": 0, + "patch": 0, + "string": "1.0.0" + }, + "nvbench": { + "major": 0, + "minor": 1, + "patch": 0, + "string": "0.1.0", + "git_branch": "HEAD", + "git_sha": "555d628e9b250868c9da003e4407087ff1982e8e", + "git_version": "old-cmake-198-g555d628", + "git_is_dirty": false + } + } + }, + "devices": [ + { + "id": 0, + "name": "Tesla T4", + "sm_version": 750, + "ptx_version": 750, + "sm_default_clock_rate": 1590000000, + "number_of_sms": 40, + "max_blocks_per_sm": 16, + "max_threads_per_sm": 1024, + "max_threads_per_block": 1024, + "registers_per_sm": 65536, + "registers_per_block": 65536, + "global_memory_size": 15655829504, + "global_memory_bus_peak_clock_rate": 5001000000, + "global_memory_bus_width": 256, + "global_memory_bus_bandwidth": 320064000000, + "l2_cache_size": 4194304, + "shared_memory_per_sm": 65536, + "shared_memory_per_block": 49152, + "ecc_state": true + } + ], + "benchmarks": [ + { + "name": "distinct", + "index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "devices": [ + 0 + ], + "axes": [ + { + "name": "Type", + "type": "type", + "flags": "", + "values": [ + { + "input_string": "bool", + "description": "", + "is_active": true + }, + { + "input_string": "I8", + "description": "int8_t", + "is_active": true + }, + { + "input_string": "I32", + "description": "int32_t", + "is_active": true + }, + { + "input_string": "I64", + "description": "int64_t", + "is_active": true + }, + { + "input_string": "F32", + "description": "float", + "is_active": true + }, + { + "input_string": "cudf::timestamp_ms", + "description": "cudf::timestamp_ms", + "is_active": true + } + ] + }, + { + "name": "NumRows", + "type": "int64", + "flags": "", + "values": [ + { + "input_string": "10000", + "description": "", + "value": 10000 + }, + { + "input_string": "100000", + "description": "", + "value": 100000 + }, + { + "input_string": "1000000", + "description": "", + "value": 1000000 + }, + { + "input_string": "10000000", + "description": "", + "value": 10000000 + } + ] + } + ], + "states": [ + { + "name": "Device=0 Type=bool NumRows=10000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 0, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "bool" + }, + { + "name": "NumRows", + "type": "int64", + "value": "10000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "2704" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00021700309800295835" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.03225664392272471" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0002107849822616609" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.013019515397226355" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.6880439230000001" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=bool NumRows=100000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 0, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "bool" + }, + { + "name": "NumRows", + "type": "int64", + "value": "100000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "2320" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0002584884831896559" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0825229457889664" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00025310259314318117" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.07789639597764537" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.682488128" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=bool NumRows=1000000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 0, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "bool" + }, + { + "name": "NumRows", + "type": "int64", + "value": "1000000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "3216" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0006925634906716414" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.181811140870714" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0006877581192671667" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.18155596725865705" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "2.308104612" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=bool NumRows=10000000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 0, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "bool" + }, + { + "name": "NumRows", + "type": "int64", + "value": "10000000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "1888" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00554238944597458" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.013594008837623673" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.005537591253549377" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.013566081595658952" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "10.510662326" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=I8 NumRows=10000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 1, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "I8" + }, + { + "name": "NumRows", + "type": "int64", + "value": "10000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "2864" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00017964160300279323" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.037831079851420674" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00017492174277571168" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.026503574914900213" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.612775248" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=I8 NumRows=100000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 1, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "I8" + }, + { + "name": "NumRows", + "type": "int64", + "value": "100000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "2272" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00022487412984154958" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.027087343546926868" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0002201128727231747" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.016458676790133714" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.5864066920000001" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=I8 NumRows=1000000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 1, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "I8" + }, + { + "name": "NumRows", + "type": "int64", + "value": "1000000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "1248" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0006832431274038457" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0119780711000842" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0006784556157027302" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.009651711564790999" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.8821486340000001" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=I8 NumRows=10000000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 1, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "I8" + }, + { + "name": "NumRows", + "type": "int64", + "value": "10000000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "1408" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00551892489275568" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.007926213593380903" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.005514225204221232" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.007881799834783692" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "7.804920706000001" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=I32 NumRows=10000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 2, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "I32" + }, + { + "name": "NumRows", + "type": "int64", + "value": "10000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "2864" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00017962303142458072" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.04502683046446124" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0001748775418213045" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.035847931504879825" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.612458348" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=I32 NumRows=100000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 2, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "I32" + }, + { + "name": "NumRows", + "type": "int64", + "value": "100000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "2304" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0002221182100694443" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.02864519735570142" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00021738018061215455" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.018636456128851456" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.588096504" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=I32 NumRows=1000000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 2, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "I32" + }, + { + "name": "NumRows", + "type": "int64", + "value": "1000000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "976" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0006906091270491808" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.012971954965496222" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0006858079670394055" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.010922877403360709" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.6968690110000001" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=I32 NumRows=10000000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 2, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "I32" + }, + { + "name": "NumRows", + "type": "int64", + "value": "10000000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "1744" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.005640688180619259" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.007654157354299601" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0056359616881663565" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.007604988022631572" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "9.88047442" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=I64 NumRows=10000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 3, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "I64" + }, + { + "name": "NumRows", + "type": "int64", + "value": "10000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "2864" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0001799898306564251" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.03646439847384236" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00017525735183738616" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.024668904047286017" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.6137548540000001" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=I64 NumRows=100000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 3, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "I64" + }, + { + "name": "NumRows", + "type": "int64", + "value": "100000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "2304" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0002222455308159723" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.029939720807146755" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0002174671943519364" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.020419852142566818" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.588633842" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=I64 NumRows=1000000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 3, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "I64" + }, + { + "name": "NumRows", + "type": "int64", + "value": "1000000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "1152" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0007158363619791664" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.013825314347670238" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0007110079728687802" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.011999047628734357" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.8518644750000001" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=I64 NumRows=10000000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 3, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "I64" + }, + { + "name": "NumRows", + "type": "int64", + "value": "10000000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "2016" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0058727196036706345" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.009633820505230128" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.005867959742271706" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.009595975504553378" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "11.889661979000001" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=F32 NumRows=10000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 4, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "F32" + }, + { + "name": "NumRows", + "type": "int64", + "value": "10000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "2816" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00018296302982954513" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.04164499889216332" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00017824229554654192" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.031947908329993516" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.611098432" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=F32 NumRows=100000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 4, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "F32" + }, + { + "name": "NumRows", + "type": "int64", + "value": "100000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "2112" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0002430979346590908" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0212128333097389" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00023833330347428576" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0070626629139464115" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.584252941" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=F32 NumRows=1000000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 4, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "F32" + }, + { + "name": "NumRows", + "type": "int64", + "value": "1000000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "1712" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00110359513609813" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.01897577394659825" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0010987860558328202" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.018369948997503344" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "1.9354934560000001" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=F32 NumRows=10000000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 4, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "F32" + }, + { + "name": "NumRows", + "type": "int64", + "value": "10000000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "1149" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.01303830392167101" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.005636086667007628" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.013033159977254736" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.005617260081375752" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "15.010919779000002" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=cudf::timestamp_ms NumRows=10000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 5, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "cudf::timestamp_ms" + }, + { + "name": "NumRows", + "type": "int64", + "value": "10000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "2848" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0001804409238061799" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.03665236791083376" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00017569219105447947" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.024342661120343934" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.61079433" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=cudf::timestamp_ms NumRows=100000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 5, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "cudf::timestamp_ms" + }, + { + "name": "NumRows", + "type": "int64", + "value": "100000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "2256" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00022707608466312074" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0361876511519216" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00022236219866875046" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.029331279236812065" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.586994089" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=cudf::timestamp_ms NumRows=1000000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 5, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "cudf::timestamp_ms" + }, + { + "name": "NumRows", + "type": "int64", + "value": "1000000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "1280" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0007279055882812492" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0188987405518479" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0007230546253267675" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.017671083475709753" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.9620853460000001" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=cudf::timestamp_ms NumRows=10000000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 5, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "cudf::timestamp_ms" + }, + { + "name": "NumRows", + "type": "int64", + "value": "10000000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "1072" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.005956714626865664" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.006745372038588295" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.005951931767944074" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.006696835739155452" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "6.411682865" + } + ] + } + ], + "is_skipped": false + } + ] + }, + { + "name": "distinct_list", + "index": 1, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "devices": [ + 0 + ], + "axes": [ + { + "name": "Type", + "type": "type", + "flags": "", + "values": [ + { + "input_string": "I32", + "description": "int32_t", + "is_active": true + }, + { + "input_string": "cudf::list_view", + "description": "", + "is_active": true + } + ] + }, + { + "name": "null_probability", + "type": "float64", + "flags": "", + "values": [ + { + "input_string": "0", + "description": "", + "value": 0.0 + }, + { + "input_string": "0.1", + "description": "", + "value": 0.1 + } + ] + }, + { + "name": "ColumnSize", + "type": "int64", + "flags": "", + "values": [ + { + "input_string": "100000000", + "description": "", + "value": 100000000 + } + ] + } + ], + "states": [ + { + "name": "Device=0 Type=I32 null_probability=0 ColumnSize=100000000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 0, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "I32" + }, + { + "name": "null_probability", + "type": "float64", + "value": "0" + }, + { + "name": "ColumnSize", + "type": "int64", + "value": "100000000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "720" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.012210535058333331" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.005799153205286631" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.01220575648678673" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0057832412829464195" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "8.809005589" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=I32 null_probability=0.1 ColumnSize=100000000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 0, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "I32" + }, + { + "name": "null_probability", + "type": "float64", + "value": "0.1" + }, + { + "name": "ColumnSize", + "type": "int64", + "value": "100000000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "400" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.013768925567500001" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.005836561789050461" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.013764196693897265" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0058253933699660854" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "5.517115335000001" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=cudf::list_view null_probability=0 ColumnSize=100000000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 1, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "cudf::list_view" + }, + { + "name": "null_probability", + "type": "float64", + "value": "0" + }, + { + "name": "ColumnSize", + "type": "int64", + "value": "100000000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "688" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.01314988552034884" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.006676118608579812" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.013145149074321568" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.006664777897553564" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "9.063916718" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=cudf::list_view null_probability=0.1 ColumnSize=100000000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 1, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "cudf::list_view" + }, + { + "name": "null_probability", + "type": "float64", + "value": "0.1" + }, + { + "name": "ColumnSize", + "type": "int64", + "value": "100000000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "768" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.015098254283854176" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.006360468181198888" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.015093491327017547" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0063505322932672" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "11.614478019" + } + ] + } + ], + "is_skipped": false + } + ] + } + ] +} diff --git a/static_set.json b/static_set.json new file mode 100644 index 00000000000..264ec19afd5 --- /dev/null +++ b/static_set.json @@ -0,0 +1,3130 @@ +{ + "meta": { + "argv": [ + "./STREAM_COMPACTION_NVBENCH", + "-b", + "0", + "-b", + "1", + "--json", + "/home/coder/cudf_sr/final_new.json" + ], + "version": { + "json": { + "major": 1, + "minor": 0, + "patch": 0, + "string": "1.0.0" + }, + "nvbench": { + "major": 0, + "minor": 1, + "patch": 0, + "string": "0.1.0", + "git_branch": "HEAD", + "git_sha": "555d628e9b250868c9da003e4407087ff1982e8e", + "git_version": "old-cmake-198-g555d628", + "git_is_dirty": false + } + } + }, + "devices": [ + { + "id": 0, + "name": "Tesla T4", + "sm_version": 750, + "ptx_version": 750, + "sm_default_clock_rate": 1590000000, + "number_of_sms": 40, + "max_blocks_per_sm": 16, + "max_threads_per_sm": 1024, + "max_threads_per_block": 1024, + "registers_per_sm": 65536, + "registers_per_block": 65536, + "global_memory_size": 15655829504, + "global_memory_bus_peak_clock_rate": 5001000000, + "global_memory_bus_width": 256, + "global_memory_bus_bandwidth": 320064000000, + "l2_cache_size": 4194304, + "shared_memory_per_sm": 65536, + "shared_memory_per_block": 49152, + "ecc_state": true + } + ], + "benchmarks": [ + { + "name": "distinct", + "index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "devices": [ + 0 + ], + "axes": [ + { + "name": "Type", + "type": "type", + "flags": "", + "values": [ + { + "input_string": "bool", + "description": "", + "is_active": true + }, + { + "input_string": "I8", + "description": "int8_t", + "is_active": true + }, + { + "input_string": "I32", + "description": "int32_t", + "is_active": true + }, + { + "input_string": "I64", + "description": "int64_t", + "is_active": true + }, + { + "input_string": "F32", + "description": "float", + "is_active": true + }, + { + "input_string": "cudf::timestamp_ms", + "description": "cudf::timestamp_ms", + "is_active": true + } + ] + }, + { + "name": "NumRows", + "type": "int64", + "flags": "", + "values": [ + { + "input_string": "10000", + "description": "", + "value": 10000 + }, + { + "input_string": "100000", + "description": "", + "value": 100000 + }, + { + "input_string": "1000000", + "description": "", + "value": 1000000 + }, + { + "input_string": "10000000", + "description": "", + "value": 10000000 + } + ] + } + ], + "states": [ + { + "name": "Device=0 Type=bool NumRows=10000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 0, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "bool" + }, + { + "name": "NumRows", + "type": "int64", + "value": "10000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "3184" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00020463040734924632" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.2549116074049181" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00019867851274864938" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.2529735423260753" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.766678033" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=bool NumRows=100000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 0, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "bool" + }, + { + "name": "NumRows", + "type": "int64", + "value": "100000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "2816" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00022764429083806798" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.2686811688421379" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00022208771594292107" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.26733362072811223" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.7389794270000001" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=bool NumRows=1000000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 0, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "bool" + }, + { + "name": "NumRows", + "type": "int64", + "value": "1000000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "3168" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00034674626167929254" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.26584529024389386" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00034213333334209356" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.2654280285503345" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "1.175269364" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=bool NumRows=10000000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 0, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "bool" + }, + { + "name": "NumRows", + "type": "int64", + "value": "10000000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "3232" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0016731476104579234" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.04685370904788834" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0016685629806084778" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.04676901028895584" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "5.487458677" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=I8 NumRows=10000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 1, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "I8" + }, + { + "name": "NumRows", + "type": "int64", + "value": "10000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "2864" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00017970776850558646" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.030282760182153284" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00017516524050717724" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.01583017885707515" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.61163224" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=I8 NumRows=100000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 1, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "I8" + }, + { + "name": "NumRows", + "type": "int64", + "value": "100000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "3184" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00019606095445979957" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.09858652347333118" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00019154548725047686" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.09574844407095809" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.7315341860000001" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=I8 NumRows=1000000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 1, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "I8" + }, + { + "name": "NumRows", + "type": "int64", + "value": "1000000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "1536" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0003320073300781252" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.021380772386558264" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00032745252101449245" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.01627086014386994" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.544849617" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=I8 NumRows=10000000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 1, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "I8" + }, + { + "name": "NumRows", + "type": "int64", + "value": "10000000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "1360" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0016749257639705878" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.009715427955031629" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0016703032703084123" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.009306866704484352" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "2.3096378150000003" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=I32 NumRows=10000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 2, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "I32" + }, + { + "name": "NumRows", + "type": "int64", + "value": "10000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "2864" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0001800863334497208" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.02921082778608196" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0001755018102619068" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.013110076978555549" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.6117084740000001" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=I32 NumRows=100000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 2, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "I32" + }, + { + "name": "NumRows", + "type": "int64", + "value": "100000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "2656" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00019321554292168658" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.04401220138414076" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00018865301212049002" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.03675360005968246" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.600895811" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=I32 NumRows=1000000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 2, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "I32" + }, + { + "name": "NumRows", + "type": "int64", + "value": "1000000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "1536" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0003327889589843753" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.018137487099489237" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00032820708333747474" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.011554267029888642" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.54623782" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=I32 NumRows=10000000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 2, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "I32" + }, + { + "name": "NumRows", + "type": "int64", + "value": "10000000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "992" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0017263192137096763" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.01221894764794557" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00172167654900301" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.011901485259957689" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "1.735332332" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=I64 NumRows=10000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 3, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "I64" + }, + { + "name": "NumRows", + "type": "int64", + "value": "10000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "2864" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00017982362150837972" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.02875273313753797" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00017528116235290941" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.01248496857591803" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.61101256" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=I64 NumRows=100000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 3, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "I64" + }, + { + "name": "NumRows", + "type": "int64", + "value": "100000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "2656" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00019350087274096382" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.03222098390148629" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00018893709641892492" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.021403878022747635" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.6019312050000001" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=I64 NumRows=1000000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 3, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "I64" + }, + { + "name": "NumRows", + "type": "int64", + "value": "1000000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "2592" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0003402656647376538" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.027564014649341353" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.000335715962976309" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.02403156898377349" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.9437561820000001" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=I64 NumRows=10000000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 3, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "I64" + }, + { + "name": "NumRows", + "type": "int64", + "value": "10000000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "1792" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0018101229174107172" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.01243507431866239" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0018054511428012357" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.01215109328410885" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "3.2864291870000004" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=F32 NumRows=10000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 4, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "F32" + }, + { + "name": "NumRows", + "type": "int64", + "value": "10000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "2864" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00017950566724860367" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.029202205469486174" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00017495564269370126" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.013360448343696698" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.609308449" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=F32 NumRows=100000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 4, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "F32" + }, + { + "name": "NumRows", + "type": "int64", + "value": "100000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "2512" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00020452501273885362" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.024284671183999556" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00019997835693180946" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.008476231178531033" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.5967229780000001" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=F32 NumRows=1000000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 4, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "F32" + }, + { + "name": "NumRows", + "type": "int64", + "value": "1000000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "1456" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0005777499299450559" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.013782681671566372" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.000573144769439331" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.011172880836395024" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.8784639270000001" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=F32 NumRows=10000000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 4, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "F32" + }, + { + "name": "NumRows", + "type": "int64", + "value": "10000000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "66" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.007661241030303028" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.004399868041618955" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.007656497464035497" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.004342338061377496" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.5072121070000001" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=cudf::timestamp_ms NumRows=10000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 5, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "cudf::timestamp_ms" + }, + { + "name": "NumRows", + "type": "int64", + "value": "10000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "3168" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00017986202398989878" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.10854232852546174" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00017533174776142923" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.10542922246580139" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.676459247" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=cudf::timestamp_ms NumRows=100000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 5, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "cudf::timestamp_ms" + }, + { + "name": "NumRows", + "type": "int64", + "value": "100000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "2656" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00019347790850903615" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.03249202493314582" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.000188928807165518" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0217424042673909" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.601216791" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=cudf::timestamp_ms NumRows=1000000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 5, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "cudf::timestamp_ms" + }, + { + "name": "NumRows", + "type": "int64", + "value": "1000000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "1504" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00033771549800531926" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.019249970628191195" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00033312976608013253" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.013429441628131856" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.542008375" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=cudf::timestamp_ms NumRows=10000000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 5, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "cudf::timestamp_ms" + }, + { + "name": "NumRows", + "type": "int64", + "value": "10000000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "1104" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0018400764048913038" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.01296987747373167" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0018353944362505654" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.012692964377608498" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "2.057332931" + } + ] + } + ], + "is_skipped": false + } + ] + }, + { + "name": "distinct_list", + "index": 1, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "devices": [ + 0 + ], + "axes": [ + { + "name": "Type", + "type": "type", + "flags": "", + "values": [ + { + "input_string": "I32", + "description": "int32_t", + "is_active": true + }, + { + "input_string": "cudf::list_view", + "description": "", + "is_active": true + } + ] + }, + { + "name": "null_probability", + "type": "float64", + "flags": "", + "values": [ + { + "input_string": "0", + "description": "", + "value": 0.0 + }, + { + "input_string": "0.1", + "description": "", + "value": 0.1 + } + ] + }, + { + "name": "ColumnSize", + "type": "int64", + "flags": "", + "values": [ + { + "input_string": "100000000", + "description": "", + "value": 100000000 + } + ] + } + ], + "states": [ + { + "name": "Device=0 Type=I32 null_probability=0 ColumnSize=100000000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 0, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "I32" + }, + { + "name": "null_probability", + "type": "float64", + "value": "0" + }, + { + "name": "ColumnSize", + "type": "int64", + "value": "100000000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "136" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0036965089117647074" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.004638343951439435" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.003691819760729284" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.004468357669368835" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.505772738" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=I32 null_probability=0.1 ColumnSize=100000000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 0, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "I32" + }, + { + "name": "null_probability", + "type": "float64", + "value": "0.1" + }, + { + "name": "ColumnSize", + "type": "int64", + "value": "100000000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "118" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.004246559415254238" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.004625826474880542" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0042418603492995425" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.004499271462981514" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.503750398" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=cudf::list_view null_probability=0 ColumnSize=100000000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 1, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "cudf::list_view" + }, + { + "name": "null_probability", + "type": "float64", + "value": "0" + }, + { + "name": "ColumnSize", + "type": "int64", + "value": "100000000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "688" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0035718760116279042" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.010282783102359158" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0035671077682528393" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.01018810592370407" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "2.4733659080000003" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=cudf::list_view null_probability=0.1 ColumnSize=100000000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 1, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "cudf::list_view" + }, + { + "name": "null_probability", + "type": "float64", + "value": "0.1" + }, + { + "name": "ColumnSize", + "type": "int64", + "value": "100000000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "560" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.004177917276785719" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00741729093036538" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.004173174507277357" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.007332881069064344" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "2.35264685" + } + ] + } + ], + "is_skipped": false + } + ] + } + ] +} diff --git a/static_set_new.json b/static_set_new.json new file mode 100644 index 00000000000..4aa66547224 --- /dev/null +++ b/static_set_new.json @@ -0,0 +1,3130 @@ +{ + "meta": { + "argv": [ + "./STREAM_COMPACTION_NVBENCH", + "-b", + "0", + "-b", + "1", + "--json", + "/home/coder/cudf_sr/static_set_new.json" + ], + "version": { + "json": { + "major": 1, + "minor": 0, + "patch": 0, + "string": "1.0.0" + }, + "nvbench": { + "major": 0, + "minor": 1, + "patch": 0, + "string": "0.1.0", + "git_branch": "HEAD", + "git_sha": "555d628e9b250868c9da003e4407087ff1982e8e", + "git_version": "old-cmake-198-g555d628", + "git_is_dirty": false + } + } + }, + "devices": [ + { + "id": 0, + "name": "Tesla T4", + "sm_version": 750, + "ptx_version": 750, + "sm_default_clock_rate": 1590000000, + "number_of_sms": 40, + "max_blocks_per_sm": 16, + "max_threads_per_sm": 1024, + "max_threads_per_block": 1024, + "registers_per_sm": 65536, + "registers_per_block": 65536, + "global_memory_size": 15655829504, + "global_memory_bus_peak_clock_rate": 5001000000, + "global_memory_bus_width": 256, + "global_memory_bus_bandwidth": 320064000000, + "l2_cache_size": 4194304, + "shared_memory_per_sm": 65536, + "shared_memory_per_block": 49152, + "ecc_state": true + } + ], + "benchmarks": [ + { + "name": "distinct", + "index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "devices": [ + 0 + ], + "axes": [ + { + "name": "Type", + "type": "type", + "flags": "", + "values": [ + { + "input_string": "bool", + "description": "", + "is_active": true + }, + { + "input_string": "I8", + "description": "int8_t", + "is_active": true + }, + { + "input_string": "I32", + "description": "int32_t", + "is_active": true + }, + { + "input_string": "I64", + "description": "int64_t", + "is_active": true + }, + { + "input_string": "F32", + "description": "float", + "is_active": true + }, + { + "input_string": "cudf::timestamp_ms", + "description": "cudf::timestamp_ms", + "is_active": true + } + ] + }, + { + "name": "NumRows", + "type": "int64", + "flags": "", + "values": [ + { + "input_string": "10000", + "description": "", + "value": 10000 + }, + { + "input_string": "100000", + "description": "", + "value": 100000 + }, + { + "input_string": "1000000", + "description": "", + "value": 1000000 + }, + { + "input_string": "10000000", + "description": "", + "value": 10000000 + } + ] + } + ], + "states": [ + { + "name": "Device=0 Type=bool NumRows=10000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 0, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "bool" + }, + { + "name": "NumRows", + "type": "int64", + "value": "10000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "2592" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00020962153395061729" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.03481247428735653" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00020365303687544308" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.018760872805660522" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.637114516" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=bool NumRows=100000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 0, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "bool" + }, + { + "name": "NumRows", + "type": "int64", + "value": "100000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "2256" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00023879397960992907" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.05689921886107442" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0002328773900628718" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.05050957742957333" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.618548776" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=bool NumRows=1000000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 0, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "bool" + }, + { + "name": "NumRows", + "type": "int64", + "value": "1000000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "1408" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0003624086931818184" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.07584986504141206" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00035752504553899795" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.07368872492402007" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.543950841" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=bool NumRows=10000000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 0, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "bool" + }, + { + "name": "NumRows", + "type": "int64", + "value": "10000000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "2896" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0016809379734116013" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.09644366952782124" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0016760039677359802" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0962733472209506" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "4.943738388" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=I8 NumRows=10000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 1, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "I8" + }, + { + "name": "NumRows", + "type": "int64", + "value": "10000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "2672" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00019215333907185656" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.30167644007876326" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00018730506598726366" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.2975232022646635" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.608213777" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=I8 NumRows=100000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 1, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "I8" + }, + { + "name": "NumRows", + "type": "int64", + "value": "100000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "2560" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00020008568749999993" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.056285571959678815" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00019534804983995885" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.05061348591249386" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.599631054" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=I8 NumRows=1000000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 1, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "I8" + }, + { + "name": "NumRows", + "type": "int64", + "value": "1000000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "3168" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00033529945959596025" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.08946313784011517" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00033049329292179674" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.08800871057770095" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "1.1411794320000002" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=I8 NumRows=10000000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 1, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "I8" + }, + { + "name": "NumRows", + "type": "int64", + "value": "10000000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "3184" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0016756483790829177" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.029108557493874032" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.001670798161371271" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.02872304489189686" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "5.418545515" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=I32 NumRows=10000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 2, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "I32" + }, + { + "name": "NumRows", + "type": "int64", + "value": "10000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "2816" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00018273358025568175" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.036821269554249435" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.000177922545530071" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.025147287230726973" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.6113254290000001" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=I32 NumRows=100000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 2, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "I32" + }, + { + "name": "NumRows", + "type": "int64", + "value": "100000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "2592" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00019782819328703735" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.03222261483732147" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00019302424668897816" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.02054330829137285" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.6012078270000001" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=I32 NumRows=1000000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 2, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "I32" + }, + { + "name": "NumRows", + "type": "int64", + "value": "1000000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "1536" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00033253364192708363" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.018468434300452058" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0003277398751039676" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.011258845370969921" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.546934508" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=I32 NumRows=10000000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 2, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "I32" + }, + { + "name": "NumRows", + "type": "int64", + "value": "10000000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "2880" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0017258599010416674" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.01959746966815728" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.001720910166700681" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.019351852083723333" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "5.044768596" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=I64 NumRows=10000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 3, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "I64" + }, + { + "name": "NumRows", + "type": "int64", + "value": "10000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "2816" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00018297387464488637" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.04843229463169197" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00017818213615100793" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.04020882852021604" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.611375803" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=I64 NumRows=100000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 3, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "I64" + }, + { + "name": "NumRows", + "type": "int64", + "value": "100000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "2592" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00019774994714506192" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.03662707504093914" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0001930186913268248" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.027106668559167435" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.6012601780000001" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=I64 NumRows=1000000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 3, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "I64" + }, + { + "name": "NumRows", + "type": "int64", + "value": "1000000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "2816" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00035060977556818177" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.16575686289122357" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0003457193980945955" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.16331324107680964" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "1.0606701840000001" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=I64 NumRows=10000000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 3, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "I64" + }, + { + "name": "NumRows", + "type": "int64", + "value": "10000000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "976" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0018353044036885252" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0528516868882808" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0018299283616122672" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.05210675099057637" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "1.8183584910000001" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=F32 NumRows=10000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 4, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "F32" + }, + { + "name": "NumRows", + "type": "int64", + "value": "10000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "2800" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00018419710821428574" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.062114644119672054" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00017943027411188386" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0558199906020224" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.611421219" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=F32 NumRows=100000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 4, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "F32" + }, + { + "name": "NumRows", + "type": "int64", + "value": "100000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "2384" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00021509222860738252" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.05268771936610105" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00021032308755230067" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.04704272436419841" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.595869002" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=F32 NumRows=1000000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 4, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "F32" + }, + { + "name": "NumRows", + "type": "int64", + "value": "1000000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "1792" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0005951878013392847" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.10355356380937762" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0005902473575635153" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.101935453702182" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "1.116224909" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=F32 NumRows=10000000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 4, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "F32" + }, + { + "name": "NumRows", + "type": "int64", + "value": "10000000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "66" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.007644699560606064" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.002960801310162206" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.007639635866338558" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.002871069146675316" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.5061769730000001" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=cudf::timestamp_ms NumRows=10000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 5, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "cudf::timestamp_ms" + }, + { + "name": "NumRows", + "type": "int64", + "value": "10000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "2800" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00018425268178571438" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.052555383014023994" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00017940852597888" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.04506205074979422" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.611657742" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=cudf::timestamp_ms NumRows=100000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 5, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "cudf::timestamp_ms" + }, + { + "name": "NumRows", + "type": "int64", + "value": "100000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "2592" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00019819958371913572" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.032858441966953277" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00019340903695994673" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.02164960552343306" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.602109671" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=cudf::timestamp_ms NumRows=1000000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 5, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "cudf::timestamp_ms" + }, + { + "name": "NumRows", + "type": "int64", + "value": "1000000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "1616" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00034110811943069294" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.017868883575269848" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.00033627986172121927" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.010594833617849946" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.589410058" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=cudf::timestamp_ms NumRows=10000000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 5, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "cudf::timestamp_ms" + }, + { + "name": "NumRows", + "type": "int64", + "value": "10000000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "1056" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0018283935303030304" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.013675976125737184" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.001823343545311329" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.013338660630380415" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "1.956281308" + } + ] + } + ], + "is_skipped": false + } + ] + }, + { + "name": "distinct_list", + "index": 1, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "devices": [ + 0 + ], + "axes": [ + { + "name": "Type", + "type": "type", + "flags": "", + "values": [ + { + "input_string": "I32", + "description": "int32_t", + "is_active": true + }, + { + "input_string": "cudf::list_view", + "description": "", + "is_active": true + } + ] + }, + { + "name": "null_probability", + "type": "float64", + "flags": "", + "values": [ + { + "input_string": "0", + "description": "", + "value": 0.0 + }, + { + "input_string": "0.1", + "description": "", + "value": 0.1 + } + ] + }, + { + "name": "ColumnSize", + "type": "int64", + "flags": "", + "values": [ + { + "input_string": "100000000", + "description": "", + "value": 100000000 + } + ] + } + ], + "states": [ + { + "name": "Device=0 Type=I32 null_probability=0 ColumnSize=100000000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 0, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "I32" + }, + { + "name": "null_probability", + "type": "float64", + "value": "0" + }, + { + "name": "ColumnSize", + "type": "int64", + "value": "100000000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "137" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0036715400948905126" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.005019671829007967" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.003666419620931584" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.004826717056517512" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.50617562" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=I32 null_probability=0.1 ColumnSize=100000000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 0, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "I32" + }, + { + "name": "null_probability", + "type": "float64", + "value": "0.1" + }, + { + "name": "ColumnSize", + "type": "int64", + "value": "100000000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "2016" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.004204665274801583" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.010056348310144908" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.004199421334834326" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.009867343458454745" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "8.528077473" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=cudf::list_view null_probability=0 ColumnSize=100000000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 1, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "cudf::list_view" + }, + { + "name": "null_probability", + "type": "float64", + "value": "0" + }, + { + "name": "ColumnSize", + "type": "int64", + "value": "100000000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "672" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0035897424151785716" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.006954773468319603" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.0035844565754135464" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.006789676257025493" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "2.4285585540000003" + } + ] + } + ], + "is_skipped": false + }, + { + "name": "Device=0 Type=cudf::list_view null_probability=0.1 ColumnSize=100000000", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 15.0, + "device": 0, + "type_config_index": 1, + "axis_values": [ + { + "name": "Type", + "type": "string", + "value": "cudf::list_view" + }, + { + "name": "null_probability", + "type": "float64", + "value": "0.1" + }, + { + "name": "ColumnSize", + "type": "int64", + "value": "100000000" + } + ], + "summaries": [ + { + "tag": "nv/cold/sample_size", + "name": "Samples", + "description": "Number of isolated kernel executions", + "hint": "sample_size", + "data": [ + { + "name": "value", + "type": "int64", + "value": "1632" + } + ] + }, + { + "tag": "nv/cold/time/cpu/mean", + "name": "CPU Time", + "description": "Mean isolated kernel execution time (measured on host CPU)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.004210844107230388" + } + ] + }, + { + "tag": "nv/cold/time/cpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated CPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.017998529931187918" + } + ] + }, + { + "tag": "nv/cold/time/gpu/mean", + "name": "GPU Time", + "description": "Mean isolated kernel execution time (measured with CUDA events)", + "hint": "duration", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.004205538742682514" + } + ] + }, + { + "tag": "nv/cold/time/gpu/stdev/relative", + "name": "Noise", + "description": "Relative standard deviation of isolated GPU times", + "hint": "percentage", + "data": [ + { + "name": "value", + "type": "float64", + "value": "0.017847989809283395" + } + ] + }, + { + "tag": "nv/cold/walltime", + "name": "Walltime", + "description": "Walltime used for isolated measurements", + "hint": "duration", + "hide": "Hidden by default.", + "data": [ + { + "name": "value", + "type": "float64", + "value": "6.914157977" + } + ] + } + ], + "is_skipped": false + } + ] + } + ] +} From e5ae22b2c23e45b225a8dec8985509598ce616b5 Mon Sep 17 00:00:00 2001 From: Srinivas Yadav Singanaboina Date: Tue, 18 Jun 2024 20:34:35 +0000 Subject: [PATCH 12/15] remove accidentally added benchmark files --- bench.md | 49 - static_map.json | 3130 ------------------------------------------- static_set.json | 3130 ------------------------------------------- static_set_new.json | 3130 ------------------------------------------- 4 files changed, 9439 deletions(-) delete mode 100644 bench.md delete mode 100644 static_map.json delete mode 100644 static_set.json delete mode 100644 static_set_new.json diff --git a/bench.md b/bench.md deleted file mode 100644 index 627b42a1941..00000000000 --- a/bench.md +++ /dev/null @@ -1,49 +0,0 @@ -['static_map.json', 'static_set.json'] -# distinct - -## [0] Tesla T4 - -| Type | NumRows | Ref Time | Ref Noise | Cmp Time | Cmp Noise | Diff | %Diff | Status | -|--------------------|-----------|------------|-------------|------------|-------------|--------------|---------|----------| -| bool | 10000 | 210.785 us | 1.30% | 198.679 us | 25.30% | -12.106 us | -5.74% | FAIL | -| bool | 100000 | 253.103 us | 7.79% | 222.088 us | 26.73% | -31.015 us | -12.25% | FAIL | -| bool | 1000000 | 687.758 us | 18.16% | 342.133 us | 26.54% | -345.625 us | -50.25% | FAIL | -| bool | 10000000 | 5.538 ms | 1.36% | 1.669 ms | 4.68% | -3869.028 us | -69.87% | FAIL | -| I8 | 10000 | 174.922 us | 2.65% | 175.165 us | 1.58% | 0.243 us | 0.14% | PASS | -| I8 | 100000 | 220.113 us | 1.65% | 191.545 us | 9.57% | -28.567 us | -12.98% | FAIL | -| I8 | 1000000 | 678.456 us | 0.97% | 327.453 us | 1.63% | -351.003 us | -51.74% | FAIL | -| I8 | 10000000 | 5.514 ms | 0.79% | 1.670 ms | 0.93% | -3843.922 us | -69.71% | FAIL | -| I32 | 10000 | 174.878 us | 3.58% | 175.502 us | 1.31% | 0.624 us | 0.36% | PASS | -| I32 | 100000 | 217.380 us | 1.86% | 188.653 us | 3.68% | -28.727 us | -13.22% | FAIL | -| I32 | 1000000 | 685.808 us | 1.09% | 328.207 us | 1.16% | -357.601 us | -52.14% | FAIL | -| I32 | 10000000 | 5.636 ms | 0.76% | 1.722 ms | 1.19% | -3914.285 us | -69.45% | FAIL | -| I64 | 10000 | 175.257 us | 2.47% | 175.281 us | 1.25% | 0.024 us | 0.01% | PASS | -| I64 | 100000 | 217.467 us | 2.04% | 188.937 us | 2.14% | -28.530 us | -13.12% | FAIL | -| I64 | 1000000 | 711.008 us | 1.20% | 335.716 us | 2.40% | -375.292 us | -52.78% | FAIL | -| I64 | 10000000 | 5.868 ms | 0.96% | 1.805 ms | 1.22% | -4062.509 us | -69.23% | FAIL | -| F32 | 10000 | 178.242 us | 3.19% | 174.956 us | 1.34% | -3.287 us | -1.84% | FAIL | -| F32 | 100000 | 238.333 us | 0.71% | 199.978 us | 0.85% | -38.355 us | -16.09% | FAIL | -| F32 | 1000000 | 1.099 ms | 1.84% | 573.145 us | 1.12% | -525.641 us | -47.84% | FAIL | -| F32 | 10000000 | 13.033 ms | 0.56% | 7.656 ms | 0.43% | -5376.663 us | -41.25% | FAIL | -| cudf::timestamp_ms | 10000 | 175.692 us | 2.43% | 175.332 us | 10.54% | -0.360 us | -0.21% | PASS | -| cudf::timestamp_ms | 100000 | 222.362 us | 2.93% | 188.929 us | 2.17% | -33.433 us | -15.04% | FAIL | -| cudf::timestamp_ms | 1000000 | 723.055 us | 1.77% | 333.130 us | 1.34% | -389.925 us | -53.93% | FAIL | -| cudf::timestamp_ms | 10000000 | 5.952 ms | 0.67% | 1.835 ms | 1.27% | -4116.537 us | -69.16% | FAIL | - -# distinct_list - -## [0] Tesla T4 - -| Type | null_probability | ColumnSize | Ref Time | Ref Noise | Cmp Time | Cmp Noise | Diff | %Diff | Status | -|-----------------|--------------------|--------------|------------|-------------|------------|-------------|---------------|---------|----------| -| I32 | 0 | 100000000 | 12.206 ms | 0.58% | 3.692 ms | 0.45% | -8513.937 us | -69.75% | FAIL | -| I32 | 0.1 | 100000000 | 13.764 ms | 0.58% | 4.242 ms | 0.45% | -9522.336 us | -69.18% | FAIL | -| cudf::list_view | 0 | 100000000 | 13.145 ms | 0.67% | 3.567 ms | 1.02% | -9578.041 us | -72.86% | FAIL | -| cudf::list_view | 0.1 | 100000000 | 15.093 ms | 0.64% | 4.173 ms | 0.73% | -10920.317 us | -72.35% | FAIL | - -# Summary - -- Total Matches: 28 - - Pass (diff <= min_noise): 4 - - Unknown (infinite noise): 0 - - Failure (diff > min_noise): 24 diff --git a/static_map.json b/static_map.json deleted file mode 100644 index 00d79037e92..00000000000 --- a/static_map.json +++ /dev/null @@ -1,3130 +0,0 @@ -{ - "meta": { - "argv": [ - "./STREAM_COMPACTION_NVBENCH", - "-b", - "0", - "-b", - "1", - "--json", - "/home/coder/old.json" - ], - "version": { - "json": { - "major": 1, - "minor": 0, - "patch": 0, - "string": "1.0.0" - }, - "nvbench": { - "major": 0, - "minor": 1, - "patch": 0, - "string": "0.1.0", - "git_branch": "HEAD", - "git_sha": "555d628e9b250868c9da003e4407087ff1982e8e", - "git_version": "old-cmake-198-g555d628", - "git_is_dirty": false - } - } - }, - "devices": [ - { - "id": 0, - "name": "Tesla T4", - "sm_version": 750, - "ptx_version": 750, - "sm_default_clock_rate": 1590000000, - "number_of_sms": 40, - "max_blocks_per_sm": 16, - "max_threads_per_sm": 1024, - "max_threads_per_block": 1024, - "registers_per_sm": 65536, - "registers_per_block": 65536, - "global_memory_size": 15655829504, - "global_memory_bus_peak_clock_rate": 5001000000, - "global_memory_bus_width": 256, - "global_memory_bus_bandwidth": 320064000000, - "l2_cache_size": 4194304, - "shared_memory_per_sm": 65536, - "shared_memory_per_block": 49152, - "ecc_state": true - } - ], - "benchmarks": [ - { - "name": "distinct", - "index": 0, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "devices": [ - 0 - ], - "axes": [ - { - "name": "Type", - "type": "type", - "flags": "", - "values": [ - { - "input_string": "bool", - "description": "", - "is_active": true - }, - { - "input_string": "I8", - "description": "int8_t", - "is_active": true - }, - { - "input_string": "I32", - "description": "int32_t", - "is_active": true - }, - { - "input_string": "I64", - "description": "int64_t", - "is_active": true - }, - { - "input_string": "F32", - "description": "float", - "is_active": true - }, - { - "input_string": "cudf::timestamp_ms", - "description": "cudf::timestamp_ms", - "is_active": true - } - ] - }, - { - "name": "NumRows", - "type": "int64", - "flags": "", - "values": [ - { - "input_string": "10000", - "description": "", - "value": 10000 - }, - { - "input_string": "100000", - "description": "", - "value": 100000 - }, - { - "input_string": "1000000", - "description": "", - "value": 1000000 - }, - { - "input_string": "10000000", - "description": "", - "value": 10000000 - } - ] - } - ], - "states": [ - { - "name": "Device=0 Type=bool NumRows=10000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 0, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "bool" - }, - { - "name": "NumRows", - "type": "int64", - "value": "10000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "2704" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00021700309800295835" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.03225664392272471" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0002107849822616609" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.013019515397226355" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.6880439230000001" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=bool NumRows=100000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 0, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "bool" - }, - { - "name": "NumRows", - "type": "int64", - "value": "100000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "2320" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0002584884831896559" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0825229457889664" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00025310259314318117" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.07789639597764537" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.682488128" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=bool NumRows=1000000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 0, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "bool" - }, - { - "name": "NumRows", - "type": "int64", - "value": "1000000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "3216" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0006925634906716414" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.181811140870714" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0006877581192671667" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.18155596725865705" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "2.308104612" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=bool NumRows=10000000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 0, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "bool" - }, - { - "name": "NumRows", - "type": "int64", - "value": "10000000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "1888" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00554238944597458" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.013594008837623673" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.005537591253549377" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.013566081595658952" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "10.510662326" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=I8 NumRows=10000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 1, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "I8" - }, - { - "name": "NumRows", - "type": "int64", - "value": "10000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "2864" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00017964160300279323" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.037831079851420674" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00017492174277571168" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.026503574914900213" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.612775248" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=I8 NumRows=100000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 1, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "I8" - }, - { - "name": "NumRows", - "type": "int64", - "value": "100000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "2272" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00022487412984154958" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.027087343546926868" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0002201128727231747" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.016458676790133714" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.5864066920000001" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=I8 NumRows=1000000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 1, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "I8" - }, - { - "name": "NumRows", - "type": "int64", - "value": "1000000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "1248" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0006832431274038457" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0119780711000842" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0006784556157027302" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.009651711564790999" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.8821486340000001" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=I8 NumRows=10000000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 1, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "I8" - }, - { - "name": "NumRows", - "type": "int64", - "value": "10000000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "1408" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00551892489275568" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.007926213593380903" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.005514225204221232" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.007881799834783692" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "7.804920706000001" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=I32 NumRows=10000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 2, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "I32" - }, - { - "name": "NumRows", - "type": "int64", - "value": "10000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "2864" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00017962303142458072" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.04502683046446124" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0001748775418213045" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.035847931504879825" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.612458348" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=I32 NumRows=100000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 2, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "I32" - }, - { - "name": "NumRows", - "type": "int64", - "value": "100000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "2304" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0002221182100694443" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.02864519735570142" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00021738018061215455" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.018636456128851456" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.588096504" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=I32 NumRows=1000000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 2, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "I32" - }, - { - "name": "NumRows", - "type": "int64", - "value": "1000000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "976" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0006906091270491808" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.012971954965496222" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0006858079670394055" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.010922877403360709" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.6968690110000001" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=I32 NumRows=10000000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 2, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "I32" - }, - { - "name": "NumRows", - "type": "int64", - "value": "10000000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "1744" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.005640688180619259" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.007654157354299601" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0056359616881663565" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.007604988022631572" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "9.88047442" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=I64 NumRows=10000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 3, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "I64" - }, - { - "name": "NumRows", - "type": "int64", - "value": "10000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "2864" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0001799898306564251" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.03646439847384236" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00017525735183738616" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.024668904047286017" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.6137548540000001" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=I64 NumRows=100000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 3, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "I64" - }, - { - "name": "NumRows", - "type": "int64", - "value": "100000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "2304" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0002222455308159723" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.029939720807146755" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0002174671943519364" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.020419852142566818" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.588633842" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=I64 NumRows=1000000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 3, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "I64" - }, - { - "name": "NumRows", - "type": "int64", - "value": "1000000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "1152" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0007158363619791664" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.013825314347670238" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0007110079728687802" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.011999047628734357" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.8518644750000001" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=I64 NumRows=10000000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 3, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "I64" - }, - { - "name": "NumRows", - "type": "int64", - "value": "10000000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "2016" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0058727196036706345" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.009633820505230128" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.005867959742271706" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.009595975504553378" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "11.889661979000001" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=F32 NumRows=10000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 4, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "F32" - }, - { - "name": "NumRows", - "type": "int64", - "value": "10000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "2816" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00018296302982954513" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.04164499889216332" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00017824229554654192" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.031947908329993516" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.611098432" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=F32 NumRows=100000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 4, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "F32" - }, - { - "name": "NumRows", - "type": "int64", - "value": "100000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "2112" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0002430979346590908" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0212128333097389" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00023833330347428576" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0070626629139464115" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.584252941" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=F32 NumRows=1000000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 4, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "F32" - }, - { - "name": "NumRows", - "type": "int64", - "value": "1000000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "1712" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00110359513609813" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.01897577394659825" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0010987860558328202" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.018369948997503344" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "1.9354934560000001" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=F32 NumRows=10000000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 4, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "F32" - }, - { - "name": "NumRows", - "type": "int64", - "value": "10000000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "1149" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.01303830392167101" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.005636086667007628" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.013033159977254736" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.005617260081375752" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "15.010919779000002" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=cudf::timestamp_ms NumRows=10000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 5, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "cudf::timestamp_ms" - }, - { - "name": "NumRows", - "type": "int64", - "value": "10000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "2848" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0001804409238061799" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.03665236791083376" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00017569219105447947" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.024342661120343934" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.61079433" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=cudf::timestamp_ms NumRows=100000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 5, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "cudf::timestamp_ms" - }, - { - "name": "NumRows", - "type": "int64", - "value": "100000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "2256" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00022707608466312074" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0361876511519216" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00022236219866875046" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.029331279236812065" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.586994089" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=cudf::timestamp_ms NumRows=1000000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 5, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "cudf::timestamp_ms" - }, - { - "name": "NumRows", - "type": "int64", - "value": "1000000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "1280" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0007279055882812492" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0188987405518479" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0007230546253267675" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.017671083475709753" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.9620853460000001" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=cudf::timestamp_ms NumRows=10000000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 5, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "cudf::timestamp_ms" - }, - { - "name": "NumRows", - "type": "int64", - "value": "10000000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "1072" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.005956714626865664" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.006745372038588295" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.005951931767944074" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.006696835739155452" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "6.411682865" - } - ] - } - ], - "is_skipped": false - } - ] - }, - { - "name": "distinct_list", - "index": 1, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "devices": [ - 0 - ], - "axes": [ - { - "name": "Type", - "type": "type", - "flags": "", - "values": [ - { - "input_string": "I32", - "description": "int32_t", - "is_active": true - }, - { - "input_string": "cudf::list_view", - "description": "", - "is_active": true - } - ] - }, - { - "name": "null_probability", - "type": "float64", - "flags": "", - "values": [ - { - "input_string": "0", - "description": "", - "value": 0.0 - }, - { - "input_string": "0.1", - "description": "", - "value": 0.1 - } - ] - }, - { - "name": "ColumnSize", - "type": "int64", - "flags": "", - "values": [ - { - "input_string": "100000000", - "description": "", - "value": 100000000 - } - ] - } - ], - "states": [ - { - "name": "Device=0 Type=I32 null_probability=0 ColumnSize=100000000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 0, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "I32" - }, - { - "name": "null_probability", - "type": "float64", - "value": "0" - }, - { - "name": "ColumnSize", - "type": "int64", - "value": "100000000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "720" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.012210535058333331" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.005799153205286631" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.01220575648678673" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0057832412829464195" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "8.809005589" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=I32 null_probability=0.1 ColumnSize=100000000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 0, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "I32" - }, - { - "name": "null_probability", - "type": "float64", - "value": "0.1" - }, - { - "name": "ColumnSize", - "type": "int64", - "value": "100000000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "400" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.013768925567500001" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.005836561789050461" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.013764196693897265" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0058253933699660854" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "5.517115335000001" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=cudf::list_view null_probability=0 ColumnSize=100000000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 1, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "cudf::list_view" - }, - { - "name": "null_probability", - "type": "float64", - "value": "0" - }, - { - "name": "ColumnSize", - "type": "int64", - "value": "100000000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "688" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.01314988552034884" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.006676118608579812" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.013145149074321568" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.006664777897553564" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "9.063916718" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=cudf::list_view null_probability=0.1 ColumnSize=100000000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 1, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "cudf::list_view" - }, - { - "name": "null_probability", - "type": "float64", - "value": "0.1" - }, - { - "name": "ColumnSize", - "type": "int64", - "value": "100000000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "768" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.015098254283854176" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.006360468181198888" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.015093491327017547" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0063505322932672" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "11.614478019" - } - ] - } - ], - "is_skipped": false - } - ] - } - ] -} diff --git a/static_set.json b/static_set.json deleted file mode 100644 index 264ec19afd5..00000000000 --- a/static_set.json +++ /dev/null @@ -1,3130 +0,0 @@ -{ - "meta": { - "argv": [ - "./STREAM_COMPACTION_NVBENCH", - "-b", - "0", - "-b", - "1", - "--json", - "/home/coder/cudf_sr/final_new.json" - ], - "version": { - "json": { - "major": 1, - "minor": 0, - "patch": 0, - "string": "1.0.0" - }, - "nvbench": { - "major": 0, - "minor": 1, - "patch": 0, - "string": "0.1.0", - "git_branch": "HEAD", - "git_sha": "555d628e9b250868c9da003e4407087ff1982e8e", - "git_version": "old-cmake-198-g555d628", - "git_is_dirty": false - } - } - }, - "devices": [ - { - "id": 0, - "name": "Tesla T4", - "sm_version": 750, - "ptx_version": 750, - "sm_default_clock_rate": 1590000000, - "number_of_sms": 40, - "max_blocks_per_sm": 16, - "max_threads_per_sm": 1024, - "max_threads_per_block": 1024, - "registers_per_sm": 65536, - "registers_per_block": 65536, - "global_memory_size": 15655829504, - "global_memory_bus_peak_clock_rate": 5001000000, - "global_memory_bus_width": 256, - "global_memory_bus_bandwidth": 320064000000, - "l2_cache_size": 4194304, - "shared_memory_per_sm": 65536, - "shared_memory_per_block": 49152, - "ecc_state": true - } - ], - "benchmarks": [ - { - "name": "distinct", - "index": 0, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "devices": [ - 0 - ], - "axes": [ - { - "name": "Type", - "type": "type", - "flags": "", - "values": [ - { - "input_string": "bool", - "description": "", - "is_active": true - }, - { - "input_string": "I8", - "description": "int8_t", - "is_active": true - }, - { - "input_string": "I32", - "description": "int32_t", - "is_active": true - }, - { - "input_string": "I64", - "description": "int64_t", - "is_active": true - }, - { - "input_string": "F32", - "description": "float", - "is_active": true - }, - { - "input_string": "cudf::timestamp_ms", - "description": "cudf::timestamp_ms", - "is_active": true - } - ] - }, - { - "name": "NumRows", - "type": "int64", - "flags": "", - "values": [ - { - "input_string": "10000", - "description": "", - "value": 10000 - }, - { - "input_string": "100000", - "description": "", - "value": 100000 - }, - { - "input_string": "1000000", - "description": "", - "value": 1000000 - }, - { - "input_string": "10000000", - "description": "", - "value": 10000000 - } - ] - } - ], - "states": [ - { - "name": "Device=0 Type=bool NumRows=10000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 0, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "bool" - }, - { - "name": "NumRows", - "type": "int64", - "value": "10000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "3184" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00020463040734924632" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.2549116074049181" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00019867851274864938" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.2529735423260753" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.766678033" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=bool NumRows=100000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 0, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "bool" - }, - { - "name": "NumRows", - "type": "int64", - "value": "100000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "2816" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00022764429083806798" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.2686811688421379" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00022208771594292107" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.26733362072811223" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.7389794270000001" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=bool NumRows=1000000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 0, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "bool" - }, - { - "name": "NumRows", - "type": "int64", - "value": "1000000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "3168" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00034674626167929254" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.26584529024389386" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00034213333334209356" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.2654280285503345" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "1.175269364" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=bool NumRows=10000000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 0, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "bool" - }, - { - "name": "NumRows", - "type": "int64", - "value": "10000000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "3232" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0016731476104579234" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.04685370904788834" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0016685629806084778" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.04676901028895584" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "5.487458677" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=I8 NumRows=10000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 1, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "I8" - }, - { - "name": "NumRows", - "type": "int64", - "value": "10000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "2864" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00017970776850558646" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.030282760182153284" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00017516524050717724" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.01583017885707515" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.61163224" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=I8 NumRows=100000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 1, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "I8" - }, - { - "name": "NumRows", - "type": "int64", - "value": "100000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "3184" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00019606095445979957" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.09858652347333118" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00019154548725047686" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.09574844407095809" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.7315341860000001" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=I8 NumRows=1000000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 1, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "I8" - }, - { - "name": "NumRows", - "type": "int64", - "value": "1000000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "1536" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0003320073300781252" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.021380772386558264" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00032745252101449245" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.01627086014386994" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.544849617" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=I8 NumRows=10000000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 1, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "I8" - }, - { - "name": "NumRows", - "type": "int64", - "value": "10000000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "1360" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0016749257639705878" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.009715427955031629" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0016703032703084123" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.009306866704484352" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "2.3096378150000003" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=I32 NumRows=10000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 2, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "I32" - }, - { - "name": "NumRows", - "type": "int64", - "value": "10000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "2864" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0001800863334497208" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.02921082778608196" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0001755018102619068" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.013110076978555549" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.6117084740000001" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=I32 NumRows=100000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 2, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "I32" - }, - { - "name": "NumRows", - "type": "int64", - "value": "100000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "2656" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00019321554292168658" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.04401220138414076" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00018865301212049002" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.03675360005968246" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.600895811" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=I32 NumRows=1000000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 2, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "I32" - }, - { - "name": "NumRows", - "type": "int64", - "value": "1000000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "1536" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0003327889589843753" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.018137487099489237" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00032820708333747474" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.011554267029888642" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.54623782" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=I32 NumRows=10000000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 2, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "I32" - }, - { - "name": "NumRows", - "type": "int64", - "value": "10000000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "992" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0017263192137096763" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.01221894764794557" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00172167654900301" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.011901485259957689" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "1.735332332" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=I64 NumRows=10000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 3, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "I64" - }, - { - "name": "NumRows", - "type": "int64", - "value": "10000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "2864" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00017982362150837972" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.02875273313753797" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00017528116235290941" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.01248496857591803" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.61101256" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=I64 NumRows=100000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 3, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "I64" - }, - { - "name": "NumRows", - "type": "int64", - "value": "100000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "2656" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00019350087274096382" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.03222098390148629" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00018893709641892492" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.021403878022747635" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.6019312050000001" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=I64 NumRows=1000000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 3, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "I64" - }, - { - "name": "NumRows", - "type": "int64", - "value": "1000000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "2592" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0003402656647376538" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.027564014649341353" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.000335715962976309" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.02403156898377349" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.9437561820000001" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=I64 NumRows=10000000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 3, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "I64" - }, - { - "name": "NumRows", - "type": "int64", - "value": "10000000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "1792" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0018101229174107172" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.01243507431866239" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0018054511428012357" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.01215109328410885" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "3.2864291870000004" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=F32 NumRows=10000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 4, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "F32" - }, - { - "name": "NumRows", - "type": "int64", - "value": "10000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "2864" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00017950566724860367" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.029202205469486174" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00017495564269370126" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.013360448343696698" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.609308449" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=F32 NumRows=100000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 4, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "F32" - }, - { - "name": "NumRows", - "type": "int64", - "value": "100000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "2512" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00020452501273885362" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.024284671183999556" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00019997835693180946" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.008476231178531033" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.5967229780000001" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=F32 NumRows=1000000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 4, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "F32" - }, - { - "name": "NumRows", - "type": "int64", - "value": "1000000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "1456" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0005777499299450559" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.013782681671566372" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.000573144769439331" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.011172880836395024" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.8784639270000001" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=F32 NumRows=10000000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 4, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "F32" - }, - { - "name": "NumRows", - "type": "int64", - "value": "10000000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "66" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.007661241030303028" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.004399868041618955" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.007656497464035497" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.004342338061377496" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.5072121070000001" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=cudf::timestamp_ms NumRows=10000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 5, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "cudf::timestamp_ms" - }, - { - "name": "NumRows", - "type": "int64", - "value": "10000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "3168" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00017986202398989878" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.10854232852546174" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00017533174776142923" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.10542922246580139" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.676459247" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=cudf::timestamp_ms NumRows=100000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 5, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "cudf::timestamp_ms" - }, - { - "name": "NumRows", - "type": "int64", - "value": "100000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "2656" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00019347790850903615" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.03249202493314582" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.000188928807165518" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0217424042673909" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.601216791" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=cudf::timestamp_ms NumRows=1000000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 5, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "cudf::timestamp_ms" - }, - { - "name": "NumRows", - "type": "int64", - "value": "1000000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "1504" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00033771549800531926" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.019249970628191195" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00033312976608013253" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.013429441628131856" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.542008375" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=cudf::timestamp_ms NumRows=10000000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 5, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "cudf::timestamp_ms" - }, - { - "name": "NumRows", - "type": "int64", - "value": "10000000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "1104" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0018400764048913038" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.01296987747373167" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0018353944362505654" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.012692964377608498" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "2.057332931" - } - ] - } - ], - "is_skipped": false - } - ] - }, - { - "name": "distinct_list", - "index": 1, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "devices": [ - 0 - ], - "axes": [ - { - "name": "Type", - "type": "type", - "flags": "", - "values": [ - { - "input_string": "I32", - "description": "int32_t", - "is_active": true - }, - { - "input_string": "cudf::list_view", - "description": "", - "is_active": true - } - ] - }, - { - "name": "null_probability", - "type": "float64", - "flags": "", - "values": [ - { - "input_string": "0", - "description": "", - "value": 0.0 - }, - { - "input_string": "0.1", - "description": "", - "value": 0.1 - } - ] - }, - { - "name": "ColumnSize", - "type": "int64", - "flags": "", - "values": [ - { - "input_string": "100000000", - "description": "", - "value": 100000000 - } - ] - } - ], - "states": [ - { - "name": "Device=0 Type=I32 null_probability=0 ColumnSize=100000000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 0, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "I32" - }, - { - "name": "null_probability", - "type": "float64", - "value": "0" - }, - { - "name": "ColumnSize", - "type": "int64", - "value": "100000000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "136" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0036965089117647074" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.004638343951439435" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.003691819760729284" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.004468357669368835" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.505772738" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=I32 null_probability=0.1 ColumnSize=100000000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 0, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "I32" - }, - { - "name": "null_probability", - "type": "float64", - "value": "0.1" - }, - { - "name": "ColumnSize", - "type": "int64", - "value": "100000000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "118" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.004246559415254238" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.004625826474880542" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0042418603492995425" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.004499271462981514" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.503750398" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=cudf::list_view null_probability=0 ColumnSize=100000000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 1, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "cudf::list_view" - }, - { - "name": "null_probability", - "type": "float64", - "value": "0" - }, - { - "name": "ColumnSize", - "type": "int64", - "value": "100000000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "688" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0035718760116279042" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.010282783102359158" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0035671077682528393" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.01018810592370407" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "2.4733659080000003" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=cudf::list_view null_probability=0.1 ColumnSize=100000000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 1, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "cudf::list_view" - }, - { - "name": "null_probability", - "type": "float64", - "value": "0.1" - }, - { - "name": "ColumnSize", - "type": "int64", - "value": "100000000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "560" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.004177917276785719" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00741729093036538" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.004173174507277357" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.007332881069064344" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "2.35264685" - } - ] - } - ], - "is_skipped": false - } - ] - } - ] -} diff --git a/static_set_new.json b/static_set_new.json deleted file mode 100644 index 4aa66547224..00000000000 --- a/static_set_new.json +++ /dev/null @@ -1,3130 +0,0 @@ -{ - "meta": { - "argv": [ - "./STREAM_COMPACTION_NVBENCH", - "-b", - "0", - "-b", - "1", - "--json", - "/home/coder/cudf_sr/static_set_new.json" - ], - "version": { - "json": { - "major": 1, - "minor": 0, - "patch": 0, - "string": "1.0.0" - }, - "nvbench": { - "major": 0, - "minor": 1, - "patch": 0, - "string": "0.1.0", - "git_branch": "HEAD", - "git_sha": "555d628e9b250868c9da003e4407087ff1982e8e", - "git_version": "old-cmake-198-g555d628", - "git_is_dirty": false - } - } - }, - "devices": [ - { - "id": 0, - "name": "Tesla T4", - "sm_version": 750, - "ptx_version": 750, - "sm_default_clock_rate": 1590000000, - "number_of_sms": 40, - "max_blocks_per_sm": 16, - "max_threads_per_sm": 1024, - "max_threads_per_block": 1024, - "registers_per_sm": 65536, - "registers_per_block": 65536, - "global_memory_size": 15655829504, - "global_memory_bus_peak_clock_rate": 5001000000, - "global_memory_bus_width": 256, - "global_memory_bus_bandwidth": 320064000000, - "l2_cache_size": 4194304, - "shared_memory_per_sm": 65536, - "shared_memory_per_block": 49152, - "ecc_state": true - } - ], - "benchmarks": [ - { - "name": "distinct", - "index": 0, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "devices": [ - 0 - ], - "axes": [ - { - "name": "Type", - "type": "type", - "flags": "", - "values": [ - { - "input_string": "bool", - "description": "", - "is_active": true - }, - { - "input_string": "I8", - "description": "int8_t", - "is_active": true - }, - { - "input_string": "I32", - "description": "int32_t", - "is_active": true - }, - { - "input_string": "I64", - "description": "int64_t", - "is_active": true - }, - { - "input_string": "F32", - "description": "float", - "is_active": true - }, - { - "input_string": "cudf::timestamp_ms", - "description": "cudf::timestamp_ms", - "is_active": true - } - ] - }, - { - "name": "NumRows", - "type": "int64", - "flags": "", - "values": [ - { - "input_string": "10000", - "description": "", - "value": 10000 - }, - { - "input_string": "100000", - "description": "", - "value": 100000 - }, - { - "input_string": "1000000", - "description": "", - "value": 1000000 - }, - { - "input_string": "10000000", - "description": "", - "value": 10000000 - } - ] - } - ], - "states": [ - { - "name": "Device=0 Type=bool NumRows=10000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 0, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "bool" - }, - { - "name": "NumRows", - "type": "int64", - "value": "10000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "2592" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00020962153395061729" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.03481247428735653" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00020365303687544308" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.018760872805660522" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.637114516" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=bool NumRows=100000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 0, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "bool" - }, - { - "name": "NumRows", - "type": "int64", - "value": "100000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "2256" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00023879397960992907" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.05689921886107442" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0002328773900628718" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.05050957742957333" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.618548776" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=bool NumRows=1000000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 0, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "bool" - }, - { - "name": "NumRows", - "type": "int64", - "value": "1000000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "1408" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0003624086931818184" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.07584986504141206" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00035752504553899795" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.07368872492402007" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.543950841" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=bool NumRows=10000000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 0, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "bool" - }, - { - "name": "NumRows", - "type": "int64", - "value": "10000000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "2896" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0016809379734116013" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.09644366952782124" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0016760039677359802" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0962733472209506" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "4.943738388" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=I8 NumRows=10000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 1, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "I8" - }, - { - "name": "NumRows", - "type": "int64", - "value": "10000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "2672" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00019215333907185656" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.30167644007876326" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00018730506598726366" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.2975232022646635" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.608213777" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=I8 NumRows=100000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 1, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "I8" - }, - { - "name": "NumRows", - "type": "int64", - "value": "100000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "2560" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00020008568749999993" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.056285571959678815" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00019534804983995885" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.05061348591249386" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.599631054" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=I8 NumRows=1000000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 1, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "I8" - }, - { - "name": "NumRows", - "type": "int64", - "value": "1000000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "3168" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00033529945959596025" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.08946313784011517" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00033049329292179674" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.08800871057770095" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "1.1411794320000002" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=I8 NumRows=10000000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 1, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "I8" - }, - { - "name": "NumRows", - "type": "int64", - "value": "10000000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "3184" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0016756483790829177" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.029108557493874032" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.001670798161371271" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.02872304489189686" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "5.418545515" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=I32 NumRows=10000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 2, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "I32" - }, - { - "name": "NumRows", - "type": "int64", - "value": "10000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "2816" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00018273358025568175" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.036821269554249435" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.000177922545530071" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.025147287230726973" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.6113254290000001" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=I32 NumRows=100000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 2, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "I32" - }, - { - "name": "NumRows", - "type": "int64", - "value": "100000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "2592" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00019782819328703735" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.03222261483732147" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00019302424668897816" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.02054330829137285" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.6012078270000001" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=I32 NumRows=1000000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 2, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "I32" - }, - { - "name": "NumRows", - "type": "int64", - "value": "1000000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "1536" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00033253364192708363" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.018468434300452058" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0003277398751039676" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.011258845370969921" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.546934508" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=I32 NumRows=10000000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 2, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "I32" - }, - { - "name": "NumRows", - "type": "int64", - "value": "10000000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "2880" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0017258599010416674" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.01959746966815728" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.001720910166700681" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.019351852083723333" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "5.044768596" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=I64 NumRows=10000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 3, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "I64" - }, - { - "name": "NumRows", - "type": "int64", - "value": "10000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "2816" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00018297387464488637" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.04843229463169197" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00017818213615100793" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.04020882852021604" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.611375803" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=I64 NumRows=100000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 3, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "I64" - }, - { - "name": "NumRows", - "type": "int64", - "value": "100000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "2592" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00019774994714506192" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.03662707504093914" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0001930186913268248" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.027106668559167435" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.6012601780000001" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=I64 NumRows=1000000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 3, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "I64" - }, - { - "name": "NumRows", - "type": "int64", - "value": "1000000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "2816" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00035060977556818177" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.16575686289122357" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0003457193980945955" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.16331324107680964" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "1.0606701840000001" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=I64 NumRows=10000000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 3, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "I64" - }, - { - "name": "NumRows", - "type": "int64", - "value": "10000000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "976" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0018353044036885252" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0528516868882808" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0018299283616122672" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.05210675099057637" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "1.8183584910000001" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=F32 NumRows=10000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 4, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "F32" - }, - { - "name": "NumRows", - "type": "int64", - "value": "10000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "2800" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00018419710821428574" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.062114644119672054" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00017943027411188386" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0558199906020224" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.611421219" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=F32 NumRows=100000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 4, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "F32" - }, - { - "name": "NumRows", - "type": "int64", - "value": "100000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "2384" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00021509222860738252" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.05268771936610105" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00021032308755230067" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.04704272436419841" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.595869002" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=F32 NumRows=1000000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 4, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "F32" - }, - { - "name": "NumRows", - "type": "int64", - "value": "1000000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "1792" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0005951878013392847" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.10355356380937762" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0005902473575635153" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.101935453702182" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "1.116224909" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=F32 NumRows=10000000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 4, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "F32" - }, - { - "name": "NumRows", - "type": "int64", - "value": "10000000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "66" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.007644699560606064" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.002960801310162206" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.007639635866338558" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.002871069146675316" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.5061769730000001" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=cudf::timestamp_ms NumRows=10000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 5, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "cudf::timestamp_ms" - }, - { - "name": "NumRows", - "type": "int64", - "value": "10000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "2800" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00018425268178571438" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.052555383014023994" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00017940852597888" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.04506205074979422" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.611657742" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=cudf::timestamp_ms NumRows=100000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 5, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "cudf::timestamp_ms" - }, - { - "name": "NumRows", - "type": "int64", - "value": "100000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "2592" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00019819958371913572" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.032858441966953277" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00019340903695994673" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.02164960552343306" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.602109671" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=cudf::timestamp_ms NumRows=1000000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 5, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "cudf::timestamp_ms" - }, - { - "name": "NumRows", - "type": "int64", - "value": "1000000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "1616" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00034110811943069294" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.017868883575269848" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.00033627986172121927" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.010594833617849946" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.589410058" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=cudf::timestamp_ms NumRows=10000000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 5, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "cudf::timestamp_ms" - }, - { - "name": "NumRows", - "type": "int64", - "value": "10000000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "1056" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0018283935303030304" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.013675976125737184" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.001823343545311329" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.013338660630380415" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "1.956281308" - } - ] - } - ], - "is_skipped": false - } - ] - }, - { - "name": "distinct_list", - "index": 1, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "devices": [ - 0 - ], - "axes": [ - { - "name": "Type", - "type": "type", - "flags": "", - "values": [ - { - "input_string": "I32", - "description": "int32_t", - "is_active": true - }, - { - "input_string": "cudf::list_view", - "description": "", - "is_active": true - } - ] - }, - { - "name": "null_probability", - "type": "float64", - "flags": "", - "values": [ - { - "input_string": "0", - "description": "", - "value": 0.0 - }, - { - "input_string": "0.1", - "description": "", - "value": 0.1 - } - ] - }, - { - "name": "ColumnSize", - "type": "int64", - "flags": "", - "values": [ - { - "input_string": "100000000", - "description": "", - "value": 100000000 - } - ] - } - ], - "states": [ - { - "name": "Device=0 Type=I32 null_probability=0 ColumnSize=100000000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 0, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "I32" - }, - { - "name": "null_probability", - "type": "float64", - "value": "0" - }, - { - "name": "ColumnSize", - "type": "int64", - "value": "100000000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "137" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0036715400948905126" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.005019671829007967" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.003666419620931584" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.004826717056517512" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.50617562" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=I32 null_probability=0.1 ColumnSize=100000000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 0, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "I32" - }, - { - "name": "null_probability", - "type": "float64", - "value": "0.1" - }, - { - "name": "ColumnSize", - "type": "int64", - "value": "100000000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "2016" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.004204665274801583" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.010056348310144908" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.004199421334834326" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.009867343458454745" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "8.528077473" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=cudf::list_view null_probability=0 ColumnSize=100000000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 1, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "cudf::list_view" - }, - { - "name": "null_probability", - "type": "float64", - "value": "0" - }, - { - "name": "ColumnSize", - "type": "int64", - "value": "100000000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "672" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0035897424151785716" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.006954773468319603" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.0035844565754135464" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.006789676257025493" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "2.4285585540000003" - } - ] - } - ], - "is_skipped": false - }, - { - "name": "Device=0 Type=cudf::list_view null_probability=0.1 ColumnSize=100000000", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 15.0, - "device": 0, - "type_config_index": 1, - "axis_values": [ - { - "name": "Type", - "type": "string", - "value": "cudf::list_view" - }, - { - "name": "null_probability", - "type": "float64", - "value": "0.1" - }, - { - "name": "ColumnSize", - "type": "int64", - "value": "100000000" - } - ], - "summaries": [ - { - "tag": "nv/cold/sample_size", - "name": "Samples", - "description": "Number of isolated kernel executions", - "hint": "sample_size", - "data": [ - { - "name": "value", - "type": "int64", - "value": "1632" - } - ] - }, - { - "tag": "nv/cold/time/cpu/mean", - "name": "CPU Time", - "description": "Mean isolated kernel execution time (measured on host CPU)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.004210844107230388" - } - ] - }, - { - "tag": "nv/cold/time/cpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated CPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.017998529931187918" - } - ] - }, - { - "tag": "nv/cold/time/gpu/mean", - "name": "GPU Time", - "description": "Mean isolated kernel execution time (measured with CUDA events)", - "hint": "duration", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.004205538742682514" - } - ] - }, - { - "tag": "nv/cold/time/gpu/stdev/relative", - "name": "Noise", - "description": "Relative standard deviation of isolated GPU times", - "hint": "percentage", - "data": [ - { - "name": "value", - "type": "float64", - "value": "0.017847989809283395" - } - ] - }, - { - "tag": "nv/cold/walltime", - "name": "Walltime", - "description": "Walltime used for isolated measurements", - "hint": "duration", - "hide": "Hidden by default.", - "data": [ - { - "name": "value", - "type": "float64", - "value": "6.914157977" - } - ] - } - ], - "is_skipped": false - } - ] - } - ] -} From ee43ccfb0c7e8102b85da2b0fbaf706a5c356d01 Mon Sep 17 00:00:00 2001 From: Srinivas Yadav Singanaboina Date: Tue, 18 Jun 2024 22:33:37 +0000 Subject: [PATCH 13/15] update header include --- cpp/src/stream_compaction/distinct.cu | 1 - cpp/src/stream_compaction/distinct_helpers.cu | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/stream_compaction/distinct.cu b/cpp/src/stream_compaction/distinct.cu index fc8f45fd515..f74c13ec2a5 100644 --- a/cpp/src/stream_compaction/distinct.cu +++ b/cpp/src/stream_compaction/distinct.cu @@ -39,7 +39,6 @@ #include #include -#include #include #include diff --git a/cpp/src/stream_compaction/distinct_helpers.cu b/cpp/src/stream_compaction/distinct_helpers.cu index 09d3f0bb9f6..f75dc41eec6 100644 --- a/cpp/src/stream_compaction/distinct_helpers.cu +++ b/cpp/src/stream_compaction/distinct_helpers.cu @@ -17,6 +17,7 @@ #include "distinct_helpers.hpp" #include +#include namespace cudf::detail { From 51528d3f591c7cf42ad55755676b8cd0406aca1a Mon Sep 17 00:00:00 2001 From: Srinivas Yadav Singanaboina Date: Tue, 25 Jun 2024 22:10:21 +0000 Subject: [PATCH 14/15] Minor improvements and cleanup --- cpp/src/stream_compaction/distinct.cu | 11 +---------- cpp/src/stream_compaction/distinct_helpers.cu | 10 ++++------ cpp/src/stream_compaction/distinct_helpers.hpp | 4 +++- 3 files changed, 8 insertions(+), 17 deletions(-) diff --git a/cpp/src/stream_compaction/distinct.cu b/cpp/src/stream_compaction/distinct.cu index f74c13ec2a5..e5cf29f3ebf 100644 --- a/cpp/src/stream_compaction/distinct.cu +++ b/cpp/src/stream_compaction/distinct.cu @@ -32,13 +32,6 @@ #include #include -#include -#include -#include -#include -#include -#include - #include #include @@ -113,9 +106,7 @@ rmm::device_uvector distinct_indices(table_view const& input, {}, cudf::detail::cuco_allocator{stream}, stream.value()}; - auto const iter = thrust::counting_iterator{0}; - auto const size = set.insert(iter, iter + num_rows, stream.value()); - return detail::reduce_by_row(set, size, num_rows, keep, stream, mr); + return detail::reduce_by_row(set, num_rows, keep, stream, mr); }; if (cudf::detail::has_nested_columns(input)) { diff --git a/cpp/src/stream_compaction/distinct_helpers.cu b/cpp/src/stream_compaction/distinct_helpers.cu index f75dc41eec6..149e39e6279 100644 --- a/cpp/src/stream_compaction/distinct_helpers.cu +++ b/cpp/src/stream_compaction/distinct_helpers.cu @@ -23,17 +23,19 @@ namespace cudf::detail { template rmm::device_uvector reduce_by_row(hash_set_type& set, - size_type set_size, size_type num_rows, duplicate_keep_option keep, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - auto output_indices = rmm::device_uvector(set_size, stream, mr); + auto output_indices = rmm::device_uvector(num_rows, stream, mr); // If we don't care about order, just gather indices of distinct keys taken from set. if (keep == duplicate_keep_option::KEEP_ANY) { + auto const iter = thrust::counting_iterator{0}; + auto const size = set.insert(iter, iter + num_rows, stream.value()); set.retrieve_all(output_indices.begin(), stream.value()); + output_indices.resize(size, stream); return output_indices; } @@ -102,7 +104,6 @@ template rmm::device_uvector reduce_by_row( false, cudf::nullate::DYNAMIC, cudf::experimental::row::equality::nan_equal_physical_equality_comparator>>& set, - size_type set_size, size_type num_rows, duplicate_keep_option keep, rmm::cuda_stream_view stream, @@ -113,7 +114,6 @@ template rmm::device_uvector reduce_by_row( true, cudf::nullate::DYNAMIC, cudf::experimental::row::equality::nan_equal_physical_equality_comparator>>& set, - size_type set_size, size_type num_rows, duplicate_keep_option keep, rmm::cuda_stream_view stream, @@ -124,7 +124,6 @@ template rmm::device_uvector reduce_by_row( false, cudf::nullate::DYNAMIC, cudf::experimental::row::equality::physical_equality_comparator>>& set, - size_type set_size, size_type num_rows, duplicate_keep_option keep, rmm::cuda_stream_view stream, @@ -135,7 +134,6 @@ template rmm::device_uvector reduce_by_row( true, cudf::nullate::DYNAMIC, cudf::experimental::row::equality::physical_equality_comparator>>& set, - size_type set_size, size_type num_rows, duplicate_keep_option keep, rmm::cuda_stream_view stream, diff --git a/cpp/src/stream_compaction/distinct_helpers.hpp b/cpp/src/stream_compaction/distinct_helpers.hpp index 3e293f8b5e2..d904133bb02 100644 --- a/cpp/src/stream_compaction/distinct_helpers.hpp +++ b/cpp/src/stream_compaction/distinct_helpers.hpp @@ -24,6 +24,9 @@ #include #include +#include +#include +#include namespace cudf::detail { @@ -85,7 +88,6 @@ using hash_set_type = */ template rmm::device_uvector reduce_by_row(hash_set_type& set, - size_type set_size, size_type num_rows, duplicate_keep_option keep, rmm::cuda_stream_view stream, From 4dd53a7dcfac0e3222d5f55fba78e4b373f90875 Mon Sep 17 00:00:00 2001 From: Srinivas Yadav Singanaboina Date: Wed, 26 Jun 2024 00:47:03 +0000 Subject: [PATCH 15/15] use set.insert_async --- cpp/src/stream_compaction/distinct_helpers.cu | 6 +++--- cpp/src/stream_compaction/distinct_helpers.hpp | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/cpp/src/stream_compaction/distinct_helpers.cu b/cpp/src/stream_compaction/distinct_helpers.cu index 149e39e6279..c3a004b7f28 100644 --- a/cpp/src/stream_compaction/distinct_helpers.cu +++ b/cpp/src/stream_compaction/distinct_helpers.cu @@ -33,9 +33,9 @@ rmm::device_uvector reduce_by_row(hash_set_type& set, // If we don't care about order, just gather indices of distinct keys taken from set. if (keep == duplicate_keep_option::KEEP_ANY) { auto const iter = thrust::counting_iterator{0}; - auto const size = set.insert(iter, iter + num_rows, stream.value()); - set.retrieve_all(output_indices.begin(), stream.value()); - output_indices.resize(size, stream); + set.insert_async(iter, iter + num_rows, stream.value()); + auto const output_end = set.retrieve_all(output_indices.begin(), stream.value()); + output_indices.resize(thrust::distance(output_indices.begin(), output_end), stream); return output_indices; } diff --git a/cpp/src/stream_compaction/distinct_helpers.hpp b/cpp/src/stream_compaction/distinct_helpers.hpp index d904133bb02..fca67c98873 100644 --- a/cpp/src/stream_compaction/distinct_helpers.hpp +++ b/cpp/src/stream_compaction/distinct_helpers.hpp @@ -26,6 +26,7 @@ #include #include #include +#include #include namespace cudf::detail {