Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Configurable NaN handling in device_row_comparators #10870

Merged
merged 37 commits into from
Jun 3, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
eaffdea
NaN handling in device_row_comparators
rwlee May 17, 2022
3d2a475
Merge remote-tracking branch 'pub/branch-22.06' into rwlee/nanconfig
rwlee May 18, 2022
e518668
template the comparator
rwlee May 20, 2022
c99e3c5
partial fix to performance regression
rwlee May 23, 2022
bb00193
Merge remote-tracking branch 'pub/branch-22.08' into rwlee/nanconfig
rwlee May 24, 2022
798d6c5
Template NaN config lexicographic and equality
rwlee May 31, 2022
c5f9961
Add experimental row operator tests
rwlee May 31, 2022
0d4e798
switch to CUDF_ENABLE_IF
rwlee Jun 1, 2022
f35d9e3
Naming and add equality tests
rwlee Jun 1, 2022
e4cee95
reorder cmake test file
rwlee Jun 1, 2022
58d2663
fix cmake formatting
rwlee Jun 1, 2022
3a6cd68
Merge remote-tracking branch 'pub/branch-22.08' into rwlee/nanconfig
rwlee Jun 1, 2022
49be087
Apply suggestions from code review
rwlee Jun 2, 2022
d0f64f2
ctad refactor + split device_comparator call
rwlee Jun 2, 2022
0576e6c
Merge branch 'rwlee/nanconfig' of github.com:rwlee/cudf into rwlee/na…
rwlee Jun 2, 2022
1bd5405
rename experimental op test file
rwlee Jun 2, 2022
70da3b4
comment cleanup and pr fixes
rwlee Jun 2, 2022
e4a7029
physical comparator clarification docs
rwlee Jun 2, 2022
1240c85
fix whitespace
rwlee Jun 2, 2022
3304615
fix formatting
rwlee Jun 2, 2022
4f80b1c
Merge remote-tracking branch 'pub/branch-22.08' into rwlee/nanconfig
rwlee Jun 2, 2022
9ca2d29
fix copyright
rwlee Jun 2, 2022
51938fd
Functors as arguments, enabling CTAD
rwlee Jun 2, 2022
3f545fe
Docs, cleanup, and renaming
rwlee Jun 3, 2022
2eb8103
device_comparator --> equal_to
rwlee Jun 3, 2022
7293b61
Fix docstring.
bdice Jun 3, 2022
b42c895
Fix check_nulls parameter docstring.
bdice Jun 3, 2022
25fcff2
Rename c to comparator.
bdice Jun 3, 2022
20e498a
List members in initialization order.
bdice Jun 3, 2022
1eb657e
Style.
bdice Jun 3, 2022
f80cdc3
Rename to nan_equal_expected.
bdice Jun 3, 2022
5cdd94a
Include <cmath>.
bdice Jun 3, 2022
3a7f118
Clean up test file.
bdice Jun 3, 2022
a139131
Move tests to cpp file.
bdice Jun 3, 2022
78af755
Merge remote-tracking branch 'upstream/branch-22.08' into rwlee/nanco…
bdice Jun 3, 2022
ec519e9
Revert "Move tests to cpp file."
bdice Jun 3, 2022
6da4a4c
Style.
bdice Jun 3, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
369 changes: 295 additions & 74 deletions cpp/include/cudf/table/experimental/row_operators.cuh

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion cpp/src/groupby/hash/groupby.cu
Original file line number Diff line number Diff line change
Expand Up @@ -568,7 +568,7 @@ std::unique_ptr<table> groupby(table_view const& keys,
auto preprocessed_keys = cudf::experimental::row::hash::preprocessed_table::create(keys, stream);
auto const comparator = cudf::experimental::row::equality::self_comparator{preprocessed_keys};
auto const row_hash = cudf::experimental::row::hash::row_hasher{std::move(preprocessed_keys)};
auto const d_key_equal = comparator.device_comparator(has_null, null_keys_are_equal);
auto const d_key_equal = comparator.equal_to(has_null, null_keys_are_equal);
auto const d_row_hash = row_hash.device_hasher(has_null);

size_type constexpr unused_key{std::numeric_limits<size_type>::max()};
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/reductions/scan/rank_scan.cu
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ std::unique_ptr<column> rank_generator(column_view const& order_by,
{
auto comp = cudf::experimental::row::equality::self_comparator(table_view{{order_by}}, stream);
auto const device_comparator =
comp.device_comparator(nullate::DYNAMIC{has_nested_nulls(table_view({order_by}))});
comp.equal_to(nullate::DYNAMIC{has_nested_nulls(table_view({order_by}))});
auto ranks = make_fixed_width_column(
data_type{type_to_id<size_type>()}, order_by.size(), mask_state::UNALLOCATED, stream, mr);
auto mutable_ranks = ranks->mutable_view();
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/search/contains_nested.cu
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ bool contains_nested_element(column_view const& haystack,

auto const comparator =
cudf::experimental::row::equality::two_table_comparator(haystack_tv, needle_tv, stream);
auto const d_comp = comparator.device_comparator(nullate::DYNAMIC{has_nulls});
auto const d_comp = comparator.equal_to(nullate::DYNAMIC{has_nulls});

auto const begin = cudf::experimental::row::lhs_iterator(0);
auto const end = begin + haystack.size();
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/search/search_ordered.cu
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ std::unique_ptr<column> search_ordered(table_view const& haystack,
auto const comparator = cudf::experimental::row::lexicographic::two_table_comparator(
matched_haystack, matched_needles, column_order, null_precedence, stream);
auto const has_nulls = has_nested_nulls(matched_haystack) or has_nested_nulls(matched_needles);
auto const d_comparator = comparator.device_comparator(nullate::DYNAMIC{has_nulls});
auto const d_comparator = comparator.less(nullate::DYNAMIC{has_nulls});

auto const haystack_it = cudf::experimental::row::lhs_iterator(0);
auto const needles_it = cudf::experimental::row::rhs_iterator(0);
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/sort/sort_impl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ std::unique_ptr<column> sorted_order(table_view input,

auto comp =
experimental::row::lexicographic::self_comparator(input, column_order, null_precedence, stream);
auto comparator = comp.device_comparator(nullate::DYNAMIC{has_nested_nulls(input)});
auto comparator = comp.less(nullate::DYNAMIC{has_nested_nulls(input)});

if (stable) {
thrust::stable_sort(rmm::exec_policy(stream),
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/stream_compaction/distinct.cu
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ std::unique_ptr<table> distinct(table_view const& input,
experimental::compaction_hash hash_key(row_hash.device_hasher(has_null));

cudf::experimental::row::equality::self_comparator row_equal(preprocessed_keys);
auto key_equal = row_equal.device_comparator(has_null, nulls_equal);
auto key_equal = row_equal.equal_to(has_null, nulls_equal);

auto iter = cudf::detail::make_counting_transform_iterator(
0, [] __device__(size_type i) { return cuco::make_pair(i, i); });
Expand Down
1 change: 1 addition & 0 deletions cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,7 @@ ConfigureTest(TRANSPOSE_TEST transpose/transpose_test.cpp)
# * table tests -----------------------------------------------------------------------------------
ConfigureTest(
TABLE_TEST table/table_tests.cpp table/table_view_tests.cu table/row_operators_tests.cpp
table/experimental_row_operator_tests.cu
)

# ##################################################################################################
Expand Down
2 changes: 1 addition & 1 deletion cpp/tests/groupby/lists_tests.cu
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ inline void test_hash_based_sum_agg(column_view const& keys,

auto const null_keys_are_equal =
include_null_keys == null_policy::INCLUDE ? null_equality::EQUAL : null_equality::UNEQUAL;
auto row_equal = comparator.device_comparator(nullate::DYNAMIC{true}, null_keys_are_equal);
auto row_equal = comparator.equal_to(nullate::DYNAMIC{true}, null_keys_are_equal);
auto func = match_expected_fn{num_rows, row_equal};

// For each row in expected table `t[0, num_rows)`, there must be a match
Expand Down
231 changes: 231 additions & 0 deletions cpp/tests/table/experimental_row_operator_tests.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,231 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <cudf_test/base_fixture.hpp>
#include <cudf_test/column_utilities.hpp>
#include <cudf_test/column_wrapper.hpp>
#include <cudf_test/type_lists.hpp>

#include <cudf/column/column_view.hpp>
#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/table/experimental/row_operators.cuh>
#include <cudf/table/row_operators.cuh>
#include <cudf/table/table_device_view.cuh>
#include <cudf/table/table_view.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/exec_policy.hpp>

#include <thrust/iterator/counting_iterator.h>
#include <thrust/transform.h>

#include <cmath>
#include <vector>

using namespace cudf::test;
using namespace cudf::experimental::row;

/**
 * @brief Typed fixture for the row-operator tests in this file.
 *
 * Adds no members of its own; everything is inherited from `cudf::test::BaseFixture`.
 */
template <typename T>
struct TypedTableViewTest : cudf::test::BaseFixture {};

// The suite is instantiated once for every type in the concatenation of
// IntegralTypesNotBool and FloatingPointTypes.
using NumericTypesNotBool = Concat<IntegralTypesNotBool, FloatingPointTypes>;
TYPED_TEST_SUITE(TypedTableViewTest, NumericTypesNotBool);

/**
 * @brief Applies the lexicographic `less` row comparator of `input` to each row paired with itself.
 *
 * Both index ranges handed to `thrust::transform` are the same counting sequence starting at 0,
 * so element `i` of the result is `less(i, i)`. The comparator is built with
 * `cudf::nullate::NO{}`.
 *
 * @param input        Table whose rows are compared
 * @param column_order Per-column ordering forwarded to `lexicographic::self_comparator`
 * @param comparator   Physical element comparator forwarded to `less`
 * @return BOOL8 column holding one comparison result per row of `input`
 */
template <typename PhysicalElementComparator>
auto self_comparison(cudf::table_view input,
                     std::vector<cudf::order> const& column_order,
                     PhysicalElementComparator comparator)
{
  auto const stream = rmm::cuda_stream_view{};

  auto const row_comparator = lexicographic::self_comparator{input, column_order, {}, stream};
  auto const row_less       = row_comparator.less(cudf::nullate::NO{}, comparator);

  // One boolean slot per input row; no null mask is allocated.
  auto const row_count = input.num_rows();
  auto result          = cudf::make_numeric_column(
    cudf::data_type(cudf::type_id::BOOL8), row_count, cudf::mask_state::UNALLOCATED);
  auto* const d_result = result->mutable_view().data<bool>();

  auto const row_begin = thrust::make_counting_iterator(0);
  auto const row_end   = thrust::make_counting_iterator(row_count);
  thrust::transform(rmm::exec_policy(stream), row_begin, row_end, row_begin, d_result, row_less);

  return result;
}

/**
 * @brief Applies the two-table lexicographic `less` row comparator to matching rows of two tables.
 *
 * Row `i` of `lhs` is compared against row `i` of `rhs`, using `lhs_iterator` / `rhs_iterator`
 * index ranges that both start at 0. The number of comparisons is `lhs.num_rows()`. The
 * comparator is built with `cudf::nullate::NO{}`.
 *
 * @param lhs          Left-hand table
 * @param rhs          Right-hand table
 * @param column_order Per-column ordering forwarded to `lexicographic::two_table_comparator`
 * @param comparator   Physical element comparator forwarded to `less`
 * @return BOOL8 column holding one comparison result per row of `lhs`
 */
template <typename PhysicalElementComparator>
auto two_table_comparison(cudf::table_view lhs,
                          cudf::table_view rhs,
                          std::vector<cudf::order> const& column_order,
                          PhysicalElementComparator comparator)
{
  auto const stream = rmm::cuda_stream_view{};

  auto const row_comparator =
    lexicographic::two_table_comparator{lhs, rhs, column_order, {}, stream};
  auto const row_less = row_comparator.less(cudf::nullate::NO{}, comparator);

  auto const lhs_begin = cudf::experimental::row::lhs_iterator(0);
  auto const rhs_begin = cudf::experimental::row::rhs_iterator(0);

  // One boolean slot per left-hand row; no null mask is allocated.
  auto const row_count = lhs.num_rows();
  auto result          = cudf::make_numeric_column(
    cudf::data_type(cudf::type_id::BOOL8), row_count, cudf::mask_state::UNALLOCATED);
  auto* const d_result = result->mutable_view().data<bool>();

  thrust::transform(
    rmm::exec_policy(stream), lhs_begin, lhs_begin + row_count, rhs_begin, d_result, row_less);

  return result;
}

/**
 * @brief Applies the `equal_to` row comparator of `input` to each row paired with itself.
 *
 * Both index ranges handed to `thrust::transform` are the same counting sequence starting at 0,
 * so element `i` of the result is `equal_to(i, i)`. The comparator is built with
 * `cudf::nullate::NO{}` and `cudf::null_equality::EQUAL`.
 *
 * @param input        Table whose rows are compared
 * @param column_order Not read by this function; present in the signature only
 * @param comparator   Physical equality comparator forwarded to `equal_to`
 * @return BOOL8 column holding one comparison result per row of `input`
 */
template <typename PhysicalElementComparator>
auto self_equality(cudf::table_view input,
                   std::vector<cudf::order> const& column_order,
                   PhysicalElementComparator comparator)
{
  auto const stream = rmm::cuda_stream_view{};

  auto const row_comparator = equality::self_comparator{input, stream};
  auto const row_equal =
    row_comparator.equal_to(cudf::nullate::NO{}, cudf::null_equality::EQUAL, comparator);

  // One boolean slot per input row; no null mask is allocated.
  auto const row_count = input.num_rows();
  auto result          = cudf::make_numeric_column(
    cudf::data_type(cudf::type_id::BOOL8), row_count, cudf::mask_state::UNALLOCATED);
  auto* const d_result = result->mutable_view().data<bool>();

  auto const row_begin = thrust::make_counting_iterator(0);
  auto const row_end   = thrust::make_counting_iterator(row_count);
  thrust::transform(rmm::exec_policy(stream), row_begin, row_end, row_begin, d_result, row_equal);

  return result;
}

/**
 * @brief Applies the two-table `equal_to` row comparator to matching rows of two tables.
 *
 * Row `i` of `lhs` is compared against row `i` of `rhs`, using `lhs_iterator` / `rhs_iterator`
 * index ranges that both start at 0. The number of comparisons is `lhs.num_rows()`. The
 * comparator is built with `cudf::nullate::NO{}` and `cudf::null_equality::EQUAL`.
 *
 * @param lhs          Left-hand table
 * @param rhs          Right-hand table
 * @param column_order Not read by this function; present in the signature only
 * @param comparator   Physical equality comparator forwarded to `equal_to`
 * @return BOOL8 column holding one comparison result per row of `lhs`
 */
template <typename PhysicalElementComparator>
auto two_table_equality(cudf::table_view lhs,
                        cudf::table_view rhs,
                        std::vector<cudf::order> const& column_order,
                        PhysicalElementComparator comparator)
{
  auto const stream = rmm::cuda_stream_view{};

  auto const row_comparator = equality::two_table_comparator{lhs, rhs, stream};
  auto const row_equal =
    row_comparator.equal_to(cudf::nullate::NO{}, cudf::null_equality::EQUAL, comparator);

  auto const lhs_begin = cudf::experimental::row::lhs_iterator(0);
  auto const rhs_begin = cudf::experimental::row::rhs_iterator(0);

  // One boolean slot per left-hand row; no null mask is allocated.
  auto const row_count = lhs.num_rows();
  auto result          = cudf::make_numeric_column(
    cudf::data_type(cudf::type_id::BOOL8), row_count, cudf::mask_state::UNALLOCATED);
  auto* const d_result = result->mutable_view().data<bool>();

  thrust::transform(
    rmm::exec_policy(stream), lhs_begin, lhs_begin + row_count, rhs_begin, d_result, row_equal);

  return result;
}

// Two single-column tables compared row by row under DESCENDING order. The expected mask is
// true where the left value is greater than the right one (1>0, 2>1, 4>3) and false for 3 vs 4.
// Both physical element comparators must produce the same mask for these NaN-free inputs.
TYPED_TEST(TypedTableViewTest, TestLexicographicalComparatorTwoTables)
{
  using T = TypeParam;

  auto const lhs_col = fixed_width_column_wrapper<T>{{1, 2, 3, 4}};
  auto const rhs_col = fixed_width_column_wrapper<T>{{0, 1, 4, 3}};
  auto const lhs_tv  = cudf::table_view{{lhs_col}};
  auto const rhs_tv  = cudf::table_view{{rhs_col}};
  auto const order   = std::vector{cudf::order::DESCENDING};

  auto const expected_mask = fixed_width_column_wrapper<bool>{{1, 1, 0, 1}};

  auto const physical_result =
    two_table_comparison(lhs_tv, rhs_tv, order, lexicographic::physical_element_comparator{});
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_mask, physical_result->view());

  auto const sorting_result = two_table_comparison(
    lhs_tv, rhs_tv, order, lexicographic::sorting_physical_element_comparator{});
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_mask, sorting_result->view());
}

// Each row is compared with itself, so `less` is expected to be false for every row,
// whichever physical element comparator is used.
TYPED_TEST(TypedTableViewTest, TestLexicographicalComparatorSameTable)
{
  using T = TypeParam;

  auto const values   = fixed_width_column_wrapper<T>{{1, 2, 3, 4}};
  auto const input_tv = cudf::table_view{{values}};
  auto const order    = std::vector{cudf::order::DESCENDING};

  auto const expected_mask = fixed_width_column_wrapper<bool>{{0, 0, 0, 0}};

  auto const physical_result =
    self_comparison(input_tv, order, lexicographic::physical_element_comparator{});
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_mask, physical_result->view());

  auto const sorting_result =
    self_comparison(input_tv, order, lexicographic::sorting_physical_element_comparator{});
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_mask, sorting_result->view());
}

/**
 * @brief Typed fixture for the NaN-specific row-operator tests.
 *
 * Adds no members of its own; everything is inherited from `cudf::test::BaseFixture`.
 */
template <typename T>
struct NaNTableViewTest : cudf::test::BaseFixture {};

// Instantiated for FloatingPointTypes only; the tests in this suite build columns from NAN.
TYPED_TEST_SUITE(NaNTableViewTest, FloatingPointTypes);

// Row pairs under DESCENDING order: (NaN, NaN), (NaN, 1), (1, NaN), (1, 1).
// - physical_element_comparator is expected to report false for every pair.
// - sorting_physical_element_comparator is expected to report true only for (NaN, 1).
TYPED_TEST(NaNTableViewTest, TestLexicographicalComparatorTwoTableNaNCase)
{
  using T = TypeParam;

  auto const lhs_col = fixed_width_column_wrapper<T>{{T(NAN), T(NAN), T(1), T(1)}};
  auto const rhs_col = fixed_width_column_wrapper<T>{{T(NAN), T(1), T(NAN), T(1)}};
  auto const lhs_tv  = cudf::table_view{{lhs_col}};
  auto const rhs_tv  = cudf::table_view{{rhs_col}};
  auto const order   = std::vector{cudf::order::DESCENDING};

  auto const physical_expected = fixed_width_column_wrapper<bool>{{0, 0, 0, 0}};
  auto const physical_result =
    two_table_comparison(lhs_tv, rhs_tv, order, lexicographic::physical_element_comparator{});
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(physical_expected, physical_result->view());

  auto const sorting_expected = fixed_width_column_wrapper<bool>{{0, 1, 0, 0}};
  auto const sorting_result   = two_table_comparison(
    lhs_tv, rhs_tv, order, lexicographic::sorting_physical_element_comparator{});
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(sorting_expected, sorting_result->view());
}

// Row pairs: (NaN, NaN), (NaN, 1), (1, NaN), (1, 1).
// - physical_equality_comparator is expected to report equality only for (1, 1).
// - nan_equal_physical_equality_comparator is expected to also report (NaN, NaN) as equal.
// The ordering vector is passed along to the helper, which does not read it.
TYPED_TEST(NaNTableViewTest, TestEqualityComparatorTwoTableNaNCase)
{
  using T = TypeParam;

  auto const lhs_col = fixed_width_column_wrapper<T>{{T(NAN), T(NAN), T(1), T(1)}};
  auto const rhs_col = fixed_width_column_wrapper<T>{{T(NAN), T(1), T(NAN), T(1)}};
  auto const lhs_tv  = cudf::table_view{{lhs_col}};
  auto const rhs_tv  = cudf::table_view{{rhs_col}};
  auto const order   = std::vector{cudf::order::DESCENDING};

  auto const physical_expected = fixed_width_column_wrapper<bool>{{0, 0, 0, 1}};
  auto const physical_result =
    two_table_equality(lhs_tv, rhs_tv, order, equality::physical_equality_comparator{});
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(physical_expected, physical_result->view());

  auto const nan_equal_expected = fixed_width_column_wrapper<bool>{{1, 0, 0, 1}};
  auto const nan_equal_result =
    two_table_equality(lhs_tv, rhs_tv, order, equality::nan_equal_physical_equality_comparator{});
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(nan_equal_expected, nan_equal_result->view());
}