Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[gpuCI] Forward-merge branch-22.06 to branch-22.08 [skip gpuci] #10902

Merged
merged 1 commit into from
May 19, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions cpp/include/cudf_test/column_wrapper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1441,7 +1441,44 @@ class lists_column_wrapper : public detail::column_wrapper {
build_from_nested(elements, validity);
}

/**
 * @brief Build a list column made of exactly one empty row, which may optionally be null.
 *
 * The column is assembled from a two-element offsets column `{0, 0}` and a values column
 * that holds no elements.
 *
 * NOTE(review): the values child is always created as `int`, independent of `T` — confirm
 * that this is intended for non-`int` instantiations.
 *
 * @param valid `true` (the default) leaves the single row valid; `false` marks it as null
 * @return A one-row list column wrapper
 */
static lists_column_wrapper<T> make_one_empty_row_column(bool valid = true)
{
  cudf::test::fixed_width_column_wrapper<cudf::offset_type> row_offsets{0, 0};
  cudf::test::fixed_width_column_wrapper<int> no_values{};

  // valid row: zero nulls and an empty (default-constructed) null mask buffer
  if (valid) {
    return lists_column_wrapper<T>(
      1, row_offsets.release(), no_values.release(), 0, rmm::device_buffer{});
  }

  // null row: one null, backed by a single-row mask created in the ALL_NULL state
  return lists_column_wrapper<T>(1,
                                 row_offsets.release(),
                                 no_values.release(),
                                 1,
                                 cudf::create_null_mask(1, cudf::mask_state::ALL_NULL));
}

private:
/**
 * @brief Assemble a list column directly from its already-built components.
 *
 * Every component is forwarded to `make_lists_column`, and the column it returns is stored
 * as the wrapped column.
 *
 * @param num_rows The number of lists (rows) in the column
 * @param offsets The offsets column for the lists; moved from
 * @param values The child values column delimited by the offsets; moved from
 * @param null_count The number of null lists
 * @param null_mask The null mask buffer for the lists; moved from
 */
lists_column_wrapper(size_type num_rows,
                     std::unique_ptr<cudf::column>&& offsets,
                     std::unique_ptr<cudf::column>&& values,
                     size_type null_count,
                     rmm::device_buffer&& null_mask)
{
  // build the list column first, then hand it over to the wrapper
  auto list_column = make_lists_column(
    num_rows, std::move(offsets), std::move(values), null_count, std::move(null_mask));
  wrapped = std::move(list_column);
}

/**
* @brief Initialize as a nested list column composed of other list columns.
*
Expand Down
3 changes: 2 additions & 1 deletion cpp/tests/quantiles/percentile_approx_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -404,7 +404,8 @@ TEST_F(PercentileApproxTest, EmptyInput)
3,
cudf::test::detail::make_null_mask(nulls.begin(), nulls.end()));

CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, *expected);
// TODO: change percentile_approx to produce sanitary list outputs for this case.
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, *expected);
}

TEST_F(PercentileApproxTest, EmptyPercentiles)
Expand Down
86 changes: 60 additions & 26 deletions cpp/tests/utilities/column_utilities.cu
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,50 @@ namespace test {

namespace {

// build an INT32 column with `num_rows` entries holding the ascending sequence
// 0, 1, ..., num_rows - 1 (one index per row). the column is created with
// mask_state::UNALLOCATED.
std::unique_ptr<column> generate_all_row_indices(size_type num_rows)
{
  auto row_indices =
    cudf::make_fixed_width_column(data_type{type_id::INT32}, num_rows, mask_state::UNALLOCATED);

  // fill the column in place through a single mutable view
  auto indices_view = row_indices->mutable_view();
  thrust::sequence(
    rmm::exec_policy(), indices_view.begin<size_type>(), indices_view.end<size_type>(), 0);

  return row_indices;
}

// generate the row indices that should be checked for the child column of a list column.
//
// - if we are just checking for equivalence, we can skip any rows that are nulls. this allows
// things like non-empty rows that have been nullified after creation. they may actually contain
// values, but since the row is null they don't matter for equivalency.
//
// - if we are checking for exact equality, we need to check all rows.
//
// This allows us to differentiate between:
//
// List<int32_t>:
// Length : 1
// Offsets : 0, 4
// Null count: 1
// 0
// 0, 1, 2, 3
//
// List<int32_t>:
// Length : 1
// Offsets : 0, 0
// Null count: 1
// 0
//
std::unique_ptr<column> generate_child_row_indices(lists_column_view const& c,
column_view const& row_indices)
column_view const& row_indices,
bool check_exact_equality)
{
// if we are checking for exact equality, we should be checking for "unsanitized" data that may
// be hiding underneath nulls. so check all rows instead of just non-null rows
if (check_exact_equality) {
return generate_all_row_indices(c.get_sliced_child(rmm::cuda_stream_default).size());
}

// Example input
// List<int32_t>:
// Length : 7
Expand Down Expand Up @@ -280,13 +320,16 @@ struct column_property_comparator {
cudf::lists_column_view rhs_l(rhs);

// recurse
auto lhs_child = lhs_l.get_sliced_child(rmm::cuda_stream_default);
// note: if a column is all nulls or otherwise empty, no indices are generated and no recursion
// happens
auto lhs_child_indices = generate_child_row_indices(lhs_l, lhs_row_indices);

// note: if a column is all nulls (and we are only checking for equivalence) or otherwise empty,
// no indices are generated and no recursion happens
auto lhs_child_indices =
generate_child_row_indices(lhs_l, lhs_row_indices, check_exact_equality);
if (lhs_child_indices->size() > 0) {
auto rhs_child = rhs_l.get_sliced_child(rmm::cuda_stream_default);
auto rhs_child_indices = generate_child_row_indices(rhs_l, rhs_row_indices);
auto lhs_child = lhs_l.get_sliced_child(rmm::cuda_stream_default);
auto rhs_child = rhs_l.get_sliced_child(rmm::cuda_stream_default);
auto rhs_child_indices =
generate_child_row_indices(rhs_l, rhs_row_indices, check_exact_equality);
return cudf::type_dispatcher(lhs_child.type(),
column_property_comparator<check_exact_equality>{},
lhs_child,
Expand Down Expand Up @@ -647,14 +690,16 @@ struct column_comparator_impl<list_view, check_exact_equality> {
return false;
}

// recurse.
auto lhs_child = lhs_l.get_sliced_child(rmm::cuda_stream_default);
// note: if a column is all nulls or otherwise empty, no indices are generated and no recursion
// happens
auto lhs_child_indices = generate_child_row_indices(lhs_l, lhs_row_indices);
// recurse
// note: if a column is all nulls (and we are only checking for equivalence) or otherwise empty,
// no indices are generated and no recursion happens
auto lhs_child_indices =
generate_child_row_indices(lhs_l, lhs_row_indices, check_exact_equality);
if (lhs_child_indices->size() > 0) {
auto rhs_child = rhs_l.get_sliced_child(rmm::cuda_stream_default);
auto rhs_child_indices = generate_child_row_indices(rhs_l, rhs_row_indices);
auto lhs_child = lhs_l.get_sliced_child(rmm::cuda_stream_default);
auto rhs_child = rhs_l.get_sliced_child(rmm::cuda_stream_default);
auto rhs_child_indices =
generate_child_row_indices(rhs_l, rhs_row_indices, check_exact_equality);
return cudf::type_dispatcher(lhs_child.type(),
column_comparator<check_exact_equality>{},
lhs_child,
Expand Down Expand Up @@ -733,17 +778,6 @@ struct column_comparator {
}
};

// build an INT32 column with `num_rows` entries holding the ascending sequence
// 0, 1, ..., num_rows - 1 (one index per row). the column is created with
// mask_state::UNALLOCATED.
std::unique_ptr<column> generate_all_row_indices(size_type num_rows)
{
  auto row_indices =
    cudf::make_fixed_width_column(data_type{type_id::INT32}, num_rows, mask_state::UNALLOCATED);

  // fill the column in place through a single mutable view
  auto indices_view = row_indices->mutable_view();
  thrust::sequence(
    rmm::exec_policy(), indices_view.begin<size_type>(), indices_view.end<size_type>(), 0);

  return row_indices;
}

} // namespace

/**
Expand Down
32 changes: 32 additions & 0 deletions cpp/tests/utilities_tests/column_utilities_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -364,6 +364,38 @@ TEST_F(ColumnUtilitiesListsTest, Equivalence)
}
}

TEST_F(ColumnUtilitiesListsTest, UnsanitaryLists)
{
  // "unsanitary" column: its only row is null, yet the offsets still span three child values
  //
  // List<int32_t>:
  // Length : 1
  // Offsets : 0, 3
  // Null count: 1
  // 0
  //    0, 1, 2
  cudf::test::fixed_width_column_wrapper<cudf::offset_type> unsanitary_offsets{0, 3};
  cudf::test::fixed_width_column_wrapper<int> unsanitary_values{0, 1, 2};
  auto unsanitary =
    cudf::make_lists_column(1,
                            unsanitary_offsets.release(),
                            unsanitary_values.release(),
                            1,
                            cudf::create_null_mask(1, cudf::mask_state::ALL_NULL));

  // "sanitary" counterpart: the same single null row, with offsets that span no child values
  //
  // List<int32_t>:
  // Length : 1
  // Offsets : 0, 0
  // Null count: 1
  // 0
  auto sanitary = cudf::test::lists_column_wrapper<int>::make_one_empty_row_column(false);

  // the two columns are expected to compare as equivalent...
  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*unsanitary, sanitary);
  // ...but not as exactly equal
  EXPECT_FALSE(
    cudf::test::expect_columns_equal(*unsanitary, sanitary, cudf::test::debug_output_level::QUIET));
}

TEST_F(ColumnUtilitiesListsTest, DifferentPhysicalStructure)
{
// list<int>
Expand Down