Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix gather for sliced input structs column #9218

Merged
merged 5 commits into from
Sep 17, 2021
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 17 additions & 9 deletions cpp/include/cudf/detail/gather.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -450,16 +450,24 @@ struct column_gatherer_impl<struct_view> {
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
structs_column_view structs_column(column);
auto gather_map_size{std::distance(gather_map_begin, gather_map_end)};
auto const gather_map_size = std::distance(gather_map_begin, gather_map_end);
if (gather_map_size == 0) { return empty_like(column); }

// Gathering needs to operate on the sliced children since they need to take into account the
// offset of the parent structs column.
std::vector<cudf::column_view> sliced_children;
std::transform(thrust::make_counting_iterator(0),
thrust::make_counting_iterator(column.num_children()),
std::back_inserter(sliced_children),
[structs_view = structs_column_view{column}](auto const idx) {
return structs_view.get_sliced_child(idx);
});

std::vector<std::unique_ptr<cudf::column>> output_struct_members;
std::transform(structs_column.child_begin(),
structs_column.child_end(),
std::transform(sliced_children.begin(),
sliced_children.end(),
std::back_inserter(output_struct_members),
[&gather_map_begin, &gather_map_end, nullify_out_of_bounds, stream, mr](
cudf::column_view const& col) {
[&](auto const& col) {
return cudf::type_dispatcher<dispatch_storage_type>(col.type(),
column_gatherer{},
col,
Expand All @@ -470,14 +478,14 @@ struct column_gatherer_impl<struct_view> {
mr);
});

auto const nullable = std::any_of(structs_column.child_begin(),
structs_column.child_end(),
auto const nullable = std::any_of(sliced_children.begin(),
sliced_children.end(),
[](auto const& col) { return col.nullable(); });
if (nullable) {
gather_bitmask(
// Table view of struct column.
cudf::table_view{
std::vector<cudf::column_view>{structs_column.child_begin(), structs_column.child_end()}},
std::vector<cudf::column_view>{sliced_children.begin(), sliced_children.end()}},
gather_map_begin,
output_struct_members,
nullify_out_of_bounds ? gather_bitmask_op::NULLIFY : gather_bitmask_op::DONT_CHECK,
Expand Down
65 changes: 65 additions & 0 deletions cpp/tests/copying/gather_struct_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <cudf_test/column_utilities.hpp>
#include <cudf_test/column_wrapper.hpp>
#include <cudf_test/cudf_gtest.hpp>
#include <cudf_test/iterator_utilities.hpp>
#include <cudf_test/type_lists.hpp>

#include <cudf/column/column_factories.hpp>
Expand Down Expand Up @@ -178,6 +179,70 @@ TYPED_TEST(TypedStructGatherTest, TestSimpleStructGather)
expect_columns_equivalent(*expected_struct_column, gathered_struct_col);
}

TYPED_TEST(TypedStructGatherTest, TestSlicedStructsColumnGatherNoNulls)
{
using namespace cudf::test;

auto const structs_original = [] {
auto child1 =
cudf::test::fixed_width_column_wrapper<TypeParam, int32_t>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
auto child2 = strings_column_wrapper{
"One", "Two", "Three", "Four", "Five", "Six", "Seven", "Eight", "Nine", "Ten"};
return structs_column_wrapper{{child1, child2}};
}();

auto const expected = [] {
auto child1 = cudf::test::fixed_width_column_wrapper<TypeParam, int32_t>{6, 10, 8};
auto child2 = strings_column_wrapper{"Six", "Ten", "Eight"};
return structs_column_wrapper{{child1, child2}};
}();

auto const structs = cudf::slice(structs_original, {4, 10})[0];
auto const gather_map = cudf::test::fixed_width_column_wrapper<int32_t>{1, 5, 3};
auto const result = cudf::gather(cudf::table_view{{structs}}, gather_map)->get_column(0);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.view(), expected);
}

TYPED_TEST(TypedStructGatherTest, TestSlicedStructsColumnGatherWithNulls)
{
using namespace cudf::test;
using namespace cudf::test::iterators;
auto constexpr null = int32_t{0}; // null at child
auto constexpr XXX = int32_t{0}; // null at parent

auto const structs_original = [] {
auto child1 = cudf::test::fixed_width_column_wrapper<TypeParam, int32_t>{
{null, XXX, 3, null, null, 6, XXX, null, null, 10}, nulls_at({0, 3, 4, 7, 8})};
auto child2 = strings_column_wrapper{{"One",
"" /*NULL at both parent and child*/,
"Three",
"" /*NULL*/,
"Five",
"" /*NULL*/,
"" /*NULL at parent*/,
"" /*NULL*/,
"Nine",
"" /*NULL*/},
nulls_at({1, 3, 5, 7, 9})};
return structs_column_wrapper{{child1, child2}, nulls_at({1, 6})};
}();

auto const expected = [] {
auto child1 =
cudf::test::fixed_width_column_wrapper<TypeParam, int32_t>{{6, 10, null, XXX}, null_at(2)};
auto child2 = strings_column_wrapper{{
"" /*NULL*/, "" /*NULL*/, "Nine", "" /*NULL at parent*/
},
nulls_at({0, 1})};
return structs_column_wrapper{{child1, child2}, null_at(3)};
}();

auto const structs = cudf::slice(structs_original, {4, 10})[0];
auto const gather_map = cudf::test::fixed_width_column_wrapper<int32_t>{1, 5, 4, 2};
auto const result = cudf::gather(cudf::table_view{{structs}}, gather_map)->get_column(0);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.view(), expected);
}

TYPED_TEST(TypedStructGatherTest, TestGatherStructOfLists)
{
using namespace cudf::test;
Expand Down