From 970cc3c80d8e76cc3dece1f62205eb906466ca1f Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 10 Sep 2021 21:20:06 -0600 Subject: [PATCH 1/4] Add tests --- cpp/tests/copying/gather_struct_tests.cpp | 66 +++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/cpp/tests/copying/gather_struct_tests.cpp b/cpp/tests/copying/gather_struct_tests.cpp index 96d72ee4e89..f8f68b26345 100644 --- a/cpp/tests/copying/gather_struct_tests.cpp +++ b/cpp/tests/copying/gather_struct_tests.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -178,6 +179,71 @@ TYPED_TEST(TypedStructGatherTest, TestSimpleStructGather) expect_columns_equivalent(*expected_struct_column, gathered_struct_col); } +TYPED_TEST(TypedStructGatherTest, TestSlicedStructsColumnGatherNoNulls) +{ + using namespace cudf::test; + + auto const structs_original = [] { + auto child1 = + cudf::test::fixed_width_column_wrapper{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + auto child2 = strings_column_wrapper{ + "One", "Two", "Three", "Four", "Five", "Six", "Seven", "Eight", "Nine", "Ten"}; + return structs_column_wrapper{{child1, child2}}; + }(); + + auto const expected = [] { + auto child1 = cudf::test::fixed_width_column_wrapper{6, 10, 8}; + auto child2 = strings_column_wrapper{"Six", "Ten", "Eight"}; + return structs_column_wrapper{{child1, child2}}; + }(); + + auto const structs = cudf::slice(structs_original, {4, 10})[0]; + auto const gather_map = cudf::test::fixed_width_column_wrapper{1, 5, 3}; + auto const result = cudf::gather(cudf::table_view{{structs}}, gather_map)->get_column(0); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.view(), expected); +} + +TYPED_TEST(TypedStructGatherTest, TestSlicedStructsColumnGatherWithNulls) +{ + using namespace cudf::test; + using namespace cudf::test::iterators; + auto constexpr null = int32_t{0}; // null at child + auto constexpr XXX = int32_t{0}; // null at parent + + auto const structs_original = [] { + auto child1 = cudf::test::fixed_width_column_wrapper{ + {null, XXX, 3, null, null, 6, XXX, null, null, 10}, nulls_at({0, 3, 4, 7, 8})}; + auto child2 = strings_column_wrapper{{"One", + "" /*NULL at both parent and child*/, + "Three", + "" /*NULL*/, + "Five", + "" /*NULL*/, + "" /*NULL at parent*/, + "" /*NULL*/, + "Nine", + "" /*NULL*/}, + nulls_at({1, 3, 5, 7, 9})}; + return structs_column_wrapper{{child1, child2}, nulls_at({1, 6})}; + }(); + + auto const expected = [] { + auto child1 = + cudf::test::fixed_width_column_wrapper{{6, 10, null, XXX}, null_at(2)}; + auto child2 = strings_column_wrapper{{ + "" /*NULL*/, "" /*NULL*/, "Nine", "" /*NULL at parent*/ + }, + nulls_at({0, 1})}; + return structs_column_wrapper{{child1, child2}, null_at(3)}; + }(); + + auto const structs = cudf::slice(structs_original, {4, 10})[0]; + auto const gather_map = cudf::test::fixed_width_column_wrapper{1, 5, 4, 2}; + auto const result = cudf::gather(cudf::table_view{{structs}}, gather_map)->get_column(0); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.view(), expected); +} + TYPED_TEST(TypedStructGatherTest, TestGatherStructOfLists) { using namespace cudf::test; From 378ce991f348454dff677fff011b38e947bfb73a Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 10 Sep 2021 23:12:07 -0600 Subject: [PATCH 2/4] Fix gather.cuh --- cpp/include/cudf/detail/gather.cuh | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/cpp/include/cudf/detail/gather.cuh b/cpp/include/cudf/detail/gather.cuh index 1dd0d472d0d..527d1ebbe11 100644 --- a/cpp/include/cudf/detail/gather.cuh +++ b/cpp/include/cudf/detail/gather.cuh @@ -450,16 +450,23 @@ struct column_gatherer_impl { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - structs_column_view structs_column(column); - auto gather_map_size{std::distance(gather_map_begin, gather_map_end)}; + auto const gather_map_size = std::distance(gather_map_begin, gather_map_end); if (gather_map_size == 0) { return empty_like(column); } + // Gathering needs to operate on the sliced children to properly handle the column offset. + std::vector sliced_children; + std::transform(thrust::make_counting_iterator(0), + thrust::make_counting_iterator(column.num_children()), + std::back_inserter(sliced_children), + [structs_view = structs_column_view{column}](auto const idx) { + return structs_view.get_sliced_child(idx); + }); + std::vector> output_struct_members; - std::transform(structs_column.child_begin(), - structs_column.child_end(), + std::transform(sliced_children.begin(), + sliced_children.end(), std::back_inserter(output_struct_members), - [&gather_map_begin, &gather_map_end, nullify_out_of_bounds, stream, mr]( - cudf::column_view const& col) { + [&](auto const& col) { return cudf::type_dispatcher(col.type(), column_gatherer{}, col, @@ -470,14 +477,14 @@ struct column_gatherer_impl { mr); }); - auto const nullable = std::any_of(structs_column.child_begin(), - structs_column.child_end(), + auto const nullable = std::any_of(sliced_children.begin(), + sliced_children.end(), [](auto const& col) { return col.nullable(); }); if (nullable) { gather_bitmask( // Table view of struct column. cudf::table_view{ - std::vector{structs_column.child_begin(), structs_column.child_end()}}, + std::vector{sliced_children.begin(), sliced_children.end()}}, gather_map_begin, output_struct_members, nullify_out_of_bounds ? gather_bitmask_op::NULLIFY : gather_bitmask_op::DONT_CHECK, From dd7491991273515ac9182a432f88449ef00492d8 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 10 Sep 2021 23:18:49 -0600 Subject: [PATCH 3/4] Fix comment --- cpp/include/cudf/detail/gather.cuh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/include/cudf/detail/gather.cuh b/cpp/include/cudf/detail/gather.cuh index 527d1ebbe11..e8f54329c26 100644 --- a/cpp/include/cudf/detail/gather.cuh +++ b/cpp/include/cudf/detail/gather.cuh @@ -453,7 +453,8 @@ struct column_gatherer_impl { auto const gather_map_size = std::distance(gather_map_begin, gather_map_end); if (gather_map_size == 0) { return empty_like(column); } - // Gathering needs to operate on the sliced children to properly handle the column offset. + // Gathering needs to operate on the sliced children since they need to take into account the + // offset of the parent structs column. std::vector sliced_children; std::transform(thrust::make_counting_iterator(0), thrust::make_counting_iterator(column.num_children()), From 13b7c0f954516b6347eb46e184110cab3df910fc Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 10 Sep 2021 23:23:48 -0600 Subject: [PATCH 4/4] Remove space --- cpp/tests/copying/gather_struct_tests.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/tests/copying/gather_struct_tests.cpp b/cpp/tests/copying/gather_struct_tests.cpp index f8f68b26345..ea027a17d36 100644 --- a/cpp/tests/copying/gather_struct_tests.cpp +++ b/cpp/tests/copying/gather_struct_tests.cpp @@ -200,7 +200,6 @@ TYPED_TEST(TypedStructGatherTest, TestSlicedStructsColumnGatherNoNulls) auto const structs = cudf::slice(structs_original, {4, 10})[0]; auto const gather_map = cudf::test::fixed_width_column_wrapper{1, 5, 3}; auto const result = cudf::gather(cudf::table_view{{structs}}, gather_map)->get_column(0); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.view(), expected); }