Skip to content

Commit

Permalink
stable_distinct public api now has a stream parameter (#16068)
Browse files Browse the repository at this point in the history
As part of #15982 we determined that the cudf `stable_distinct` public API needs to be updated so that it accepts a user-provided stream.

Authors:
  - Robert Maynard (https://github.com/robertmaynard)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Srinivas Yadav (https://github.com/srinivasyadav18)
  - Bradley Dice (https://github.com/bdice)

URL: #16068
  • Loading branch information
robertmaynard authored Jun 28, 2024
1 parent 6b04fd3 commit 57862a3
Show file tree
Hide file tree
Showing 5 changed files with 240 additions and 4 deletions.
2 changes: 0 additions & 2 deletions cpp/include/cudf/detail/stream_compaction.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,6 @@ std::unique_ptr<table> distinct(table_view const& input,

/**
* @copydoc cudf::stable_distinct
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
std::unique_ptr<table> stable_distinct(table_view const& input,
std::vector<size_type> const& keys,
Expand Down
2 changes: 2 additions & 0 deletions cpp/include/cudf/stream_compaction.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,7 @@ std::unique_ptr<column> distinct_indices(
* @param keep Copy any, first, last, or none of the found duplicates
* @param nulls_equal Flag to specify whether null elements should be considered as equal
* @param nans_equal Flag to specify whether NaN elements should be considered as equal
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the returned table
* @return Table with distinct rows, preserving input order
*/
Expand All @@ -329,6 +330,7 @@ std::unique_ptr<table> stable_distinct(
duplicate_keep_option keep = duplicate_keep_option::KEEP_ANY,
null_equality nulls_equal = null_equality::EQUAL,
nan_equality nans_equal = nan_equality::ALL_EQUAL,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());

/**
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/stream_compaction/stable_distinct.cu
Original file line number Diff line number Diff line change
Expand Up @@ -79,11 +79,11 @@ std::unique_ptr<table> stable_distinct(table_view const& input,
duplicate_keep_option keep,
null_equality nulls_equal,
nan_equality nans_equal,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
CUDF_FUNC_RANGE();
return detail::stable_distinct(
input, keys, keep, nulls_equal, nans_equal, cudf::get_default_stream(), mr);
return detail::stable_distinct(input, keys, keep, nulls_equal, nans_equal, stream, mr);
}

} // namespace cudf
1 change: 1 addition & 0 deletions cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -700,6 +700,7 @@ ConfigureTest(STREAM_REPLACE_TEST streams/replace_test.cpp STREAM_MODE testing)
ConfigureTest(STREAM_ROLLING_TEST streams/rolling_test.cpp STREAM_MODE testing)
ConfigureTest(STREAM_SEARCH_TEST streams/search_test.cpp STREAM_MODE testing)
ConfigureTest(STREAM_SORTING_TEST streams/sorting_test.cpp STREAM_MODE testing)
ConfigureTest(STREAM_STREAM_COMPACTION_TEST streams/stream_compaction_test.cpp STREAM_MODE testing)
ConfigureTest(
STREAM_STRINGS_TEST
streams/strings/case_test.cpp
Expand Down
235 changes: 235 additions & 0 deletions cpp/tests/streams/stream_compaction_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,235 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <cudf_test/base_fixture.hpp>
#include <cudf_test/column_utilities.hpp>
#include <cudf_test/column_wrapper.hpp>
#include <cudf_test/iterator_utilities.hpp>
#include <cudf_test/table_utilities.hpp>

#include <cudf/copying.hpp>
#include <cudf/sorting.hpp>
#include <cudf/stream_compaction.hpp>
#include <cudf/table/table.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf/types.hpp>

#include <cmath>
#include <limits>
#include <vector>

// Integer placeholders for null slots in column literals.
auto constexpr null = 0;  // null at current level
auto constexpr XXX  = 0;  // null pushed down from parent level

// Quiet NaN used as a key value throughout the tests.
auto constexpr NaN{std::numeric_limits<double>::quiet_NaN()};

// Short names for the duplicate-retention policies passed to `cudf::stable_distinct`.
auto constexpr KEEP_ANY{cudf::duplicate_keep_option::KEEP_ANY};
auto constexpr KEEP_FIRST{cudf::duplicate_keep_option::KEEP_FIRST};
auto constexpr KEEP_LAST{cudf::duplicate_keep_option::KEEP_LAST};
auto constexpr KEEP_NONE{cudf::duplicate_keep_option::KEEP_NONE};

// Short names for the null-comparison policies.
auto constexpr NULL_EQUAL{cudf::null_equality::EQUAL};
auto constexpr NULL_UNEQUAL{cudf::null_equality::UNEQUAL};

// Short names for the NaN-comparison policies.
auto constexpr NAN_EQUAL{cudf::nan_equality::ALL_EQUAL};
auto constexpr NAN_UNEQUAL{cudf::nan_equality::UNEQUAL};

// Column-wrapper shorthands used to spell out input and expected columns inline.
using int32s_col = cudf::test::fixed_width_column_wrapper<int32_t>;
using floats_col = cudf::test::fixed_width_column_wrapper<float>;

// Names pulled into file scope for brevity. Of these, only `null_at` and `nulls_at`
// are referenced by the tests visible in this file.
using cudf::nan_policy;
using cudf::null_equality;
using cudf::null_policy;
using cudf::test::iterators::no_nulls;
using cudf::test::iterators::null_at;
using cudf::test::iterators::nulls_at;

// Fixture for the `cudf::stable_distinct` tests that use `KEEP_ANY`.
struct StableDistinctKeepAny : cudf::test::BaseFixture {};

// Fixture for the `cudf::stable_distinct` tests that use `KEEP_FIRST`, `KEEP_LAST` and `KEEP_NONE`.
struct StableDistinctKeepFirstLastNone : cudf::test::BaseFixture {};

// Runs `cudf::stable_distinct` with KEEP_ANY on a null-free table keyed on an
// (int32, float) column pair containing NaNs, passing the test stream explicitly.
TEST_F(StableDistinctKeepAny, NoNullsTableWithNaNs)
{
  // KEEP_ANY is nondeterministic about which duplicate survives, so rows with
  // equivalent keys are placed in contiguous groups and hold identical values.
  auto const payload_ints   = int32s_col{6, 6, 6, 1, 1, 1, 3, 5, 8, 5};
  auto const payload_floats = floats_col{6, 6, 6, 1, 1, 1, 3, 4, 9, 4};
  auto const key_ints       = int32s_col{20, 20, 20, 15, 15, 15, 20, 19, 21, 9};
  auto const key_floats     = floats_col{19., 19., 19., NaN, NaN, NaN, 20., 20., 9., 21.};

  auto const input = cudf::table_view{{payload_ints, payload_floats, key_ints, key_floats}};
  std::vector<cudf::size_type> const key_idx{2, 3};

  // Only the NaN policy varies between the two scenarios below.
  auto const run_distinct = [&](cudf::nan_equality nans_equal) {
    return cudf::stable_distinct(
      input, key_idx, KEEP_ANY, NULL_EQUAL, nans_equal, cudf::test::get_default_stream());
  };

  // NaNs are unequal: every (15, NaN) row is its own group and is retained.
  {
    auto const want_ints       = int32s_col{6, 1, 1, 1, 3, 5, 8, 5};
    auto const want_floats     = floats_col{6, 1, 1, 1, 3, 4, 9, 4};
    auto const want_key_ints   = int32s_col{20, 15, 15, 15, 20, 19, 21, 9};
    auto const want_key_floats = floats_col{19., NaN, NaN, NaN, 20., 20., 9., 21.};
    auto const expected =
      cudf::table_view{{want_ints, want_floats, want_key_ints, want_key_floats}};

    auto const result = run_distinct(NAN_UNEQUAL);
    CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
  }

  // NaNs are equal: the three (15, NaN) rows collapse into one.
  {
    auto const want_ints       = int32s_col{6, 1, 3, 5, 8, 5};
    auto const want_floats     = floats_col{6, 1, 3, 4, 9, 4};
    auto const want_key_ints   = int32s_col{20, 15, 20, 19, 21, 9};
    auto const want_key_floats = floats_col{19., NaN, 20., 20., 9., 21.};
    auto const expected =
      cudf::table_view{{want_ints, want_floats, want_key_ints, want_key_floats}};

    auto const result = run_distinct(NAN_EQUAL);
    CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
  }
}

// Runs `cudf::stable_distinct` with KEEP_ANY on a single float key column holding
// both nulls and NaNs, covering all four null/NaN equality combinations on the test stream.
TEST_F(StableDistinctKeepAny, InputWithNullsAndNaNs)
{
  auto constexpr null = 0.0;  // shadow the global `null` variable of type int

  // KEEP_ANY is nondeterministic about which duplicate survives, so rows with
  // equivalent keys are placed in contiguous groups and hold identical values.
  auto const values = int32s_col{5, 4, 4, 1, 1, 1, 8, 8, 1};
  auto const key_col =
    floats_col{{20., null, null, NaN, NaN, NaN, 19., 19., 21.}, nulls_at({1, 2})};
  auto const input = cudf::table_view{{values, key_col}};
  std::vector<cudf::size_type> const key_idx{1};

  // Both equality policies vary between scenarios; everything else is fixed.
  auto const run_distinct = [&](cudf::null_equality nulls_equal, cudf::nan_equality nans_equal) {
    return cudf::stable_distinct(
      input, key_idx, KEEP_ANY, nulls_equal, nans_equal, cudf::test::get_default_stream());
  };

  // Nulls are equal, NaNs are unequal.
  {
    auto const want_values = int32s_col{5, 4, 1, 1, 1, 8, 1};
    auto const want_keys   = floats_col{{20., null, NaN, NaN, NaN, 19., 21.}, null_at(1)};
    auto const expected    = cudf::table_view{{want_values, want_keys}};

    auto const result = run_distinct(NULL_EQUAL, NAN_UNEQUAL);
    CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
  }

  // Nulls are equal, NaNs are equal.
  {
    auto const want_values = int32s_col{5, 4, 1, 8, 1};
    auto const want_keys   = floats_col{{20., null, NaN, 19., 21.}, null_at(1)};
    auto const expected    = cudf::table_view{{want_values, want_keys}};

    auto const result = run_distinct(NULL_EQUAL, NAN_EQUAL);
    CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
  }

  // Nulls are unequal, NaNs are unequal.
  {
    auto const want_values = int32s_col{5, 4, 4, 1, 1, 1, 8, 1};
    auto const want_keys =
      floats_col{{20., null, null, NaN, NaN, NaN, 19., 21.}, nulls_at({1, 2})};
    auto const expected = cudf::table_view{{want_values, want_keys}};

    auto const result = run_distinct(NULL_UNEQUAL, NAN_UNEQUAL);
    CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
  }

  // Nulls are unequal, NaNs are equal.
  {
    auto const want_values = int32s_col{5, 4, 4, 1, 8, 1};
    auto const want_keys   = floats_col{{20., null, null, NaN, 19., 21.}, nulls_at({1, 2})};
    auto const expected    = cudf::table_view{{want_values, want_keys}};

    auto const result = run_distinct(NULL_UNEQUAL, NAN_EQUAL);
    CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
  }
}

// Runs `cudf::stable_distinct` with KEEP_FIRST, KEEP_LAST and KEEP_NONE on the test
// stream, with NaNs (and nulls) compared as equal.
TEST_F(StableDistinctKeepFirstLastNone, InputWithNaNsEqual)
{
  // The value column holds a different value in every row, so the expected output
  // shows exactly which row of each duplicate-key group was retained.
  auto const values  = int32s_col{0, 1, 2, 3, 4, 5, 6};
  auto const key_col = floats_col{20., NaN, NaN, 19., 21., 19., 22.};
  auto const input   = cudf::table_view{{values, key_col}};
  std::vector<cudf::size_type> const key_idx{1};

  // Only the keep option varies between the scenarios below.
  auto const run_distinct = [&](cudf::duplicate_keep_option keep) {
    return cudf::stable_distinct(
      input, key_idx, keep, NULL_EQUAL, NAN_EQUAL, cudf::test::get_default_stream());
  };

  // KEEP_FIRST
  {
    auto const want_values = int32s_col{0, 1, 3, 4, 6};
    auto const want_keys   = floats_col{20., NaN, 19., 21., 22.};
    auto const expected    = cudf::table_view{{want_values, want_keys}};

    auto const result = run_distinct(KEEP_FIRST);
    CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
  }

  // KEEP_LAST
  {
    auto const want_values = int32s_col{0, 2, 4, 5, 6};
    auto const want_keys   = floats_col{20., NaN, 21., 19., 22.};
    auto const expected    = cudf::table_view{{want_values, want_keys}};

    auto const result = run_distinct(KEEP_LAST);
    CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
  }

  // KEEP_NONE
  {
    auto const want_values = int32s_col{0, 4, 6};
    auto const want_keys   = floats_col{20., 21., 22.};
    auto const expected    = cudf::table_view{{want_values, want_keys}};

    auto const result = run_distinct(KEEP_NONE);
    CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
  }
}

// Runs `cudf::stable_distinct` with KEEP_FIRST, KEEP_LAST and KEEP_NONE on the test
// stream, with NaNs (and nulls) compared as unequal.
TEST_F(StableDistinctKeepFirstLastNone, InputWithNaNsUnequal)
{
  // The value column holds a different value in every row, so the expected output
  // shows exactly which row of each duplicate-key group was retained.
  auto const values  = int32s_col{0, 1, 2, 3, 4, 5, 6, 7};
  auto const key_col = floats_col{20., NaN, NaN, 19., 21., 19., 22., 20.};
  auto const input   = cudf::table_view{{values, key_col}};
  std::vector<cudf::size_type> const key_idx{1};

  // Only the keep option varies between the scenarios below.
  auto const run_distinct = [&](cudf::duplicate_keep_option keep) {
    return cudf::stable_distinct(
      input, key_idx, keep, NULL_UNEQUAL, NAN_UNEQUAL, cudf::test::get_default_stream());
  };

  // KEEP_FIRST
  {
    auto const want_values = int32s_col{0, 1, 2, 3, 4, 6};
    auto const want_keys   = floats_col{20., NaN, NaN, 19., 21., 22.};
    auto const expected    = cudf::table_view{{want_values, want_keys}};

    auto const result = run_distinct(KEEP_FIRST);
    CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
  }

  // KEEP_LAST
  {
    auto const want_values = int32s_col{1, 2, 4, 5, 6, 7};
    auto const want_keys   = floats_col{NaN, NaN, 21., 19., 22., 20.};
    auto const expected    = cudf::table_view{{want_values, want_keys}};

    auto const result = run_distinct(KEEP_LAST);
    CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
  }

  // KEEP_NONE
  {
    auto const want_values = int32s_col{1, 2, 4, 6};
    auto const want_keys   = floats_col{NaN, NaN, 21., 22.};
    auto const expected    = cudf::table_view{{want_values, want_keys}};

    auto const result = run_distinct(KEEP_NONE);
    CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
  }
}

0 comments on commit 57862a3

Please sign in to comment.