Skip to content

Commit

Permalink
Use distinct in Cython
Browse files (browse the repository at this point in the history)
  • Loading branch information
ttnghia committed Jun 9, 2022
1 parent 374545a commit 9bf540a
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 16 deletions.
6 changes: 6 additions & 0 deletions python/cudf/cudf/_lib/cpp/stream_compaction.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,12 @@ cdef extern from "cudf/stream_compaction.hpp" namespace "cudf" \
duplicate_keep_option keep,
null_equality nulls_equal) except +

# Extern declaration of `distinct` from "cudf/stream_compaction.hpp"
# (namespace "cudf"), making the C++ function callable from Cython.
#   source_table : view of the input table.
#   keys         : vector of size_type values selecting the key columns
#                  (presumably column indices - confirm against libcudf docs).
#   keep         : duplicate_keep_option choosing which duplicate is retained.
#   nulls_equal  : null_equality setting used when comparing rows.
# Returns the result as a unique_ptr[table].
# `except +` tells Cython to translate a thrown C++ exception into a
# Python exception instead of letting it propagate through the C layer.
cdef unique_ptr[table] distinct(
table_view source_table,
vector[size_type] keys,
duplicate_keep_option keep,
null_equality nulls_equal) except +

cdef size_type distinct_count(
column_view source_table,
null_policy null_handling,
Expand Down
19 changes: 3 additions & 16 deletions python/cudf/cudf/_lib/stream_compaction.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,12 @@ from libcpp.vector cimport vector

from cudf._lib.column cimport Column
from cudf._lib.cpp.column.column_view cimport column_view
from cudf._lib.cpp.sorting cimport stable_sort_by_key as cpp_stable_sort_by_key
from cudf._lib.cpp.stream_compaction cimport (
apply_boolean_mask as cpp_apply_boolean_mask,
distinct_count as cpp_distinct_count,
drop_nulls as cpp_drop_nulls,
duplicate_keep_option,
unique as cpp_unique,
distinct as cpp_distinct,
)
from cudf._lib.cpp.table.table cimport table
from cudf._lib.cpp.table.table_view cimport table_view
Expand Down Expand Up @@ -167,21 +166,9 @@ def drop_duplicates(list columns,
cdef unique_ptr[table] c_result

with nogil:
# cudf::unique keeps unique rows in each consecutive group of
# equivalent rows. To match the behavior of pandas.DataFrame.
# drop_duplicates, users need to stable sort the input first
# and then invoke cudf::unique.
sorted_source_table = move(
cpp_stable_sort_by_key(
source_table_view,
keys_view,
column_order,
null_precedence
)
)
c_result = move(
cpp_unique(
sorted_source_table.get().view(),
cpp_distinct(
source_table_view,
cpp_keys,
cpp_keep_option,
cpp_nulls_equal
Expand Down

0 comments on commit 9bf540a

Please sign in to comment.