Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Migrate lists/contains to pylibcudf #15981

Merged
merged 20 commits into from
Jun 28, 2024
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 20 additions & 54 deletions python/cudf/cudf/_lib/lists.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,6 @@ from libcpp.utility cimport move
from cudf._lib.column cimport Column
from cudf._lib.pylibcudf.libcudf.column.column cimport column
from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
from cudf._lib.pylibcudf.libcudf.lists.contains cimport (
contains,
index_of as cpp_index_of,
)
from cudf._lib.pylibcudf.libcudf.lists.count_elements cimport (
count_elements as cpp_count_elements,
)
Expand All @@ -26,15 +22,13 @@ from cudf._lib.pylibcudf.libcudf.lists.sorting cimport (
from cudf._lib.pylibcudf.libcudf.lists.stream_compaction cimport (
distinct as cpp_distinct,
)
from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport scalar
from cudf._lib.pylibcudf.libcudf.types cimport (
nan_equality,
null_equality,
null_order,
order,
size_type,
)
from cudf._lib.scalar cimport DeviceScalar
from cudf._lib.utils cimport columns_from_pylibcudf_table

from cudf._lib import pylibcudf
Expand Down Expand Up @@ -153,64 +147,36 @@ def extract_element_column(Column col, Column index):


@acquire_spill_lock()
def contains_scalar(Column col, object py_search_key):

cdef DeviceScalar search_key = py_search_key.device_value

cdef shared_ptr[lists_column_view] list_view = (
make_shared[lists_column_view](col.view())
def contains_scalar(Column col, py_search_key):
return Column.from_pylibcudf(
pylibcudf.lists.contains(
col.to_pylibcudf(mode="read"),
py_search_key.device_value,
)
)
cdef const scalar* search_key_value = search_key.get_raw_ptr()

cdef unique_ptr[column] c_result

with nogil:
c_result = move(contains(
list_view.get()[0],
search_key_value[0],
))
result = Column.from_unique_ptr(move(c_result))
return result


@acquire_spill_lock()
def index_of_scalar(Column col, object py_search_key):

cdef DeviceScalar search_key = py_search_key.device_value

cdef shared_ptr[lists_column_view] list_view = (
make_shared[lists_column_view](col.view())
def index_of_scalar(Column col, object py_search_key, find_first_option=True):
Matt711 marked this conversation as resolved.
Show resolved Hide resolved
return Column.from_pylibcudf(
pylibcudf.lists.index_of(
col.to_pylibcudf(mode="read"),
py_search_key.device_value,
find_first_option,
)
)
cdef const scalar* search_key_value = search_key.get_raw_ptr()

cdef unique_ptr[column] c_result

with nogil:
c_result = move(cpp_index_of(
list_view.get()[0],
search_key_value[0],
))
return Column.from_unique_ptr(move(c_result))


@acquire_spill_lock()
def index_of_column(Column col, Column search_keys):

cdef column_view keys_view = search_keys.view()

cdef shared_ptr[lists_column_view] list_view = (
make_shared[lists_column_view](col.view())
def index_of_column(Column col, Column search_keys, find_first_option=True):
return Column.from_pylibcudf(
pylibcudf.lists.index_of(
col.to_pylibcudf(mode="read"),
search_keys.to_pylibcudf(mode="read"),
find_first_option,
)
)

cdef unique_ptr[column] c_result

with nogil:
c_result = move(cpp_index_of(
list_view.get()[0],
keys_view,
))
return Column.from_unique_ptr(move(c_result))


@acquire_spill_lock()
def concatenate_rows(list source_columns):
Expand Down
29 changes: 23 additions & 6 deletions python/cudf/cudf/_lib/pylibcudf/libcudf/lists/contains.pxd
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Copyright (c) 2021-2024, NVIDIA CORPORATION.

from libc.stdint cimport int32_t
from libcpp.memory cimport unique_ptr

from cudf._lib.exception_handler cimport cudf_exception_handler
Expand All @@ -12,17 +13,33 @@ from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport scalar


cdef extern from "cudf/lists/contains.hpp" namespace "cudf::lists" nogil:

cpdef enum class duplicate_find_option(int32_t):
FIND_FIRST "cudf::lists::duplicate_find_option::FIND_FIRST"
FIND_LAST "cudf::lists::duplicate_find_option::FIND_LAST"
Matt711 marked this conversation as resolved.
Show resolved Hide resolved

cdef unique_ptr[column] contains(
lists_column_view lists,
scalar search_key,
const lists_column_view& lists,
const scalar& search_key,
) except +cudf_exception_handler

cdef unique_ptr[column] contains(
const lists_column_view& lists,
const column_view& search_keys,
) except +cudf_exception_handler

cdef unique_ptr[column] contains_nulls(
const lists_column_view& lists,
) except +cudf_exception_handler

cdef unique_ptr[column] index_of(
lists_column_view lists,
scalar search_key,
const lists_column_view& lists,
const scalar& search_key,
duplicate_find_option find_option,
) except +cudf_exception_handler

cdef unique_ptr[column] index_of(
lists_column_view lists,
column_view search_keys,
const lists_column_view& lists,
const column_view& search_keys,
duplicate_find_option find_option,
) except +cudf_exception_handler
11 changes: 11 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/lists.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,24 @@
from libcpp cimport bool

from cudf._lib.pylibcudf.libcudf.types cimport size_type
from cudf._lib.scalar cimport DeviceScalar

from .column cimport Column
from .scalar cimport Scalar
from .table cimport Table

ctypedef fused ColumnOrScalar:
Column
DeviceScalar

cpdef Table explode_outer(Table, size_type explode_column_idx)

cpdef Column concatenate_rows(Table)

cpdef Column concatenate_list_elements(Column, bool dropna)

cpdef Column contains(Column, ColumnOrScalar)

cpdef Column contains_nulls(Column)

cpdef Column index_of(Column, ColumnOrScalar, bool)
126 changes: 120 additions & 6 deletions python/cudf/cudf/_lib/pylibcudf/lists.pyx
Original file line number Diff line number Diff line change
@@ -1,16 +1,23 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from libcpp cimport bool
from libcpp.memory cimport unique_ptr
from libcpp.memory cimport make_shared, shared_ptr, unique_ptr
from libcpp.utility cimport move

from cudf._lib.pylibcudf.libcudf.column.column cimport column
from cudf._lib.pylibcudf.libcudf.lists cimport explode as cpp_explode
from cudf._lib.pylibcudf.libcudf.lists cimport (
contains as cpp_contains,
explode as cpp_explode,
)
from cudf._lib.pylibcudf.libcudf.lists.combine cimport (
concatenate_list_elements as cpp_concatenate_list_elements,
concatenate_null_policy,
concatenate_rows as cpp_concatenate_rows,
)
from cudf._lib.pylibcudf.libcudf.lists.lists_column_view cimport (
lists_column_view,
)
from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport scalar
from cudf._lib.pylibcudf.libcudf.table.table cimport table
from cudf._lib.pylibcudf.libcudf.types cimport size_type

Expand Down Expand Up @@ -71,15 +78,15 @@ cpdef Column concatenate_list_elements(Column input, bool dropna):
----------
input : Column
The input column
dropna : bool
If true, null list elements will be ignored
from concatenation. Otherwise any input null values will result in
the corresponding output row being set to null.

Returns
-------
Column
A new Column of concatenated list elements
dropna : bool
If true, null list elements will be ignored
from concatenation. Otherwise any input null values will result in
the corresponding output row being set to null.
"""
cdef concatenate_null_policy null_policy = (
concatenate_null_policy.IGNORE if dropna
Expand All @@ -94,3 +101,110 @@ cpdef Column concatenate_list_elements(Column input, bool dropna):
))

return Column.from_libcudf(move(c_result))


cpdef Column contains(Column input, ColumnOrScalar search_key):
    """Create a column of bool values indicating whether the search key
    is contained in each list row of the input.

    Dispatches to the matching ``cudf::lists::contains`` overload
    depending on whether ``search_key`` is a column or a scalar.

    Parameters
    ----------
    input : Column
        The input lists column.
    search_key : Union[Column, DeviceScalar]
        The search key. A ``Column`` is forwarded to the column overload
        as a ``column_view``; a ``DeviceScalar`` is forwarded to the
        scalar overload through its raw libcudf scalar pointer.

    Returns
    -------
    Column
        A new Column of bools produced by libcudf.
    """
    cdef unique_ptr[column] c_result
    # NOTE(review): presumably held through a shared_ptr because
    # lists_column_view cannot be stack-allocated in Cython without a
    # nullary constructor -- confirm.
    cdef shared_ptr[lists_column_view] list_view = (
        make_shared[lists_column_view](input.view())
    )
    cdef const scalar* search_key_value = NULL

    # ColumnOrScalar is a fused type, so this branch is selected per
    # specialization rather than evaluated on every call.
    if ColumnOrScalar is Column:
        with nogil:
            c_result = move(cpp_contains.contains(
                list_view.get()[0],
                search_key.view(),
            ))
    else:
        # Extract the raw libcudf scalar pointer while the GIL is still
        # held; only the C++ call runs inside the nogil section.
        search_key_value = search_key.get_raw_ptr()
        with nogil:
            c_result = move(cpp_contains.contains(
                list_view.get()[0],
                search_key_value[0],
            ))
    return Column.from_libcudf(move(c_result))


cpdef Column contains_nulls(Column input):
    """Report, for every row of a lists column, whether that list row
    contains a null value.

    Thin wrapper around ``cudf::lists::contains_nulls``.

    Parameters
    ----------
    input : Column
        The input lists column.

    Returns
    -------
    Column
        A new Column of bools
    """
    cdef shared_ptr[lists_column_view] lists_view = (
        make_shared[lists_column_view](input.view())
    )
    cdef unique_ptr[column] c_output

    # Only the libcudf call itself runs without the GIL.
    with nogil:
        c_output = move(
            cpp_contains.contains_nulls(lists_view.get()[0])
        )

    return Column.from_libcudf(move(c_output))


cpdef Column index_of(
    Column input,
    ColumnOrScalar search_key,
    bool find_first_option,
):
    """Create a column of values indicating the position of a search
    key row within the corresponding list row in the lists column.

    Dispatches to the matching ``cudf::lists::index_of`` overload
    depending on whether ``search_key`` is a column or a scalar.

    Parameters
    ----------
    input : Column
        The input lists column.
    search_key : Union[Column, DeviceScalar]
        The search key. A ``Column`` is forwarded to the column overload
        as a ``column_view``; a ``DeviceScalar`` is forwarded to the
        scalar overload through its raw libcudf scalar pointer.
    find_first_option : bool
        If true, index_of returns the first match.
        Otherwise the last match is returned.

    Returns
    -------
    Column
        A new Column of index values giving where in each list row the
        search key was found. Per the libcudf documentation, an index of
        -1 indicates that the key was not found in that row.
    """
    cdef unique_ptr[column] c_result
    # NOTE(review): presumably held through a shared_ptr because
    # lists_column_view cannot be stack-allocated in Cython without a
    # nullary constructor -- confirm.
    cdef shared_ptr[lists_column_view] list_view = (
        make_shared[lists_column_view](input.view())
    )
    cdef const scalar* search_key_value = NULL
    # Translate the Python-level boolean into the libcudf enum.
    cdef cpp_contains.duplicate_find_option find_option = (
        cpp_contains.duplicate_find_option.FIND_FIRST if find_first_option
        else cpp_contains.duplicate_find_option.FIND_LAST
    )

    # ColumnOrScalar is a fused type, so this branch is selected per
    # specialization rather than evaluated on every call.
    if ColumnOrScalar is Column:
        with nogil:
            c_result = move(cpp_contains.index_of(
                list_view.get()[0],
                search_key.view(),
                find_option,
            ))
    else:
        # Extract the raw libcudf scalar pointer while the GIL is still
        # held; only the C++ call runs inside the nogil section.
        search_key_value = search_key.get_raw_ptr()
        with nogil:
            c_result = move(cpp_contains.index_of(
                list_view.get()[0],
                search_key_value[0],
                find_option,
            ))
    return Column.from_libcudf(move(c_result))
Loading
Loading