Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move unnecessary utilities from cudf._lib.scalar #17636

Merged
merged 6 commits into from
Jan 7, 2025
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 0 additions & 23 deletions python/cudf/cudf/_lib/scalar.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -263,26 +263,3 @@ cdef class DeviceScalar:
self._dtype = LIBCUDF_TO_SUPPORTED_NUMPY_TYPES[
<underlying_type_t_type_id>(cdtype.id())
]


def as_device_scalar(val, dtype=None):
    """Return a device scalar for ``val``.

    Parameters
    ----------
    val
        Either an existing ``cudf.Scalar`` / ``DeviceScalar`` or any value
        accepted by the ``cudf.Scalar`` constructor.
    dtype
        Optional dtype. For an existing scalar it must be ``None`` or compare
        equal to ``val.dtype``.

    Returns
    -------
    ``val`` itself when it is already a ``DeviceScalar``, otherwise the
    ``device_value`` of the (existing or newly constructed) ``cudf.Scalar``.

    Raises
    ------
    TypeError
        If ``val`` is already a scalar and ``dtype`` differs from its dtype.
    """
    # Anything that is not already a scalar is wrapped first.
    if not isinstance(val, (cudf.Scalar, DeviceScalar)):
        return cudf.Scalar(val, dtype=dtype).device_value

    # An existing scalar cannot be re-typed here.
    if not (dtype == val.dtype or dtype is None):
        raise TypeError("Can't update dtype of existing GPU scalar")

    return val if isinstance(val, DeviceScalar) else val.device_value


def _is_null_host_scalar(slr):
    """Return True when the host scalar ``slr`` represents a null value.

    A value counts as null if ``cudf.utils.utils.is_na_like`` accepts it, if
    it is a NumPy ``datetime64``/``timedelta64`` for which ``np.isnat`` is
    true, or if it is the ``pd.NaT`` singleton. Anything else yields False.
    """
    if cudf.utils.utils.is_na_like(slr):
        return True
    # NumPy not-a-time values; the isinstance guard keeps np.isnat from being
    # called on anything that is not a datetime64/timedelta64.
    if isinstance(slr, (np.datetime64, np.timedelta64)) and np.isnat(slr):
        return True
    # pandas' NaT singleton is checked by identity.
    return slr is pd.NaT
2 changes: 1 addition & 1 deletion python/cudf/cudf/core/column/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -621,7 +621,7 @@ def ordered(self) -> bool:
def __setitem__(self, key, value):
if cudf.api.types.is_scalar(
value
) and cudf._lib.scalar._is_null_host_scalar(value):
) and cudf.utils.utils._is_null_host_scalar(value):
to_add_categories = 0
else:
if cudf.api.types.is_scalar(value):
Expand Down
15 changes: 6 additions & 9 deletions python/cudf/cudf/core/column/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
import cudf
from cudf import _lib as libcudf
from cudf._lib.column import Column
from cudf._lib.scalar import as_device_scalar
from cudf._lib.types import dtype_to_pylibcudf_type, size_type_dtype
from cudf.api.types import (
_is_non_decimal_numeric_dtype,
Expand Down Expand Up @@ -71,7 +70,7 @@
min_signed_type,
min_unsigned_type,
)
from cudf.utils.utils import _array_ufunc, mask_dtype
from cudf.utils.utils import _array_ufunc, _is_null_host_scalar, mask_dtype

if TYPE_CHECKING:
import builtins
Expand Down Expand Up @@ -777,9 +776,7 @@ def fillna(
if not self.has_nulls(include_nan=True):
return self.copy()
elif method is None:
if is_scalar(fill_value) and libcudf.scalar._is_null_host_scalar(
fill_value
):
if is_scalar(fill_value) and _is_null_host_scalar(fill_value):
return self.copy()
else:
fill_value = self._validate_fillna_value(fill_value)
Expand Down Expand Up @@ -1984,12 +1981,12 @@ def as_column(
column = Column.from_pylibcudf(
plc.filling.sequence(
len(arbitrary),
as_device_scalar(
cudf.Scalar(
arbitrary.start, dtype=np.dtype(np.int64)
).c_value,
as_device_scalar(
).device_value.c_value,
cudf.Scalar(
arbitrary.step, dtype=np.dtype(np.int64)
).c_value,
).device_value.c_value,
)
)
if cudf.get_option("default_integer_bitwidth") and dtype is None:
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/core/column/lists.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ def from_sequences(

# Build Data, Mask & Offsets
for data in arbitrary:
if cudf._lib.scalar._is_null_host_scalar(data):
if cudf.utils.utils._is_null_host_scalar(data):
mask_col.append(False)
offset_vals.append(offset)
else:
Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/core/column/numerical.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ def __setitem__(self, key: Any, value: Any):
cudf.Scalar(
value,
dtype=self.dtype
if cudf._lib.scalar._is_null_host_scalar(value)
if cudf.utils.utils._is_null_host_scalar(value)
else None,
)
if is_scalar(value)
Expand Down Expand Up @@ -789,7 +789,7 @@ def _normalize_find_and_replace_input(
)
# Scalar case
if len(col_to_normalize) == 1:
if cudf._lib.scalar._is_null_host_scalar(col_to_normalize[0]):
if cudf.utils.utils._is_null_host_scalar(col_to_normalize[0]):
return normalized_column.astype(input_column_dtype)
if np.isinf(col_to_normalize[0]):
return normalized_column
Expand Down
8 changes: 6 additions & 2 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,11 @@
min_signed_type,
)
from cudf.utils.performance_tracking import _performance_tracking
from cudf.utils.utils import GetAttrGetItemMixin, _external_only_api
from cudf.utils.utils import (
GetAttrGetItemMixin,
_external_only_api,
_is_null_host_scalar,
)

if TYPE_CHECKING:
from cudf._typing import ColumnLike, Dtype, NotImplementedType
Expand Down Expand Up @@ -3371,7 +3375,7 @@ def _insert(self, loc, name, value, nan_as_null=None, ignore_index=True):
if isinstance(value, (np.ndarray, cupy.ndarray)):
dtype = value.dtype
value = value.item()
if libcudf.scalar._is_null_host_scalar(value):
if _is_null_host_scalar(value):
dtype = "str"
value = as_column(
value,
Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/core/scalar.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,13 +178,13 @@ def dtype(self):
def is_valid(self):
if not self._is_host_value_current:
self._device_value_to_host()
return not cudf._lib.scalar._is_null_host_scalar(self._host_value)
return not cudf.utils.utils._is_null_host_scalar(self._host_value)

def _device_value_to_host(self):
self._host_value = self._device_value._to_host_scalar()

def _preprocess_host_value(self, value, dtype):
valid = not cudf._lib.scalar._is_null_host_scalar(value)
valid = not cudf.utils.utils._is_null_host_scalar(value)

if isinstance(value, list):
if dtype is not None:
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/utils/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ def to_cudf_compatible_scalar(val, dtype=None):
If `val` is None, returns None.
"""

if cudf._lib.scalar._is_null_host_scalar(val) or isinstance(
if cudf.utils.utils._is_null_host_scalar(val) or isinstance(
val, cudf.Scalar
):
return val
Expand Down
11 changes: 11 additions & 0 deletions python/cudf/cudf/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,17 @@ def is_na_like(obj):
return obj is None or obj is cudf.NA or obj is cudf.NaT


def _is_null_host_scalar(slr) -> bool:
    """Return True when the host scalar ``slr`` represents a null value.

    Three cases are treated as null, checked in this order:

    1. ``is_na_like(slr)`` is true.
    2. ``slr`` is a NumPy ``datetime64``/``timedelta64`` and ``np.isnat``
       reports it as not-a-time.
    3. ``slr`` is the ``pd.NaT`` singleton.
    """
    # bool() normalizes the np.bool_ that np.isnat returns to a Python bool.
    return bool(
        is_na_like(slr)
        or (isinstance(slr, (np.datetime64, np.timedelta64)) and np.isnat(slr))
        or slr is pd.NaT
    )
Copy link
Contributor

@bdice bdice Dec 20, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Try to combine this into something like:

return is_na_like(slr) or (isinstance(slr, (np.datetime64, np.timedelta64)) and np.isnat(slr)) or slr is pd.NaT

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or if you leave it the long way, maybe add comments that help explain the second case.



def _warn_no_dask_cudf(fn):
@functools.wraps(fn)
def wrapper(self):
Expand Down
Loading