Skip to content

Commit

Permalink
Move unnecessary utilities from cudf._lib.scalar (#17636)
Browse files Browse the repository at this point in the history
In preparation for transitioning `DeviceScalar` to pylibcudf's `Scalar`, this change moves `_is_null_host_scalar` (a pure Python function) to `cudf.utils.utils` and removes `as_device_scalar` in favor of going through `cudf.Scalar`.

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - Bradley Dice (https://github.com/bdice)

URL: #17636
  • Loading branch information
mroeschke authored Jan 7, 2025
1 parent 7182745 commit a0487be
Show file tree
Hide file tree
Showing 9 changed files with 28 additions and 41 deletions.
23 changes: 0 additions & 23 deletions python/cudf/cudf/_lib/scalar.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -260,26 +260,3 @@ cdef class DeviceScalar:
self._dtype = PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[
<underlying_type_t_type_id>(cdtype_id)
]


def as_device_scalar(val, dtype=None):
    """Return the device-side scalar corresponding to ``val``.

    Parameters
    ----------
    val
        A ``DeviceScalar``, a ``cudf.Scalar``, or any value accepted by the
        ``cudf.Scalar`` constructor.
    dtype : optional
        Requested dtype. For an existing ``cudf.Scalar``/``DeviceScalar`` it
        must be ``None`` or equal to ``val.dtype``.

    Returns
    -------
    ``val`` itself when it is already a ``DeviceScalar``; otherwise the
    ``device_value`` of ``val`` (or of a new ``cudf.Scalar`` built from it).

    Raises
    ------
    TypeError
        If ``val`` is an existing scalar and ``dtype`` differs from its dtype.
    """
    # Plain host values are wrapped in a new cudf.Scalar first.
    if not isinstance(val, (cudf.Scalar, DeviceScalar)):
        return cudf.Scalar(val, dtype=dtype).device_value

    # An existing scalar cannot be re-typed here.
    dtype_is_compatible = dtype == val.dtype or dtype is None
    if not dtype_is_compatible:
        raise TypeError("Can't update dtype of existing GPU scalar")

    return val if isinstance(val, DeviceScalar) else val.device_value


def _is_null_host_scalar(slr):
    """Return ``True`` if the host scalar ``slr`` is NA-like or NaT-like.

    A value counts as null when ``cudf.utils.utils.is_na_like`` accepts it,
    when it is ``pd.NaT``, or when it is a ``np.datetime64``/``np.timedelta64``
    for which ``np.isnat`` is true. Anything else yields ``False``.
    """
    if cudf.utils.utils.is_na_like(slr):
        return True
    if slr is pd.NaT:
        return True
    # np.isnat is only defined for numpy datetime/timedelta values, so guard
    # the call with the isinstance check.
    is_numpy_temporal = isinstance(slr, (np.datetime64, np.timedelta64))
    return bool(is_numpy_temporal and np.isnat(slr))
2 changes: 1 addition & 1 deletion python/cudf/cudf/core/column/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -621,7 +621,7 @@ def ordered(self) -> bool:
def __setitem__(self, key, value):
if cudf.api.types.is_scalar(
value
) and cudf._lib.scalar._is_null_host_scalar(value):
) and cudf.utils.utils._is_null_host_scalar(value):
to_add_categories = 0
else:
if cudf.api.types.is_scalar(value):
Expand Down
15 changes: 6 additions & 9 deletions python/cudf/cudf/core/column/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
import cudf
from cudf import _lib as libcudf
from cudf._lib.column import Column
from cudf._lib.scalar import as_device_scalar
from cudf._lib.types import dtype_to_pylibcudf_type, size_type_dtype
from cudf.api.types import (
_is_non_decimal_numeric_dtype,
Expand Down Expand Up @@ -71,7 +70,7 @@
min_signed_type,
min_unsigned_type,
)
from cudf.utils.utils import _array_ufunc, mask_dtype
from cudf.utils.utils import _array_ufunc, _is_null_host_scalar, mask_dtype

if TYPE_CHECKING:
import builtins
Expand Down Expand Up @@ -777,9 +776,7 @@ def fillna(
if not self.has_nulls(include_nan=True):
return self.copy()
elif method is None:
if is_scalar(fill_value) and libcudf.scalar._is_null_host_scalar(
fill_value
):
if is_scalar(fill_value) and _is_null_host_scalar(fill_value):
return self.copy()
else:
fill_value = self._validate_fillna_value(fill_value)
Expand Down Expand Up @@ -1984,12 +1981,12 @@ def as_column(
column = Column.from_pylibcudf(
plc.filling.sequence(
len(arbitrary),
as_device_scalar(
cudf.Scalar(
arbitrary.start, dtype=np.dtype(np.int64)
).c_value,
as_device_scalar(
).device_value.c_value,
cudf.Scalar(
arbitrary.step, dtype=np.dtype(np.int64)
).c_value,
).device_value.c_value,
)
)
if cudf.get_option("default_integer_bitwidth") and dtype is None:
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/core/column/lists.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ def from_sequences(

# Build Data, Mask & Offsets
for data in arbitrary:
if cudf._lib.scalar._is_null_host_scalar(data):
if cudf.utils.utils._is_null_host_scalar(data):
mask_col.append(False)
offset_vals.append(offset)
else:
Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/core/column/numerical.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ def __setitem__(self, key: Any, value: Any):
cudf.Scalar(
value,
dtype=self.dtype
if cudf._lib.scalar._is_null_host_scalar(value)
if cudf.utils.utils._is_null_host_scalar(value)
else None,
)
if is_scalar(value)
Expand Down Expand Up @@ -789,7 +789,7 @@ def _normalize_find_and_replace_input(
)
# Scalar case
if len(col_to_normalize) == 1:
if cudf._lib.scalar._is_null_host_scalar(col_to_normalize[0]):
if cudf.utils.utils._is_null_host_scalar(col_to_normalize[0]):
return normalized_column.astype(input_column_dtype)
if np.isinf(col_to_normalize[0]):
return normalized_column
Expand Down
8 changes: 6 additions & 2 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,11 @@
min_signed_type,
)
from cudf.utils.performance_tracking import _performance_tracking
from cudf.utils.utils import GetAttrGetItemMixin, _external_only_api
from cudf.utils.utils import (
GetAttrGetItemMixin,
_external_only_api,
_is_null_host_scalar,
)

if TYPE_CHECKING:
from cudf._typing import ColumnLike, Dtype, NotImplementedType
Expand Down Expand Up @@ -3371,7 +3375,7 @@ def _insert(self, loc, name, value, nan_as_null=None, ignore_index=True):
if isinstance(value, (np.ndarray, cupy.ndarray)):
dtype = value.dtype
value = value.item()
if libcudf.scalar._is_null_host_scalar(value):
if _is_null_host_scalar(value):
dtype = "str"
value = as_column(
value,
Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/core/scalar.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,13 +178,13 @@ def dtype(self):
def is_valid(self):
if not self._is_host_value_current:
self._device_value_to_host()
return not cudf._lib.scalar._is_null_host_scalar(self._host_value)
return not cudf.utils.utils._is_null_host_scalar(self._host_value)

def _device_value_to_host(self):
self._host_value = self._device_value._to_host_scalar()

def _preprocess_host_value(self, value, dtype):
valid = not cudf._lib.scalar._is_null_host_scalar(value)
valid = not cudf.utils.utils._is_null_host_scalar(value)

if isinstance(value, list):
if dtype is not None:
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/utils/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ def to_cudf_compatible_scalar(val, dtype=None):
If `val` is None, returns None.
"""

if cudf._lib.scalar._is_null_host_scalar(val) or isinstance(
if cudf.utils.utils._is_null_host_scalar(val) or isinstance(
val, cudf.Scalar
):
return val
Expand Down
9 changes: 9 additions & 0 deletions python/cudf/cudf/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,15 @@ def is_na_like(obj):
return obj is None or obj is cudf.NA or obj is cudf.NaT


def _is_null_host_scalar(slr) -> bool:
    """Return whether the host scalar ``slr`` is NA-like or NaT-like.

    A value counts as null when ``is_na_like`` accepts it, when it is
    ``pd.NaT``, or when it is a ``np.datetime64``/``np.timedelta64`` for
    which ``np.isnat`` is true.

    Returns
    -------
    bool
        Always a builtin ``bool``, never a ``numpy.bool_``.
    """
    if is_na_like(slr) or slr is pd.NaT:
        return True
    if isinstance(slr, (np.datetime64, np.timedelta64)):
        # np.isnat returns a numpy.bool_; coerce it so the annotated
        # ``bool`` return type actually holds for NaT inputs.
        return bool(np.isnat(slr))
    return False


def _warn_no_dask_cudf(fn):
@functools.wraps(fn)
def wrapper(self):
Expand Down

0 comments on commit a0487be

Please sign in to comment.