Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[REVIEW] Refactor the Buffer class #11447

Merged
merged 40 commits into from
Aug 11, 2022
Merged
Show file tree
Hide file tree
Changes from 39 commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
d000544
Create new Buffers using as_buffer()
madsbk Aug 2, 2022
814d3ea
copyright
madsbk Aug 2, 2022
224c3f8
_data_from_cuda_array_interface_desc: use use as_buffer() directly
madsbk Aug 2, 2022
408f4f7
impl. buffer_from_pointer()
madsbk Aug 2, 2022
2ef3dc8
as_buffer(): impl. extraction of device pointer and size
madsbk Aug 3, 2022
c41fa5c
removed test of Buffer.copy()
madsbk Aug 3, 2022
fa8a878
Buffer(): support given a buffer-like object directly
madsbk Aug 3, 2022
90e2b25
test_buffer: use as_buffer()
madsbk Aug 3, 2022
0b5ff99
Fix typo
madsbk Aug 3, 2022
c1edf9f
Fixing is_c_contiguous()
madsbk Aug 3, 2022
d6ce949
Style
madsbk Aug 5, 2022
75e9c63
Introduce the DeviceBufferLike Protocol
madsbk Aug 5, 2022
61393ad
DeviceBufferLike: removed the .nbytes attribute
madsbk Aug 5, 2022
34248ae
rename: as_buffer => as_device_buffer_like
madsbk Aug 5, 2022
bad4096
Moved _init_buffer_from_any back into Buffer.__init__
madsbk Aug 5, 2022
050f40e
Bufffer.__init__: rename ptr => data
madsbk Aug 5, 2022
be1f38e
removed buffer_from_pointer()
madsbk Aug 5, 2022
50000b5
fixing test_buffer_creation_from_any()
madsbk Aug 5, 2022
86fecde
Removed Buffer.from_buffer()
madsbk Aug 5, 2022
753fdb4
Merge branch 'branch-22.10' of github.com:rapidsai/cudf into buffer_r…
madsbk Aug 8, 2022
a961d96
Introduce Buffer.memoryview() and remove Buffer.to_host_array()
madsbk Aug 8, 2022
04b1397
remove Buffer.__len__
madsbk Aug 8, 2022
a5ea110
Use "*," for the size, offset, and owner arguments
madsbk Aug 8, 2022
bca09ff
Impl. format_bytes()
madsbk Aug 8, 2022
9fff419
is_c_contiguous(): use cumulative_stride
madsbk Aug 8, 2022
a9d9910
Series.data: fixed doc
madsbk Aug 8, 2022
e0a889c
rename: _get_ptr_and_size() => get_ptr_and_size()
madsbk Aug 9, 2022
e91e4d1
Implement __get_item__
madsbk Aug 9, 2022
ab14bc0
Style/cleanup/typos
madsbk Aug 10, 2022
36fe261
Defining FrameList
madsbk Aug 10, 2022
0671574
doc
madsbk Aug 10, 2022
d6f12c3
Use math.prod()
madsbk Aug 10, 2022
1049c1c
style
madsbk Aug 10, 2022
eab3dd7
Buffer.__getitem__(): handle negative index
madsbk Aug 10, 2022
a307821
moved is_c_contiguous() to module level
madsbk Aug 10, 2022
714c497
get_ptr_and_size(): docs
madsbk Aug 10, 2022
78b802e
test_buffer_repr(): testing 1 and -1
madsbk Aug 10, 2022
b40cfb4
ListFrame => List[Frame]
madsbk Aug 10, 2022
2d2adda
test_buffer_from_cuda_iface_dtype(): check data
madsbk Aug 10, 2022
19114a8
__getitem__(): remove support of integers
madsbk Aug 11, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 14 additions & 14 deletions python/cudf/cudf/_lib/column.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,16 @@ from __future__ import annotations
from typing import Dict, Optional, Tuple, TypeVar

from cudf._typing import Dtype, DtypeObj, ScalarLike
from cudf.core.buffer import Buffer
from cudf.core.buffer import DeviceBufferLike
from cudf.core.column import ColumnBase

T = TypeVar("T")

class Column:
_data: Optional[Buffer]
_mask: Optional[Buffer]
_base_data: Optional[Buffer]
_base_mask: Optional[Buffer]
_data: Optional[DeviceBufferLike]
_mask: Optional[DeviceBufferLike]
_base_data: Optional[DeviceBufferLike]
_base_mask: Optional[DeviceBufferLike]
_dtype: DtypeObj
_size: int
_offset: int
Expand All @@ -25,10 +25,10 @@ class Column:

def __init__(
self,
data: Optional[Buffer],
data: Optional[DeviceBufferLike],
size: int,
dtype: Dtype,
mask: Optional[Buffer] = None,
mask: Optional[DeviceBufferLike] = None,
offset: int = None,
null_count: int = None,
children: Tuple[ColumnBase, ...] = (),
Expand All @@ -40,27 +40,27 @@ class Column:
@property
def size(self) -> int: ...
@property
def base_data(self) -> Optional[Buffer]: ...
def base_data(self) -> Optional[DeviceBufferLike]: ...
@property
def base_data_ptr(self) -> int: ...
@property
def data(self) -> Optional[Buffer]: ...
def data(self) -> Optional[DeviceBufferLike]: ...
@property
def data_ptr(self) -> int: ...
def set_base_data(self, value: Buffer) -> None: ...
def set_base_data(self, value: DeviceBufferLike) -> None: ...
@property
def nullable(self) -> bool: ...
def has_nulls(self, include_nan: bool = False) -> bool: ...
@property
def base_mask(self) -> Optional[Buffer]: ...
def base_mask(self) -> Optional[DeviceBufferLike]: ...
@property
def base_mask_ptr(self) -> int: ...
@property
def mask(self) -> Optional[Buffer]: ...
def mask(self) -> Optional[DeviceBufferLike]: ...
@property
def mask_ptr(self) -> int: ...
def set_base_mask(self, value: Optional[Buffer]) -> None: ...
def set_mask(self: T, value: Optional[Buffer]) -> T: ...
def set_base_mask(self, value: Optional[DeviceBufferLike]) -> None: ...
def set_mask(self: T, value: Optional[DeviceBufferLike]) -> T: ...
@property
def null_count(self) -> int: ...
@property
Expand Down
71 changes: 33 additions & 38 deletions python/cudf/cudf/_lib/column.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import rmm
import cudf
import cudf._lib as libcudf
from cudf.api.types import is_categorical_dtype, is_list_dtype, is_struct_dtype
from cudf.core.buffer import Buffer
from cudf.core.buffer import Buffer, DeviceBufferLike, as_device_buffer_like

from cpython.buffer cimport PyObject_CheckBuffer
from libc.stdint cimport uintptr_t
Expand Down Expand Up @@ -56,9 +56,9 @@ cdef class Column:
A Column stores columnar data in device memory.
A Column may be composed of:

* A *data* Buffer
* A *data* DeviceBufferLike
* One or more (optional) *children* Columns
* An (optional) *mask* Buffer representing the nullmask
* An (optional) *mask* DeviceBufferLike representing the nullmask

The *dtype* indicates the Column's element type.
"""
Expand Down Expand Up @@ -110,18 +110,9 @@ cdef class Column:
if self.base_data is None:
return None
if self._data is None:
itemsize = self.dtype.itemsize
size = self.size * itemsize
offset = self.offset * itemsize if self.size else 0
if offset == 0 and self.base_data.size == size:
# `data` spans all of `base_data`
self._data = self.base_data
else:
self._data = Buffer.from_buffer(
buffer=self.base_data,
size=size,
offset=offset
)
start = self.offset * self.dtype.itemsize
end = start + self.size * self.dtype.itemsize
self._data = self.base_data[start:end]
return self._data

@property
Expand All @@ -132,9 +123,11 @@ cdef class Column:
return self.data.ptr

def set_base_data(self, value):
if value is not None and not isinstance(value, Buffer):
raise TypeError("Expected a Buffer or None for data, got " +
type(value).__name__)
if value is not None and not isinstance(value, DeviceBufferLike):
raise TypeError(
"Expected a DeviceBufferLike or None for data, "
f"got {type(value).__name__}"
)

self._data = None
self._base_data = value
Expand Down Expand Up @@ -179,17 +172,18 @@ cdef class Column:
modify size or offset in any way, so the passed mask is expected to be
compatible with the current offset.
"""
if value is not None and not isinstance(value, Buffer):
raise TypeError("Expected a Buffer or None for mask, got " +
type(value).__name__)
if value is not None and not isinstance(value, DeviceBufferLike):
raise TypeError(
"Expected a DeviceBufferLike or None for mask, "
f"got {type(value).__name__}"
)

if value is not None:
required_size = bitmask_allocation_size_bytes(self.base_size)
if value.size < required_size:
error_msg = (
"The Buffer for mask is smaller than expected, got " +
str(value.size) + " bytes, expected " +
str(required_size) + " bytes."
"The DeviceBufferLike for mask is smaller than expected, "
f"got {value.size} bytes, expected {required_size} bytes."
)
if self.offset > 0 or self.size < self.base_size:
error_msg += (
Expand Down Expand Up @@ -233,31 +227,31 @@ cdef class Column:
if isinstance(value, Column):
value = value.data_array_view
value = cp.asarray(value).view('|u1')
mask = Buffer(value)
mask = as_device_buffer_like(value)
if mask.size < required_num_bytes:
raise ValueError(error_msg.format(str(value.size)))
if mask.size < mask_size:
dbuf = rmm.DeviceBuffer(size=mask_size)
dbuf.copy_from_device(value)
mask = Buffer(dbuf)
mask = as_device_buffer_like(dbuf)
elif hasattr(value, "__array_interface__"):
value = np.asarray(value).view("u1")[:mask_size]
if value.size < required_num_bytes:
raise ValueError(error_msg.format(str(value.size)))
dbuf = rmm.DeviceBuffer(size=mask_size)
dbuf.copy_from_host(value)
mask = Buffer(dbuf)
mask = as_device_buffer_like(dbuf)
elif PyObject_CheckBuffer(value):
value = np.asarray(value).view("u1")[:mask_size]
if value.size < required_num_bytes:
raise ValueError(error_msg.format(str(value.size)))
dbuf = rmm.DeviceBuffer(size=mask_size)
dbuf.copy_from_host(value)
mask = Buffer(dbuf)
mask = as_device_buffer_like(dbuf)
else:
raise TypeError(
"Expected a Buffer-like object or None for mask, got "
+ type(value).__name__
"Expected a DeviceBufferLike object or None for mask, "
f"got {type(value).__name__}"
)

return cudf.core.column.build_column(
Expand Down Expand Up @@ -455,11 +449,11 @@ cdef class Column:
cdef column_contents contents = move(c_col.get()[0].release())

data = DeviceBuffer.c_from_unique_ptr(move(contents.data))
data = Buffer(data)
data = as_device_buffer_like(data)

if null_count > 0:
mask = DeviceBuffer.c_from_unique_ptr(move(contents.null_mask))
mask = Buffer(mask)
mask = as_device_buffer_like(mask)
else:
mask = None

Expand All @@ -484,9 +478,10 @@ cdef class Column:
Given a ``cudf::column_view``, constructs a ``cudf.Column`` from it,
along with referencing an ``owner`` Python object that owns the memory
lifetime. If ``owner`` is a ``cudf.Column``, we reach inside of it and
make the owner of each newly created ``Buffer`` the respective
``Buffer`` from the ``owner`` ``cudf.Column``. If ``owner`` is
``None``, we allocate new memory for the resulting ``cudf.Column``.
make the owner of each newly created ``DeviceBufferLike`` the
respective ``DeviceBufferLike`` from the ``owner`` ``cudf.Column``.
If ``owner`` is ``None``, we allocate new memory for the resulting
``cudf.Column``.
"""
column_owner = isinstance(owner, Column)
mask_owner = owner
Expand All @@ -509,7 +504,7 @@ cdef class Column:

if data_ptr:
if data_owner is None:
data = Buffer(
data = as_device_buffer_like(
rmm.DeviceBuffer(ptr=data_ptr,
size=(size+offset) * dtype.itemsize)
)
Expand All @@ -520,7 +515,7 @@ cdef class Column:
owner=data_owner
)
else:
data = Buffer(
data = as_device_buffer_like(
rmm.DeviceBuffer(ptr=data_ptr, size=0)
)

Expand Down Expand Up @@ -550,7 +545,7 @@ cdef class Column:
# result:
mask = None
else:
mask = Buffer(
mask = as_device_buffer_like(
rmm.DeviceBuffer(
ptr=mask_ptr,
size=bitmask_allocation_size_bytes(size+offset)
Expand Down
8 changes: 5 additions & 3 deletions python/cudf/cudf/_lib/concat.pyx
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020, NVIDIA CORPORATION.
# Copyright (c) 2020-2022, NVIDIA CORPORATION.

from libcpp cimport bool
from libcpp.memory cimport make_unique, unique_ptr
Expand All @@ -19,7 +19,7 @@ from cudf._lib.utils cimport (
table_view_from_table,
)

from cudf.core.buffer import Buffer
from cudf.core.buffer import as_device_buffer_like

from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer

Expand All @@ -31,7 +31,9 @@ cpdef concat_masks(object columns):
with nogil:
c_result = move(libcudf_concatenate_masks(c_views))
c_unique_result = make_unique[device_buffer](move(c_result))
return Buffer(DeviceBuffer.c_from_unique_ptr(move(c_unique_result)))
return as_device_buffer_like(
DeviceBuffer.c_from_unique_ptr(move(c_unique_result))
)


cpdef concat_columns(object columns):
Expand Down
6 changes: 5 additions & 1 deletion python/cudf/cudf/_lib/copying.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -718,7 +718,11 @@ cdef class _CPackedColumns:
header = {}
frames = []

gpu_data = Buffer(self.gpu_data_ptr, self.gpu_data_size, self)
gpu_data = Buffer(
data=self.gpu_data_ptr,
size=self.gpu_data_size,
owner=self
)
data_header, data_frames = gpu_data.serialize()
header["data"] = data_header
frames.extend(data_frames)
Expand Down
6 changes: 3 additions & 3 deletions python/cudf/cudf/_lib/null_mask.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ from cudf._lib.cpp.null_mask cimport (
)
from cudf._lib.cpp.types cimport mask_state, size_type

from cudf.core.buffer import Buffer
from cudf.core.buffer import as_device_buffer_like


class MaskState(Enum):
Expand Down Expand Up @@ -47,7 +47,7 @@ def copy_bitmask(Column col):
up_db = make_unique[device_buffer](move(db))

rmm_db = DeviceBuffer.c_from_unique_ptr(move(up_db))
buf = Buffer(rmm_db)
buf = as_device_buffer_like(rmm_db)
return buf


Expand Down Expand Up @@ -93,5 +93,5 @@ def create_null_mask(size_type size, state=MaskState.UNINITIALIZED):
up_db = make_unique[device_buffer](move(db))

rmm_db = DeviceBuffer.c_from_unique_ptr(move(up_db))
buf = Buffer(rmm_db)
buf = as_device_buffer_like(rmm_db)
return buf
12 changes: 6 additions & 6 deletions python/cudf/cudf/_lib/transform.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ from numba.np import numpy_support
import cudf
from cudf._lib.types import SUPPORTED_NUMPY_TO_LIBCUDF_TYPES
from cudf.core._internals.expressions import parse_expression
from cudf.core.buffer import Buffer
from cudf.core.buffer import as_device_buffer_like
from cudf.utils import cudautils

from cython.operator cimport dereference
Expand Down Expand Up @@ -40,7 +40,7 @@ from cudf._lib.utils cimport (
def bools_to_mask(Column col):
"""
Given an int8 (boolean) column, compress the data from booleans to bits and
return a Buffer
return a DeviceBufferLike
"""
cdef column_view col_view = col.view()
cdef pair[unique_ptr[device_buffer], size_type] cpp_out
Expand All @@ -52,7 +52,7 @@ def bools_to_mask(Column col):
up_db = move(cpp_out.first)

rmm_db = DeviceBuffer.c_from_unique_ptr(move(up_db))
buf = Buffer(rmm_db)
buf = as_device_buffer_like(rmm_db)
return buf


Expand All @@ -61,9 +61,9 @@ def mask_to_bools(object mask_buffer, size_type begin_bit, size_type end_bit):
Given a mask buffer, returns a boolean column representng bit 0 -> False
and 1 -> True within range of [begin_bit, end_bit),
"""
if not isinstance(mask_buffer, cudf.core.buffer.Buffer):
if not isinstance(mask_buffer, cudf.core.buffer.DeviceBufferLike):
raise TypeError("mask_buffer is not an instance of "
"cudf.core.buffer.Buffer")
"cudf.core.buffer.DeviceBufferLike")
cdef bitmask_type* bit_mask = <bitmask_type*><uintptr_t>(mask_buffer.ptr)

cdef unique_ptr[column] result
Expand All @@ -88,7 +88,7 @@ def nans_to_nulls(Column input):
return None

buffer = DeviceBuffer.c_from_unique_ptr(move(c_buffer))
buffer = Buffer(buffer)
buffer = as_device_buffer_like(buffer)
return buffer


Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/_lib/utils.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -341,8 +341,8 @@ cdef data_from_table_view(
along with referencing an ``owner`` Python object that owns the memory
lifetime. If ``owner`` is a Frame we reach inside of it and
reach inside of each ``cudf.Column`` to make the owner of each newly
created ``Buffer`` underneath the ``cudf.Column`` objects of the
created Frame the respective ``Buffer`` from the relevant
created ``DeviceBufferLike`` underneath the ``cudf.Column`` objects of the
created Frame the respective ``DeviceBufferLike`` from the relevant
``cudf.Column`` of the ``owner`` Frame
"""
cdef size_type column_idx = 0
Expand Down
Loading