Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[REVIEW] Refactor the Buffer class #11447

Merged
merged 40 commits into from
Aug 11, 2022
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
d000544
Create new Buffers using as_buffer()
madsbk Aug 2, 2022
814d3ea
copyright
madsbk Aug 2, 2022
224c3f8
_data_from_cuda_array_interface_desc: use as_buffer() directly
madsbk Aug 2, 2022
408f4f7
impl. buffer_from_pointer()
madsbk Aug 2, 2022
2ef3dc8
as_buffer(): impl. extraction of device pointer and size
madsbk Aug 3, 2022
c41fa5c
removed test of Buffer.copy()
madsbk Aug 3, 2022
fa8a878
Buffer(): support being given a buffer-like object directly
madsbk Aug 3, 2022
90e2b25
test_buffer: use as_buffer()
madsbk Aug 3, 2022
0b5ff99
Fix typo
madsbk Aug 3, 2022
c1edf9f
Fixing is_c_contiguous()
madsbk Aug 3, 2022
d6ce949
Style
madsbk Aug 5, 2022
75e9c63
Introduce the DeviceBufferLike Protocol
madsbk Aug 5, 2022
61393ad
DeviceBufferLike: removed the .nbytes attribute
madsbk Aug 5, 2022
34248ae
rename: as_buffer => as_device_buffer_like
madsbk Aug 5, 2022
bad4096
Moved _init_buffer_from_any back into Buffer.__init__
madsbk Aug 5, 2022
050f40e
Buffer.__init__: rename ptr => data
madsbk Aug 5, 2022
be1f38e
removed buffer_from_pointer()
madsbk Aug 5, 2022
50000b5
fixing test_buffer_creation_from_any()
madsbk Aug 5, 2022
86fecde
Removed Buffer.from_buffer()
madsbk Aug 5, 2022
753fdb4
Merge branch 'branch-22.10' of github.com:rapidsai/cudf into buffer_r…
madsbk Aug 8, 2022
a961d96
Introduce Buffer.memoryview() and remove Buffer.to_host_array()
madsbk Aug 8, 2022
04b1397
remove Buffer.__len__
madsbk Aug 8, 2022
a5ea110
Use "*," for the size, offset, and owner arguments
madsbk Aug 8, 2022
bca09ff
Impl. format_bytes()
madsbk Aug 8, 2022
9fff419
is_c_contiguous(): use cumulative_stride
madsbk Aug 8, 2022
a9d9910
Series.data: fixed doc
madsbk Aug 8, 2022
e0a889c
rename: _get_ptr_and_size() => get_ptr_and_size()
madsbk Aug 9, 2022
e91e4d1
Implement __getitem__
madsbk Aug 9, 2022
ab14bc0
Style/cleanup/typos
madsbk Aug 10, 2022
36fe261
Defining FrameList
madsbk Aug 10, 2022
0671574
doc
madsbk Aug 10, 2022
d6f12c3
Use math.prod()
madsbk Aug 10, 2022
1049c1c
style
madsbk Aug 10, 2022
eab3dd7
Buffer.__getitem__(): handle negative index
madsbk Aug 10, 2022
a307821
moved is_c_contiguous() to module level
madsbk Aug 10, 2022
714c497
get_ptr_and_size(): docs
madsbk Aug 10, 2022
78b802e
test_buffer_repr(): testing 1 and -1
madsbk Aug 10, 2022
b40cfb4
ListFrame => List[Frame]
madsbk Aug 10, 2022
2d2adda
test_buffer_from_cuda_iface_dtype(): check data
madsbk Aug 10, 2022
19114a8
__getitem__(): remove support of integers
madsbk Aug 11, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 14 additions & 14 deletions python/cudf/cudf/_lib/column.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import rmm
import cudf
import cudf._lib as libcudf
from cudf.api.types import is_categorical_dtype, is_list_dtype, is_struct_dtype
from cudf.core.buffer import Buffer
from cudf.core.buffer import Buffer, as_buffer, buffer_from_pointer

from cpython.buffer cimport PyObject_CheckBuffer
from libc.stdint cimport uintptr_t
Expand Down Expand Up @@ -233,27 +233,27 @@ cdef class Column:
if isinstance(value, Column):
value = value.data_array_view
value = cp.asarray(value).view('|u1')
mask = Buffer(value)
mask = as_buffer(value)
if mask.size < required_num_bytes:
raise ValueError(error_msg.format(str(value.size)))
if mask.size < mask_size:
dbuf = rmm.DeviceBuffer(size=mask_size)
dbuf.copy_from_device(value)
mask = Buffer(dbuf)
mask = as_buffer(dbuf)
elif hasattr(value, "__array_interface__"):
value = np.asarray(value).view("u1")[:mask_size]
if value.size < required_num_bytes:
raise ValueError(error_msg.format(str(value.size)))
dbuf = rmm.DeviceBuffer(size=mask_size)
dbuf.copy_from_host(value)
mask = Buffer(dbuf)
mask = as_buffer(dbuf)
elif PyObject_CheckBuffer(value):
value = np.asarray(value).view("u1")[:mask_size]
if value.size < required_num_bytes:
raise ValueError(error_msg.format(str(value.size)))
dbuf = rmm.DeviceBuffer(size=mask_size)
dbuf.copy_from_host(value)
mask = Buffer(dbuf)
mask = as_buffer(dbuf)
else:
raise TypeError(
"Expected a Buffer-like object or None for mask, got "
Expand Down Expand Up @@ -455,11 +455,11 @@ cdef class Column:
cdef column_contents contents = move(c_col.get()[0].release())

data = DeviceBuffer.c_from_unique_ptr(move(contents.data))
data = Buffer(data)
data = as_buffer(data)

if has_nulls:
mask = DeviceBuffer.c_from_unique_ptr(move(contents.null_mask))
mask = Buffer(mask)
mask = as_buffer(mask)
null_count = c_col.get()[0].null_count()
else:
mask = None
Expand Down Expand Up @@ -511,18 +511,18 @@ cdef class Column:

if data_ptr:
if data_owner is None:
data = Buffer(
data = as_buffer(
rmm.DeviceBuffer(ptr=data_ptr,
size=(size+offset) * dtype.itemsize)
)
else:
data = Buffer(
data=data_ptr,
data = buffer_from_pointer(
ptr=data_ptr,
size=(base_size) * dtype.itemsize,
owner=data_owner
)
else:
data = Buffer(
data = as_buffer(
rmm.DeviceBuffer(ptr=data_ptr, size=0)
)

Expand Down Expand Up @@ -552,15 +552,15 @@ cdef class Column:
# result:
mask = None
else:
mask = Buffer(
mask = as_buffer(
rmm.DeviceBuffer(
ptr=mask_ptr,
size=bitmask_allocation_size_bytes(size+offset)
)
)
else:
mask = Buffer(
data=mask_ptr,
mask = buffer_from_pointer(
ptr=mask_ptr,
size=bitmask_allocation_size_bytes(base_size),
owner=mask_owner
)
Expand Down
6 changes: 3 additions & 3 deletions python/cudf/cudf/_lib/concat.pyx
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020, NVIDIA CORPORATION.
# Copyright (c) 2020-2022, NVIDIA CORPORATION.

from libcpp cimport bool
from libcpp.memory cimport make_unique, unique_ptr
Expand All @@ -19,7 +19,7 @@ from cudf._lib.utils cimport (
table_view_from_table,
)

from cudf.core.buffer import Buffer
from cudf.core.buffer import as_buffer

from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer

Expand All @@ -31,7 +31,7 @@ cpdef concat_masks(object columns):
with nogil:
c_result = move(libcudf_concatenate_masks(c_views))
c_unique_result = make_unique[device_buffer](move(c_result))
return Buffer(DeviceBuffer.c_from_unique_ptr(move(c_unique_result)))
return as_buffer(DeviceBuffer.c_from_unique_ptr(move(c_unique_result)))


cpdef concat_columns(object columns):
Expand Down
8 changes: 6 additions & 2 deletions python/cudf/cudf/_lib/copying.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ from libcpp.vector cimport vector
from rmm._lib.device_buffer cimport DeviceBuffer

import cudf
from cudf.core.buffer import Buffer
from cudf.core.buffer import Buffer, buffer_from_pointer

from cudf._lib.column cimport Column

Expand Down Expand Up @@ -718,7 +718,11 @@ cdef class _CPackedColumns:
header = {}
frames = []

gpu_data = Buffer(self.gpu_data_ptr, self.gpu_data_size, self)
gpu_data = buffer_from_pointer(
ptr=self.gpu_data_ptr,
size=self.gpu_data_size,
owner=self
)
data_header, data_frames = gpu_data.serialize()
header["data"] = data_header
frames.extend(data_frames)
Expand Down
6 changes: 3 additions & 3 deletions python/cudf/cudf/_lib/null_mask.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ from cudf._lib.cpp.null_mask cimport (
)
from cudf._lib.cpp.types cimport mask_state, size_type

from cudf.core.buffer import Buffer
from cudf.core.buffer import as_buffer


class MaskState(Enum):
Expand Down Expand Up @@ -47,7 +47,7 @@ def copy_bitmask(Column col):
up_db = make_unique[device_buffer](move(db))

rmm_db = DeviceBuffer.c_from_unique_ptr(move(up_db))
buf = Buffer(rmm_db)
buf = as_buffer(rmm_db)
return buf


Expand Down Expand Up @@ -93,5 +93,5 @@ def create_null_mask(size_type size, state=MaskState.UNINITIALIZED):
up_db = make_unique[device_buffer](move(db))

rmm_db = DeviceBuffer.c_from_unique_ptr(move(up_db))
buf = Buffer(rmm_db)
buf = as_buffer(rmm_db)
return buf
6 changes: 3 additions & 3 deletions python/cudf/cudf/_lib/transform.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ from numba.np import numpy_support
import cudf
from cudf._lib.types import SUPPORTED_NUMPY_TO_LIBCUDF_TYPES
from cudf.core._internals.expressions import parse_expression
from cudf.core.buffer import Buffer
from cudf.core.buffer import as_buffer
from cudf.utils import cudautils

from cython.operator cimport dereference
Expand Down Expand Up @@ -52,7 +52,7 @@ def bools_to_mask(Column col):
up_db = move(cpp_out.first)

rmm_db = DeviceBuffer.c_from_unique_ptr(move(up_db))
buf = Buffer(rmm_db)
buf = as_buffer(rmm_db)
return buf


Expand Down Expand Up @@ -88,7 +88,7 @@ def nans_to_nulls(Column input):
return None

buffer = DeviceBuffer.c_from_unique_ptr(move(c_buffer))
buffer = Buffer(buffer)
buffer = as_buffer(buffer)
return buffer


Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/core/abc.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2021, NVIDIA CORPORATION.
# Copyright (c) 2020-2022, NVIDIA CORPORATION.
"""Common abstract base classes for cudf."""

import sys
Expand Down Expand Up @@ -130,7 +130,7 @@ def device_deserialize(cls, header, frames):
"""
typ = pickle.loads(header["type-serialized"])
frames = [
cudf.core.buffer.Buffer(f) if c else memoryview(f)
cudf.core.buffer.as_buffer(f) if c else memoryview(f)
for c, f in zip(header["is-cuda"], frames)
]
assert all(
Expand Down
Loading