Skip to content

Commit

Permalink
fix: add cuda backend support for `to_raggedtensor` and `from_raggedtensor` functions (#3263)
Browse files Browse the repository at this point in the history

* add cuda backend support

* style changes

* keep gpu id the same

* style changes

* fix device id selection

* add new functions to the documentation

* add cuda backend support for ak.from_raggedtensor

* add suggestions from Jim

* add suggestions from Jim

---------

Co-authored-by: Ianna Osborne <[email protected]>
  • Loading branch information
maxymnaumchyk and ianna authored Oct 28, 2024
1 parent 3bb2661 commit c7ebd58
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 10 deletions.
8 changes: 8 additions & 0 deletions docs/reference/toctree.txt
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,14 @@
generated/ak.to_feather
generated/ak.from_avro_file

.. toctree::
:caption: Conversions for machine learning

generated/ak.from_raggedtensor
generated/ak.to_raggedtensor
generated/ak.from_torch
generated/ak.to_torch

.. toctree::
:caption: Converting to Pandas DataFrames

Expand Down
42 changes: 39 additions & 3 deletions src/awkward/operations/ak_from_raggedtensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

from __future__ import annotations

import re

import awkward as ak
from awkward._dispatch import high_level_function

Expand Down Expand Up @@ -30,18 +32,25 @@ def from_raggedtensor(array):
def _impl(array):
try:
# get the flat values
content = array.flat_values.numpy()
content = array.flat_values
except AttributeError as err:
raise TypeError(
"""only RaggedTensor can be converted to awkward array"""
) from err
# convert them to ak.contents right away

# handle gpu and cpu instances separately
device = content.backing_device

content = _tensor_to_np_or_cp(content, device)

# convert flat_values to ak.contents right away
content = ak.contents.NumpyArray(content)

# get the offsets
offsets_arr = []
for splits in array.nested_row_splits:
split = splits.numpy()
# handle gpu and cpu instances separately
split = _tensor_to_np_or_cp(splits, device)
# convert to ak.index
offset = ak.index.Index64(split)
offsets_arr.append(offset)
Expand All @@ -55,6 +64,33 @@ def _impl(array):
return ak.Array(_recursive_call(content, offsets_arr, 0))


def _tensor_to_np_or_cp(array, device):
matched_device = re.match(".*:(CPU|GPU):[0-9]+", device)

if matched_device is None:
raise NotImplementedError(
f"TensorFlow device has an unexpected format: {device!r}"
)
elif matched_device.groups()[0] == "GPU":
try:
import tensorflow as tf
except ImportError as err:
raise ImportError(
"""to use ak.from_raggedtensor, you must install the 'tensorflow' package with:
pip install tensorflow
or
conda install tensorflow"""
) from err

from awkward._nplikes.cupy import Cupy

cp = Cupy.instance()
return cp.from_dlpack(tf.experimental.dlpack.to_dlpack(array))
elif matched_device.groups()[0] == "CPU":
return array.numpy()


def _recursive_call(content, offsets_arr, count):
if count == len(offsets_arr) - 2:
return ak.contents.ListOffsetArray(
Expand Down
56 changes: 49 additions & 7 deletions src/awkward/operations/ak_to_raggedtensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,12 @@

import awkward as ak
from awkward._dispatch import high_level_function
from awkward._nplikes.numpy_like import NumpyMetadata

__all__ = ("to_raggedtensor",)

np = NumpyMetadata.instance()


@high_level_function()
def to_raggedtensor(array):
Expand Down Expand Up @@ -45,14 +48,49 @@ def _impl(array):
# also transforms a python list to awkward array
array = ak.to_layout(array, allow_record=False)

# keep the same device
ak_device = ak.backend(array)
if ak_device not in ["cuda", "cpu"]:
raise ValueError("""Only 'cpu' and 'cuda' backend conversions are allowed""")

if ak_device == "cpu":
device = "CPU:0"
else:
id = _find_innermost_content(array).data.device.id
device = "GPU:" + str(id)

with tf.device(device):
if isinstance(array, ak.contents.numpyarray.NumpyArray):
values = array.data
# handle cupy separately
values = _convert_to_tensor_if_cupy(values)
return tf.RaggedTensor.from_row_splits(
values=values, row_splits=[0, array.__len__()]
)

else:
flat_values, nested_row_splits = _recursive_call(array, ())
return tf.RaggedTensor.from_nested_row_splits(
flat_values, nested_row_splits
)


def _find_innermost_content(array):
if isinstance(array, ak.contents.numpyarray.NumpyArray):
return tf.RaggedTensor.from_row_splits(
values=array.data, row_splits=[0, array.__len__()]
)
return array
else:
return _find_innermost_content(array.content)


def _convert_to_tensor_if_cupy(array):
if isinstance(array, np.ndarray):
return array
else:
flat_values, nested_row_splits = _recursive_call(array, ())
# converts cupy directly to tensor,
# since `tf.RaggedTensor.from_nested_row_splits` can not work with Cupy arrays
import tensorflow as tf

return tf.RaggedTensor.from_nested_row_splits(flat_values, nested_row_splits)
return tf.experimental.dlpack.from_dlpack(array.toDlpack())


def _recursive_call(layout, offsets_arr):
Expand All @@ -75,10 +113,14 @@ def _recursive_call(layout, offsets_arr):
)

# recursively gather all of the offsets of an array
offsets_arr += (layout.offsets.data,)
offset = layout.offsets.data
offset = _convert_to_tensor_if_cupy(offset)
offsets_arr += (offset,)

except AttributeError:
# at the last iteration form a ragged tensor from the
# accumulated offsets and flattened values of the array
return layout.data, offsets_arr
data = layout.data
data = _convert_to_tensor_if_cupy(data)
return data, offsets_arr
return _recursive_call(layout.content, offsets_arr)

0 comments on commit c7ebd58

Please sign in to comment.