Skip to content

Commit

Permalink
feat: to/from TensorFlow Tensor (#3292)
Browse files Browse the repository at this point in the history
* add new functions

* add tests

---------

Co-authored-by: Jim Pivarski <[email protected]>
  • Loading branch information
maxymnaumchyk and jpivarski authored Nov 7, 2024
1 parent cf41b65 commit fa7be40
Show file tree
Hide file tree
Showing 5 changed files with 226 additions and 0 deletions.
2 changes: 2 additions & 0 deletions docs/reference/toctree.txt
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@

generated/ak.from_raggedtensor
generated/ak.to_raggedtensor
generated/ak.from_tensorflow
generated/ak.to_tensorflow
generated/ak.from_torch
generated/ak.to_torch

Expand Down
2 changes: 2 additions & 0 deletions src/awkward/operations/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
from awkward.operations.ak_from_raggedtensor import *
from awkward.operations.ak_from_rdataframe import *
from awkward.operations.ak_from_regular import *
from awkward.operations.ak_from_tensorflow import *
from awkward.operations.ak_from_torch import *
from awkward.operations.ak_full_like import *
from awkward.operations.ak_imag import *
Expand Down Expand Up @@ -103,6 +104,7 @@
from awkward.operations.ak_to_raggedtensor import *
from awkward.operations.ak_to_rdataframe import *
from awkward.operations.ak_to_regular import *
from awkward.operations.ak_to_tensorflow import *
from awkward.operations.ak_to_torch import *
from awkward.operations.ak_transform import *
from awkward.operations.ak_type import *
Expand Down
69 changes: 69 additions & 0 deletions src/awkward/operations/ak_from_tensorflow.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# BSD 3-Clause License; see https://github.com/scikit-hep/awkward/blob/main/LICENSE

from __future__ import annotations

import re

import awkward as ak
from awkward._dispatch import high_level_function

# Only the public entry point is exported when this module is star-imported.
__all__ = ("from_tensorflow",)


@high_level_function()
def from_tensorflow(array):
    """
    Args:
        array (TensorFlow Tensor): Tensor to convert into an Awkward Array.

    Converts a TensorFlow Tensor into an Awkward Array.

    Tensors whose device is reported as a CPU are converted through NumPy;
    tensors on a GPU are handed to CuPy through DLPack.

    If `array` is not a TensorFlow Tensor, this function raises a TypeError.
    If the 'tensorflow' package is not installed, it raises an ImportError.
    """

    # Dispatch: the `high_level_function` decorator drives this generator
    # (presumably the yielded tuple names the arguments eligible for
    # dispatch -- confirm against awkward._dispatch).
    yield (array,)

    # Implementation
    return _impl(array)


def _impl(array):
try:
import tensorflow as tf
except ImportError as err:
raise ImportError(
"""to use ak.from_tensorflow, you must install the 'tensorflow' package with:
pip install tensorflow
or
conda install tensorflow"""
) from err

# check if array is a Tensor
if not isinstance(array, tf.Tensor):
raise TypeError(
"""only a TensorFlow Tensor can be converted to Awkward Array"""
)

# keep the resulting array on the same device as input tensor
device = array.backing_device
matched_device = re.match(".*:(CPU|GPU):[0-9]+", device)

if matched_device is None:
raise NotImplementedError(
f"TensorFlow device has an unexpected format: {device!r}"
)
elif matched_device.groups()[0] == "GPU":
from awkward._nplikes.cupy import Cupy

cp = Cupy.instance()
# zero-copy data exchange through DLPack
cp_array = cp.from_dlpack(tf.experimental.dlpack.to_dlpack(array))
ak_array = ak.from_cupy(cp_array)

elif matched_device.groups()[0] == "CPU":
# this makes a copy unfortunately, since numpy is mutable and TensorFlow tensor is not
np_array = array.numpy()
ak_array = ak.from_numpy(np_array)

return ak_array
79 changes: 79 additions & 0 deletions src/awkward/operations/ak_to_tensorflow.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# BSD 3-Clause License; see https://github.com/scikit-hep/awkward/blob/main/LICENSE

from __future__ import annotations

import awkward as ak
from awkward._dispatch import high_level_function
from awkward._nplikes.numpy_like import NumpyMetadata

# Only the public entry point is exported when this module is star-imported.
__all__ = ("to_tensorflow",)

# NumpyMetadata instance; `_impl` below uses `np.ndarray` in an isinstance
# check to choose between the NumPy and the DLPack conversion paths.
np = NumpyMetadata.instance()


@high_level_function()
def to_tensorflow(array):
    """
    Args:
        array: Array-like data. May be a high level #ak.Array,
            or low-level #ak.contents.ListOffsetArray, #ak.contents.ListArray,
            #ak.contents.RegularArray, #ak.contents.NumpyArray

    Converts `array` into a TensorFlow Tensor, if possible. Only
    ListOffsetArray, ListArray, RegularArray and NumpyArray data types are
    supported, and only on the 'cpu' and 'cuda' backends.

    If `array` contains any other data types (RecordArray for example), or
    cannot be turned into a rectilinear array of numbers without missing
    values, this function raises a TypeError. Any other backend raises a
    ValueError, and a missing 'tensorflow' package raises an ImportError.
    """

    # Dispatch: the `high_level_function` decorator drives this generator
    # (presumably the yielded tuple names the arguments eligible for
    # dispatch -- confirm against awkward._dispatch).
    yield (array,)

    # Implementation
    return _impl(array)


def _impl(array):
    """Convert array-like data into a TensorFlow Tensor on the matching device.

    Args:
        array: anything accepted by `ak.to_layout` (records are not allowed).

    Returns:
        A TensorFlow Tensor. Data on the 'cpu' backend is converted with
        `tf.convert_to_tensor` and placed on "CPU:0"; data on the 'cuda'
        backend is handed over through DLPack and placed on the GPU whose
        index matches the CuPy array's device id. In both cases the dtype of
        the backend array is kept.

    Raises:
        ImportError: if the 'tensorflow' package cannot be imported.
        ValueError: if the array's backend is neither 'cpu' nor 'cuda'.
        TypeError: if `to_backend_array(allow_missing=False)` rejects the
            layout with a ValueError (the message names unequal-length lists
            or non-numeric content as the cause).
    """
    try:
        import tensorflow as tf
    except ImportError as err:
        raise ImportError(
            """to use ak.to_tensorflow, you must install the 'tensorflow' package with:
pip install tensorflow
or
conda install tensorflow"""
        ) from err

    # Normalise every accepted input (high-level arrays, layouts, plain
    # Python lists, ...) into a low-level layout.
    layout = ak.to_layout(array, allow_record=False)

    # Find out which backend the data lives on.
    ak_device = ak.backend(layout)

    if ak_device not in ["cuda", "cpu"]:
        raise ValueError("""Only 'cpu' and 'cuda' backend conversions are allowed""")

    # Convert to a NumPy array, or to a CuPy array if the data is on a GPU.
    try:
        backend_array = layout.to_backend_array(allow_missing=False)
    except ValueError as err:
        raise TypeError(
            "Only arrays containing equal-length lists of numbers can be converted into a TensorFlow Tensor"
        ) from err

    if ak_device == "cpu":
        device = "CPU:0"
    else:
        # Place the tensor on the GPU that already holds the CuPy data.
        # (Named `gpu_index` so the builtin `id` is not shadowed.)
        gpu_index = backend_array.data.device.id
        device = "GPU:" + str(gpu_index)

    with tf.device(device):
        if isinstance(backend_array, np.ndarray):
            # NumPy -> TensorFlow. No explicit `dtype` is passed, so
            # TensorFlow infers it from the array: integer, float32, bool and
            # complex data keep their type instead of being cast to (or
            # rejected by) float64. This matches the DLPack branch below,
            # which also keeps the dtype.
            tensor = tf.convert_to_tensor(backend_array)
        else:
            # CuPy -> TensorFlow through DLPack.
            tensor = tf.experimental.dlpack.from_dlpack(backend_array.toDlpack())

    return tensor
74 changes: 74 additions & 0 deletions tests/test_3292_to_tensorflow_from_tensorflow.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# BSD 3-Clause License; see https://github.com/scikit-hep/awkward/blob/main/LICENSE

from __future__ import annotations

import numpy as np
import pytest

import awkward as ak

# Short aliases for the two functions under test.
to_tensorflow = ak.operations.to_tensorflow
from_tensorflow = ak.operations.from_tensorflow

# Skip every test in this module when TensorFlow is not installed.
tf = pytest.importorskip("tensorflow")

# Two 2x2x2 inputs holding 0..7: `a` is explicitly float64, `b` uses NumPy's
# default integer dtype.
a = np.arange(2 * 2 * 2, dtype=np.float64).reshape(2, 2, 2)
b = np.arange(2 * 2 * 2).reshape(2, 2, 2)

# Flat content 0..29 plus two levels of offsets: six inner lists of five
# items each, grouped into two outer lists of three inner lists each.
array = np.arange(2 * 3 * 5).reshape(2, 3, 5)
content2 = ak.contents.NumpyArray(array.reshape(-1))
inneroffsets = ak.index.Index64(np.array([0, 5, 10, 15, 20, 25, 30]))
outeroffsets = ak.index.Index64(np.array([0, 3, 6]))


def test_to_tensorflow():
    """Check `to_tensorflow` on a 4-D array, nested list-offset layouts and a plain list."""
    # A basic test for a 4 dimensional array: both halves hold the values
    # 0..7 arranged as 2x2x2.
    array1 = ak.Array([a, b])
    expected_half = [[[0.0, 1.0], [2.0, 3.0]], [[4.0, 5.0], [6.0, 7.0]]]
    for i, sub_array in enumerate([expected_half, expected_half]):
        assert to_tensorflow(array1)[i].numpy().tolist() == sub_array

    # The dtype name of the backend array (float64 here) must show up in the
    # tensor's dtype.
    source_dtype_name = array1.layout.to_backend_array().dtype.name
    assert source_dtype_name in str(to_tensorflow(array1).dtype)

    # Try a ListOffsetArray inside a ListOffsetArray.
    inner = ak.contents.ListOffsetArray(inneroffsets, content2)
    array2 = ak.contents.ListOffsetArray(outeroffsets, inner)
    expected_outer = [
        [
            [0, 1, 2, 3, 4],
            [5, 6, 7, 8, 9],
            [10, 11, 12, 13, 14],
        ],
        [
            [15, 16, 17, 18, 19],
            [20, 21, 22, 23, 24],
            [25, 26, 27, 28, 29],
        ],
    ]
    for i, expected_rows in enumerate(expected_outer):
        assert to_tensorflow(array2)[i].numpy().tolist() == expected_rows

    # Try just a Python list.
    array3 = [3, 1, 4, 1, 9, 2, 6]
    assert to_tensorflow(array3).numpy().tolist() == [3, 1, 4, 1, 9, 2, 6]


# Input tensors for test_from_tensorflow: `array1` is an explicit float32
# 2x2 tensor; `array2` is a 2x3 integer tensor whose dtype comes from the
# NumPy array (presumably int64 on most platforms -- platform dependent).
# test_to_tensorflow uses local variables with the same names.
array1 = tf.constant([[1.0, -1.0], [1.0, -1.0]], dtype=tf.float32)
array2 = tf.constant(np.array([[1, 2, 3], [4, 5, 6]]))


def test_from_tensorflow():
    """Check that `from_tensorflow` keeps both the values and the dtype of a tensor."""
    # Values: Awkward.to_list() == Tensor.numpy().tolist()
    assert from_tensorflow(array1).to_list() == array1.numpy().tolist()
    assert from_tensorflow(array2).to_list() == [[1, 2, 3], [4, 5, 6]]

    # Dtypes: the layout's dtype name must show up in the tensor's dtype,
    # first for the float32 tensor (`array1`), then for the integer tensor
    # (`array2`).
    for tensor in (array1, array2):
        assert from_tensorflow(tensor).layout.dtype.name in str(tensor.dtype)

0 comments on commit fa7be40

Please sign in to comment.