feat: to/from TensorFlow Tensor (#3292)

* add new functions * add tests --------- Co-authored-by: Jim Pivarski <[email protected]>
scikit-hep · Nov 7, 2024 · fa7be40 · fa7be40
1 parent cf41b65
commit fa7be40
Show file tree

Hide file tree

Showing 5 changed files with 226 additions and 0 deletions.
diff --git a/docs/reference/toctree.txt b/docs/reference/toctree.txt
@@ -44,6 +44,8 @@
 
     generated/ak.from_raggedtensor
     generated/ak.to_raggedtensor
+    generated/ak.from_tensorflow
+    generated/ak.to_tensorflow
     generated/ak.from_torch
     generated/ak.to_torch
 

diff --git a/src/awkward/operations/__init__.py b/src/awkward/operations/__init__.py
@@ -47,6 +47,7 @@
 from awkward.operations.ak_from_raggedtensor import *
 from awkward.operations.ak_from_rdataframe import *
 from awkward.operations.ak_from_regular import *
+from awkward.operations.ak_from_tensorflow import *
 from awkward.operations.ak_from_torch import *
 from awkward.operations.ak_full_like import *
 from awkward.operations.ak_imag import *
@@ -103,6 +104,7 @@
 from awkward.operations.ak_to_raggedtensor import *
 from awkward.operations.ak_to_rdataframe import *
 from awkward.operations.ak_to_regular import *
+from awkward.operations.ak_to_tensorflow import *
 from awkward.operations.ak_to_torch import *
 from awkward.operations.ak_transform import *
 from awkward.operations.ak_type import *

diff --git a/src/awkward/operations/ak_from_tensorflow.py b/src/awkward/operations/ak_from_tensorflow.py
@@ -0,0 +1,69 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward/blob/main/LICENSE
+
+from __future__ import annotations
+
+import re
+
+import awkward as ak
+from awkward._dispatch import high_level_function
+
+__all__ = ("from_tensorflow",)
+
+
+@high_level_function()
+def from_tensorflow(array):
+    """
+    Args:
+        array (TensorFlow Tensor): Tensor to convert into an Awkward Array.
+
+    Converts a TensorFlow Tensor into an Awkward Array.
+
+    The result is kept on the same kind of device as the input: a tensor
+    backed by a GPU device is passed to CuPy through DLPack and wrapped with
+    #ak.from_cupy, while a tensor backed by a CPU device is converted with
+    `Tensor.numpy()` and wrapped with #ak.from_numpy.
+
+    If `array` is not a `tf.Tensor`, this function raises a TypeError. If the
+    `tensorflow` package cannot be imported, it raises an ImportError, and if
+    the tensor's `backing_device` string does not name a CPU or GPU device,
+    it raises a NotImplementedError.
+    """
+
+    # Dispatch: the arguments are yielded first, presumably for the
+    # @high_level_function decorator to inspect (confirm in awkward._dispatch)
+    yield (array,)
+
+    # Implementation
+    return _impl(array)
+
+
+def _impl(array):
+    # TensorFlow is imported lazily so that a missing installation surfaces
+    # as an ImportError carrying installation instructions.
+    try:
+        import tensorflow as tf
+    except ImportError as err:
+        raise ImportError(
+            """to use ak.from_tensorflow, you must install the 'tensorflow' package with:
+
+        pip install tensorflow
+or
+        conda install tensorflow"""
+        ) from err
+
+    # guard: only a Tensor is accepted
+    if not isinstance(array, tf.Tensor):
+        raise TypeError(
+            """only a TensorFlow Tensor can be converted to Awkward Array"""
+        )
+
+    # the resulting array stays on the same device as the input tensor, so
+    # the device kind is read out of the tensor's `backing_device` string
+    device = array.backing_device
+    parsed = re.match(".*:(CPU|GPU):[0-9]+", device)
+    if parsed is None:
+        raise NotImplementedError(
+            f"TensorFlow device has an unexpected format: {device!r}"
+        )
+
+    if parsed.group(1) == "GPU":
+        from awkward._nplikes.cupy import Cupy
+
+        cupy_like = Cupy.instance()
+        # zero-copy data exchange through DLPack
+        capsule = tf.experimental.dlpack.to_dlpack(array)
+        return ak.from_cupy(cupy_like.from_dlpack(capsule))
+
+    # The pattern admits only "CPU" or "GPU", so this is the CPU case.
+    # Going through `Tensor.numpy()` makes a copy unfortunately, since numpy
+    # is mutable and a TensorFlow tensor is not.
+    return ak.from_numpy(array.numpy())
diff --git a/src/awkward/operations/ak_to_tensorflow.py b/src/awkward/operations/ak_to_tensorflow.py
@@ -0,0 +1,79 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward/blob/main/LICENSE
+
+from __future__ import annotations
+
+import awkward as ak
+from awkward._dispatch import high_level_function
+from awkward._nplikes.numpy_like import NumpyMetadata
+
+__all__ = ("to_tensorflow",)
+
+# NumpyMetadata instance; its `ndarray` attribute is what `_impl` checks
+# against to tell a NumPy backend array apart from a CuPy one.
+np = NumpyMetadata.instance()
+
+
+@high_level_function()
+def to_tensorflow(array):
+    """
+    Args:
+        array: Array-like data. May be a high level #ak.Array,
+            or low-level #ak.contents.ListOffsetArray, #ak.contents.ListArray,
+            #ak.contents.RegularArray, #ak.contents.NumpyArray
+
+    Converts `array` (only ListOffsetArray, ListArray, RegularArray and NumpyArray data types supported)
+    into a TensorFlow Tensor, if possible.
+
+    The tensor is created on the device that matches the array's backend:
+    `"CPU:0"` for the `"cpu"` backend, and the GPU that holds the data for the
+    `"cuda"` backend. Any other backend raises a ValueError.
+
+    If `array` contains any other data types (RecordArray for example) the function raises a TypeError.
+    The same TypeError is raised whenever the layout cannot be converted into a
+    single backend array (the conversion is attempted with `allow_missing=False`).
+    If the `tensorflow` package cannot be imported, an ImportError is raised.
+    """
+
+    # Dispatch: the arguments are yielded first, presumably for the
+    # @high_level_function decorator to inspect (confirm in awkward._dispatch)
+    yield (array,)
+
+    # Implementation
+    return _impl(array)
+
+
+def _impl(array):
+    # TensorFlow is imported lazily so that a missing installation surfaces
+    # as an ImportError carrying installation instructions.
+    try:
+        import tensorflow as tf
+    except ImportError as err:
+        raise ImportError(
+            """to use ak.to_tensorflow, you must install the 'tensorflow' package with:
+
+        pip install tensorflow
+or
+        conda install tensorflow"""
+        ) from err
+
+    # useful function that handles all possible input arrays
+    array = ak.to_layout(array, allow_record=False)
+
+    # get the device array is on
+    ak_device = ak.backend(array)
+
+    if ak_device not in ("cuda", "cpu"):
+        raise ValueError("""Only 'cpu' and 'cuda' backend conversions are allowed""")
+
+    # convert to numpy or cupy if `array` on gpu; a layout that cannot become
+    # one rectangular backend array is reported as a TypeError
+    try:
+        backend_array = array.to_backend_array(allow_missing=False)
+    except ValueError as err:
+        raise TypeError(
+            "Only arrays containing equal-length lists of numbers can be converted into a TensorFlow Tensor"
+        ) from err
+
+    if ak_device == "cpu":
+        device = "CPU:0"
+    else:
+        # create the tensor on the same GPU that holds the CuPy buffer
+        device = f"GPU:{backend_array.data.device.id}"
+
+    with tf.device(device):
+        # check if cupy or numpy
+        if isinstance(backend_array, np.ndarray):
+            # convert numpy to a tensorflow tensor, letting TensorFlow take the
+            # dtype from the array: forcing float64 here would silently cast
+            # integer, boolean and float32 data (and lose precision for large
+            # int64 values), unlike the dtype-preserving DLPack path below
+            return tf.convert_to_tensor(backend_array)
+
+        # cupy -> tensorflow tensor (DLPack keeps the CuPy dtype)
+        return tf.experimental.dlpack.from_dlpack(backend_array.toDlpack())
diff --git a/tests/test_3292_to_tensorflow_from_tensorflow.py b/tests/test_3292_to_tensorflow_from_tensorflow.py
@@ -0,0 +1,74 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward/blob/main/LICENSE
+
+from __future__ import annotations
+
+import numpy as np
+import pytest
+
+import awkward as ak
+
+# short aliases for the two functions under test
+to_tensorflow = ak.operations.to_tensorflow
+from_tensorflow = ak.operations.from_tensorflow
+
+# skip this whole test module when TensorFlow cannot be imported
+tf = pytest.importorskip("tensorflow")
+
+# the values 0..7 in a (2, 2, 2) shape: `a` as float64, `b` with NumPy's
+# default integer dtype
+a = np.arange(2 * 2 * 2, dtype=np.float64).reshape(2, 2, 2)
+b = np.arange(2 * 2 * 2).reshape(2, 2, 2)
+
+# the values 0..29, flattened to serve as the content of a doubly nested
+# ListOffsetArray: the inner offsets cut the content into six lists of five,
+# the outer offsets group those into two lists of three
+array = np.arange(2 * 3 * 5).reshape(2, 3, 5)
+content2 = ak.contents.NumpyArray(array.reshape(-1))
+inneroffsets = ak.index.Index64(np.array([0, 5, 10, 15, 20, 25, 30]))
+outeroffsets = ak.index.Index64(np.array([0, 3, 6]))
+
+
+def test_to_tensorflow():
+    # a basic test for a 4 dimensional array; mixing the float64 `a` with the
+    # integer `b` is expected to give floats everywhere
+    array1 = ak.Array([a, b])
+    # convert once and reuse the tensor for every check below
+    tensor1 = to_tensorflow(array1)
+    expected_block = [[[0.0, 1.0], [2.0, 3.0]], [[4.0, 5.0], [6.0, 7.0]]]
+    for i, sub_array in enumerate([expected_block, expected_block]):
+        assert tensor1[i].numpy().tolist() == sub_array
+
+    # test that the data types are remaining the same (float64 in this case)
+    assert array1.layout.to_backend_array().dtype.name in str(tensor1.dtype)
+
+    # try a listoffset array inside a listoffset array
+    array2 = ak.contents.ListOffsetArray(
+        outeroffsets, ak.contents.ListOffsetArray(inneroffsets, content2)
+    )
+    tensor2 = to_tensorflow(array2)
+    assert tensor2[0].numpy().tolist() == [
+        [0, 1, 2, 3, 4],
+        [5, 6, 7, 8, 9],
+        [10, 11, 12, 13, 14],
+    ]
+    assert tensor2[1].numpy().tolist() == [
+        [15, 16, 17, 18, 19],
+        [20, 21, 22, 23, 24],
+        [25, 26, 27, 28, 29],
+    ]
+
+    # try just a python list
+    array3 = [3, 1, 4, 1, 9, 2, 6]
+    assert to_tensorflow(array3).numpy().tolist() == [3, 1, 4, 1, 9, 2, 6]
+
+
+# input tensors for test_from_tensorflow: `array1` is built with an explicit
+# float32 dtype, `array2` from a NumPy integer array with no dtype given
+array1 = tf.constant([[1.0, -1.0], [1.0, -1.0]], dtype=tf.float32)
+array2 = tf.constant(np.array([[1, 2, 3], [4, 5, 6]]))
+
+
+def test_from_tensorflow():
+    from_float = from_tensorflow(array1)
+    from_int = from_tensorflow(array2)
+
+    # the values survive the conversion:
+    # Awkward.to_list() == Tensor.numpy().tolist()
+    assert from_float.to_list() == array1.numpy().tolist()
+    assert from_int.to_list() == [[1, 2, 3], [4, 5, 6]]
+
+    # the dtype is carried over unchanged (array1 was built as float32)
+    assert from_float.layout.dtype.name in str(array1.dtype)
+
+    # the dtype is carried over unchanged (array2 is the integer tensor)
+    assert from_int.layout.dtype.name in str(array2.dtype)