diff --git a/python/ray/air/tests/test_tensor_extension.py b/python/ray/air/tests/test_tensor_extension.py
index b5c18256a108..670dc3e33466 100644
--- a/python/ray/air/tests/test_tensor_extension.py
+++ b/python/ray/air/tests/test_tensor_extension.py
@@ -676,14 +676,6 @@ def test_variable_shaped_tensor_array_uniform_dim():
     np.testing.assert_array_equal(a, expected)
 
 
-def test_large_arrow_tensor_array():
-    test_arr = np.ones((1000, 550), dtype=np.uint8)
-    ta = ArrowTensorArray.from_numpy([test_arr] * 4000)
-    assert len(ta) == 4000
-    for arr in ta:
-        assert np.asarray(arr).shape == (1000, 550)
-
-
 if __name__ == "__main__":
     import sys
 
diff --git a/python/ray/air/util/tensor_extensions/arrow.py b/python/ray/air/util/tensor_extensions/arrow.py
index 0c912fcfd9ce..ab7b3a247788 100644
--- a/python/ray/air/util/tensor_extensions/arrow.py
+++ b/python/ray/air/util/tensor_extensions/arrow.py
@@ -105,7 +105,7 @@ def __init__(self, shape: Tuple[int, ...], dtype: pa.DataType):
             dtype: pyarrow dtype of tensor elements.
         """
         self._shape = shape
-        super().__init__(pa.large_list(dtype), "ray.data.arrow_tensor")
+        super().__init__(pa.list_(dtype), "ray.data.arrow_tensor")
 
     @property
     def shape(self):
@@ -316,7 +316,7 @@ class ArrowTensorArray(_ArrowTensorScalarIndexingMixin, pa.ExtensionArray):
     https://arrow.apache.org/docs/python/extending_types.html#custom-extension-array-class
     """
 
-    OFFSET_DTYPE = np.int64
+    OFFSET_DTYPE = np.int32
 
     @classmethod
     def from_numpy(
@@ -414,7 +414,7 @@ def _from_numpy(
         )
 
         storage = pa.Array.from_buffers(
-            pa.large_list(pa_dtype),
+            pa.list_(pa_dtype),
             outer_len,
             [None, offset_buffer],
             children=[data_array],
@@ -612,9 +612,7 @@ def __init__(self, dtype: pa.DataType, ndim: int):
         """
         self._ndim = ndim
         super().__init__(
-            pa.struct(
-                [("data", pa.large_list(dtype)), ("shape", pa.list_(pa.int64()))]
-            ),
+            pa.struct([("data", pa.list_(dtype)), ("shape", pa.list_(pa.int64()))]),
             "ray.data.arrow_variable_shaped_tensor",
         )
 
@@ -721,7 +719,7 @@ class ArrowVariableShapedTensorArray(
     https://arrow.apache.org/docs/python/extending_types.html#custom-extension-array-class
     """
 
-    OFFSET_DTYPE = np.int64
+    OFFSET_DTYPE = np.int32
 
     @classmethod
     def from_numpy(
@@ -811,7 +809,7 @@ def from_numpy(
         # corresponds to a tensor element.
        size_offsets = np.insert(size_offsets, 0, 0)
         offset_array = pa.array(size_offsets)
-        data_array = pa.LargeListArray.from_arrays(offset_array, value_array)
+        data_array = pa.ListArray.from_arrays(offset_array, value_array)
         # We store the tensor element shapes so we can reconstruct each tensor when
         # converting back to NumPy ndarrays.
         shape_array = pa.array(shapes)
diff --git a/python/ray/data/tests/test_tensor.py b/python/ray/data/tests/test_tensor.py
index 4ccd2fa05a4b..852142bf4150 100644
--- a/python/ray/data/tests/test_tensor.py
+++ b/python/ray/data/tests/test_tensor.py
@@ -42,9 +42,7 @@ def test_tensors_basic(ray_start_regular_shared):
         "Dataset(num_rows=6, schema={data: numpy.ndarray(shape=(3, 5), dtype=int64)})"
     )
     # The actual size is slightly larger due to metadata.
-    # We add 6 (one per tensor) offset values of 8 bytes each to account for the
-    # in-memory representation of the PyArrow LargeList type
-    assert math.isclose(ds.size_bytes(), 5 * 3 * 6 * 8 + 6 * 8, rel_tol=0.1)
+    assert math.isclose(ds.size_bytes(), 5 * 3 * 6 * 8, rel_tol=0.1)
 
     # Test row iterator yields tensors.
     for tensor in ds.iter_rows():
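
For context, a minimal sketch (not part of the patch, using only public PyArrow APIs) of the storage difference this diff switches between: pa.list_ stores 32-bit offsets (4 bytes per entry, so a single array can address at most roughly 2**31 - 1 child values), while pa.large_list stores 64-bit offsets (8 bytes per entry). This is presumably why the removed test_large_arrow_tensor_array (4000 tensors of 1000 x 550 uint8 values, about 2.2 billion elements total) required the large_list layout, and why test_tensors_basic drops the extra 6 * 8 offset-byte term from its size estimate.

    # Illustration only; not part of the diff above.
    import numpy as np
    import pyarrow as pa

    values = pa.array(np.arange(6, dtype=np.int64))

    # list<int64>: offsets are int32 (4 bytes per entry).
    small = pa.ListArray.from_arrays(pa.array([0, 3, 6], type=pa.int32()), values)
    # large_list<int64>: offsets are int64 (8 bytes per entry).
    large = pa.LargeListArray.from_arrays(pa.array([0, 3, 6], type=pa.int64()), values)

    print(small.type, small.offsets.type)  # list<item: int64> int32
    print(large.type, large.offsets.type)  # large_list<item: int64> int64

    # int32 offsets cap the flattened child array at ~2**31 - 1 values; the
    # removed test needed 4000 * 1000 * 550 = 2.2e9 uint8 values, beyond that cap.
    print(4000 * 1000 * 550 > 2**31 - 1)  # True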