From 698b015ed5e6b43262b7929cbe9033b7f8bdc09b Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Tue, 14 May 2024 17:10:07 -0500 Subject: [PATCH] fix: prevent exponential memory growth in UnionArray --- src/awkward/contents/unionarray.py | 4 +-- ...st_2713_from_buffers_allow_noncanonical.py | 7 ++-- ...exponential_memory_growth_in_unionarray.py | 34 +++++++++++++++++++ 3 files changed, 40 insertions(+), 5 deletions(-) create mode 100644 tests/test_3118_prevent_exponential_memory_growth_in_unionarray.py diff --git a/src/awkward/contents/unionarray.py b/src/awkward/contents/unionarray.py index bb3b6ca929..213b5c84b1 100644 --- a/src/awkward/contents/unionarray.py +++ b/src/awkward/contents/unionarray.py @@ -431,7 +431,7 @@ def simplified( ] if len(contents) == 1: - next = contents[0]._carry(index, True) + next = contents[0]._carry(index, False) return next.copy(parameters=parameters_union(next._parameters, parameters)) else: @@ -702,7 +702,7 @@ def project(self, index): nextcarry = ak.index.Index64( tmpcarry.data[: lenout[0]], nplike=self._backend.index_nplike ) - return self._contents[index]._carry(nextcarry, True) + return self._contents[index]._carry(nextcarry, False) @staticmethod def regular_index( diff --git a/tests/test_2713_from_buffers_allow_noncanonical.py b/tests/test_2713_from_buffers_allow_noncanonical.py index 21de9bb510..58df61bf8b 100644 --- a/tests/test_2713_from_buffers_allow_noncanonical.py +++ b/tests/test_2713_from_buffers_allow_noncanonical.py @@ -120,9 +120,10 @@ def test_union_simplification(): projected = ak.from_buffers( projected_form, length, container, allow_noncanonical_form=True ) + assert projected.layout.form.to_dict(verbose=False) == { - "class": "IndexedArray", - "index": "i64", - "content": {"class": "RecordArray", "fields": ["x"], "contents": ["int64"]}, + "class": "RecordArray", + "fields": ["x"], + "contents": ["int64"], } assert ak.almost_equal(array[["x"]], projected) diff --git 
a/tests/test_3118_prevent_exponential_memory_growth_in_unionarray.py b/tests/test_3118_prevent_exponential_memory_growth_in_unionarray.py
new file mode 100644
index 0000000000..38eca227fd
--- /dev/null
+++ b/tests/test_3118_prevent_exponential_memory_growth_in_unionarray.py
@@ -0,0 +1,34 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward/blob/main/LICENSE
+
+from __future__ import annotations
+
+import awkward as ak
+
+
+def test():  # repeated field assignment must keep every leaf node small
+    one_a = ak.Array([{"x": 1, "y": 2}], with_name="T")
+    one_b = ak.Array([{"x": 1, "y": 2}], with_name="T")
+    two_a = ak.Array([{"x": 1, "z": 3}], with_name="T")  # "z" where one_* has "y"
+    two_b = ak.Array([{"x": 1, "z": 3}], with_name="T")
+    three = ak.Array([{"x": 4}, {"x": 4}], with_name="T")  # two entries
+
+    first = ak.zip({"a": one_a, "b": one_b})
+    second = ak.zip({"a": two_a, "b": two_b})
+
+    cat = ak.concatenate([first, second], axis=0)  # one entry from each input
+
+    cat["another"] = three
+
+    def check(layout):  # walk the layout tree down to nodes with no children
+        if hasattr(layout, "contents"):
+            for x in layout.contents:
+                check(x)
+        elif hasattr(layout, "content"):
+            check(layout.content)
+        else:  # neither attribute present: treated as a leaf
+            assert layout.length <= 2  # presumably 2 == entries in cat; confirm
+
+    for _ in range(5):  # check first, then assign again, five times over
+        check(cat.layout)
+
+        cat["another", "w"] = three.x  # the final assignment is never re-checked