Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: issue #3118 and #3185 #3193

17 changes: 17 additions & 0 deletions src/awkward/contents/indexedarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -626,6 +626,7 @@ def _mergemany(self, others: Sequence[Content]) -> Content:
if isinstance(
array, (ak.contents.IndexedOptionArray, ak.contents.IndexedArray)
):
array = array._trim() # see: #3185 and #3119
parameters = parameters_intersect(parameters, array._parameters)

contents.append(array.content)
Expand Down Expand Up @@ -1177,3 +1178,19 @@ def _is_equal_to(
other.content, index_dtype, numpyarray, all_parameters
)
)

def _trim(self) -> Self:
nplike = self._backend.index_nplike

if not nplike.known_data or self._index.length == 0:
return self

idx_buf = nplike.asarray(self._index.data, copy=True)
min_idx = nplike.min(idx_buf)
max_idx = nplike.max(idx_buf)
idx_buf -= min_idx
index = Index(idx_buf)

# left and right trim
content = self._content._getitem_range(min_idx, max_idx + 1)
return IndexedArray(index, content, parameters=self._parameters)
23 changes: 23 additions & 0 deletions src/awkward/contents/indexedoptionarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -761,6 +761,7 @@ def _mergemany(self, others: Sequence[Content]) -> Content:
if isinstance(
array, (ak.contents.IndexedOptionArray, ak.contents.IndexedArray)
):
array = array._trim() # see: #3185 and #3119
# If we're merging an option, then merge parameters before pulling out `content`
parameters = parameters_intersect(parameters, array._parameters)
contents.append(array.content)
Expand Down Expand Up @@ -1767,6 +1768,28 @@ def _is_equal_to(
)
)

def _trim(self) -> Self:
nplike = self._backend.index_nplike

if not nplike.known_data or self._index.length == 0:
return self

idx_buf = nplike.asarray(self._index.data, copy=True)
only_positive = idx_buf >= 0

# no positive index at all
if not nplike.any(only_positive):
return self

min_idx = nplike.min(idx_buf[only_positive])
max_idx = nplike.max(idx_buf[only_positive])
jpivarski marked this conversation as resolved.
Show resolved Hide resolved
idx_buf[only_positive] -= min_idx
index = Index(idx_buf)

# left and right trim
content = self._content._getitem_range(min_idx, max_idx + 1)
return IndexedOptionArray(index, content, parameters=self._parameters)


def create_missing_data(dtype, backend):
"""Create missing data based on the input dtype
Expand Down
4 changes: 2 additions & 2 deletions src/awkward/contents/unionarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -431,7 +431,7 @@ def simplified(
]

if len(contents) == 1:
next = contents[0]._carry(index, False)
next = contents[0]._carry(index, True)
return next.copy(parameters=parameters_union(next._parameters, parameters))

else:
Expand Down Expand Up @@ -702,7 +702,7 @@ def project(self, index):
nextcarry = ak.index.Index64(
tmpcarry.data[: lenout[0]], nplike=self._backend.index_nplike
)
return self._contents[index]._carry(nextcarry, False)
return self._contents[index]._carry(nextcarry, True)

@staticmethod
def regular_index(
Expand Down
6 changes: 3 additions & 3 deletions tests/test_2713_from_buffers_allow_noncanonical.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,8 @@ def test_union_simplification():
)

assert projected.layout.form.to_dict(verbose=False) == {
"class": "RecordArray",
"fields": ["x"],
"contents": ["int64"],
"class": "IndexedArray",
"index": "i64",
"content": {"class": "RecordArray", "fields": ["x"], "contents": ["int64"]},
}
assert ak.almost_equal(array[["x"]], projected)
44 changes: 34 additions & 10 deletions tests/test_3118_prevent_exponential_memory_growth_in_unionarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,17 @@
import awkward as ak


def test():
def check(layout, assert_length):
if hasattr(layout, "contents"):
for x in layout.contents:
check(x, assert_length)
elif hasattr(layout, "content"):
check(layout.content, assert_length)
else:
assert layout.length <= assert_length


def test_2arrays():
one_a = ak.Array([{"x": 1, "y": 2}], with_name="T")
one_b = ak.Array([{"x": 1, "y": 2}], with_name="T")
two_a = ak.Array([{"x": 1, "z": 3}], with_name="T")
Expand All @@ -19,16 +29,30 @@ def test():

cat["another"] = three

def check(layout):
if hasattr(layout, "contents"):
for x in layout.contents:
check(x)
elif hasattr(layout, "content"):
check(layout.content)
else:
assert layout.length <= 2
for _ in range(5):
check(cat.layout, 2)

cat["another", "w"] = three.x


def test_3arrays():
zero_a = ak.Array([{"x": 1, "y": 1}], with_name="T")
zero_b = ak.Array([{"x": 1, "v": 1}], with_name="T")
one_a = ak.Array([{"x": 1, "y": 2}], with_name="T")
one_b = ak.Array([{"x": 1, "y": 2}], with_name="T")
two_a = ak.Array([{"x": 1, "z": 3}], with_name="T")
two_b = ak.Array([{"x": 1, "z": 3}], with_name="T")
three = ak.Array([{"x": 4}, {"x": 4}, {"x": 4}], with_name="T")

zeroth = ak.zip({"a": zero_a, "b": zero_b})
first = ak.zip({"a": one_a, "b": one_b})
second = ak.zip({"a": two_a, "b": two_b})

cat = ak.concatenate([zeroth, first, second], axis=0)

cat["another"] = three

for _ in range(5):
check(cat.layout)
check(cat.layout, 3)

cat["another", "w"] = three.x
Loading