From a94fc2d87d124bb1ad694c40db258599f6124885 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Tue, 21 Mar 2023 11:23:01 +0000 Subject: [PATCH 01/13] docs: remove unneeded docstring --- src/awkward/operations/ak_cartesian.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/awkward/operations/ak_cartesian.py b/src/awkward/operations/ak_cartesian.py index ad6327941b..c078d59da3 100644 --- a/src/awkward/operations/ak_cartesian.py +++ b/src/awkward/operations/ak_cartesian.py @@ -177,10 +177,7 @@ def cartesian( [(4, 3.3, 'a'), (4, 3.3, 'b')]] The order of the output is fixed: it is always lexicographical in the - order that the `arrays` are written. (Before Python 3.6, the order of - keys in a dict were not guaranteed, so the dict interface is not - recommended for these versions of Python.) Thus, it is not possible to - group by `three` in the example above. + order that the `arrays` are written. To emulate an SQL or Pandas "group by" operation, put the keys that you wish to group by *first* and use `nested=[0]` or `nested=[n]` to group by From f07f59c7b4110d08a315e556d73db3b51a436491 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Tue, 21 Mar 2023 11:23:11 +0000 Subject: [PATCH 02/13] fix: typo in error --- src/awkward/operations/ak_cartesian.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/awkward/operations/ak_cartesian.py b/src/awkward/operations/ak_cartesian.py index c078d59da3..6e0e65d147 100644 --- a/src/awkward/operations/ak_cartesian.py +++ b/src/awkward/operations/ak_cartesian.py @@ -283,7 +283,7 @@ def _impl(arrays, axis, nested, parameters, with_name, highlevel, behavior): ): raise ak._errors.wrap_error( ValueError( - "the 'nested' prarmeter of cartesian must be integers in " + "the 'nested' parameter of cartesian must be integers in " "[0, len(arrays) - 1) for an iterable of arrays" ) ) From 317c823af2a03a7f509a10568342487b133098dd Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Tue, 21 Mar 2023 11:23:25 +0000 Subject: [PATCH 03/13] fix: bug in nested validation --- src/awkward/operations/ak_cartesian.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/awkward/operations/ak_cartesian.py b/src/awkward/operations/ak_cartesian.py index 6e0e65d147..ccdfa7e1d0 100644 --- a/src/awkward/operations/ak_cartesian.py +++ b/src/awkward/operations/ak_cartesian.py @@ -257,7 +257,7 @@ def _impl(arrays, axis, nested, parameters, with_name, highlevel, behavior): if isinstance(new_arrays, dict): if nested is True: nested = list(new_arrays.keys()) # last key is ignored below - if any(not (isinstance(n, str) and n in new_arrays) for x in nested): + if any(not (isinstance(x, str) and x in new_arrays) for x in nested): raise ak._errors.wrap_error( ValueError( "the 'nested' parameter of cartesian must be dict keys " From 68a699d895af897e68c6ab7f5ed8a087b83bfa77 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Tue, 21 Mar 2023 12:05:37 +0000 Subject: [PATCH 04/13] fix: add error for incorrect nesting --- src/awkward/operations/ak_cartesian.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/awkward/operations/ak_cartesian.py b/src/awkward/operations/ak_cartesian.py index ccdfa7e1d0..1b88ee68a0 100644 --- a/src/awkward/operations/ak_cartesian.py +++ b/src/awkward/operations/ak_cartesian.py @@ -292,7 +292,13 @@ def _impl(arrays, axis, nested, parameters, with_name, highlevel, behavior): for x in new_arrays: layouts.append(x) - layouts = list(layouts) + if len(nested) >= len(layouts): + raise ak._errors.wrap_error( + ValueError( + "the `nested` parameter of cartesian must contain " + "fewer items than there are arrays" + ) + ) indexes = [ ak.index.Index64(backend.index_nplike.reshape(x, (-1,))) From 4379ecb3a1a59d96ebf448a9299430557e4309ae Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Tue, 21 Mar 2023 12:22:58 +0000 Subject: [PATCH 05/13] refactor: simplify validation --- src/awkward/operations/ak_cartesian.py | 154 +++++++++++-------------- 1 file changed, 69 insertions(+), 85 deletions(-) diff --git a/src/awkward/operations/ak_cartesian.py b/src/awkward/operations/ak_cartesian.py index 1b88ee68a0..187151393c 100644 --- a/src/awkward/operations/ak_cartesian.py +++ b/src/awkward/operations/ak_cartesian.py @@ -240,6 +240,8 @@ def _impl(arrays, axis, nested, parameters, with_name, highlevel, behavior): new_arrays_values = new_arrays posaxis = maybe_posaxis(new_arrays_values[0], axis, 1) + + # Validate `posaxis` if posaxis is None or posaxis < 0: raise ak._errors.wrap_error(ValueError("negative axis depth is ambiguous")) for x in new_arrays_values[1:]: @@ -250,13 +252,16 @@ def _impl(arrays, axis, nested, parameters, with_name, highlevel, behavior): ) ) - if posaxis == 0: - if nested is None or nested is False: - nested = [] - + # Validate `nested` + if nested is None or nested is False: + nested = [] + elif nested is True: + if isinstance(new_arrays, dict): + nested = list(new_arrays.keys())[:-1] + else: + nested = list(range(len(new_arrays))[:-1]) + else: if isinstance(new_arrays, dict): - if nested is True: - nested = list(new_arrays.keys()) # last key is ignored below if any(not (isinstance(x, str) and x in new_arrays) for x in nested): raise ak._errors.wrap_error( ValueError( @@ -264,19 +269,14 @@ def _impl(arrays, axis, nested, parameters, with_name, highlevel, behavior): "for a dict of arrays" ) ) - fields = [] - layouts = [] - tonested = [] - for i, (n, x) in enumerate(new_arrays.items()): - fields.append(n) - layouts.append(x) - if n in nested: - tonested.append(i) - nested = tonested - + if len(nested) >= len(new_arrays): + raise ak._errors.wrap_error( + ValueError( + "the `nested` parameter of cartesian must contain " + "fewer items than there are arrays" + ) + ) else: - if nested is True: - nested = list(range(len(new_arrays) - 1)) if any( not (isinstance(x, int) and 0 <= x < len(new_arrays) - 1) for x in nested @@ -287,19 +287,25 @@ def _impl(arrays, axis, nested, parameters, with_name, highlevel, behavior): "[0, len(arrays) - 1) for an iterable of arrays" ) ) + + if posaxis == 0: + if isinstance(new_arrays, dict): + fields = [] + layouts = [] + tonested = [] + for i, (n, x) in enumerate(new_arrays.items()): + fields.append(n) + layouts.append(x) + if n in nested: + tonested.append(i) + nested = tonested + + else: fields = None layouts = [] for x in new_arrays: layouts.append(x) - if len(nested) >= len(layouts): - raise ak._errors.wrap_error( - ValueError( - "the `nested` parameter of cartesian must contain " - "fewer items than there are arrays" - ) - ) - indexes = [ ak.index.Index64(backend.index_nplike.reshape(x, (-1,))) for x in backend.index_nplike.meshgrid( @@ -308,8 +314,7 @@ def _impl(arrays, axis, nested, parameters, with_name, highlevel, behavior): ) ] outs = [ - ak.contents.IndexedArray.simplified(x, y) - for x, y in __builtins__["zip"](indexes, layouts) + ak.contents.IndexedArray.simplified(x, y) for x, y in zip(indexes, layouts) ] result = ak.contents.RecordArray(outs, fields, parameters=parameters) @@ -325,82 +330,61 @@ def newaxis(layout, i): else: return ak.contents.RegularArray(newaxis(layout, i - 1), 1, 0) - def getgetfunction1(i, **kwargs): - def getfunction1(layout, depth, **kwargs): - if depth == 2: - return newaxis(layout, i) - else: - return None - - return getfunction1 - - def getgetfunction2(i, **kwargs): - def getfunction2(layout, depth, **kwargs): - if depth == posaxis: - inside = len(new_arrays) - i - 1 - outside = i - if ( - layout.parameter("__array__") == "string" - or layout.parameter("__array__") == "bytestring" - ): - raise ak._errors.wrap_error( - ValueError( - "ak.cartesian does not compute combinations of the " - "characters of a string; please split it into lists" - ) + def getfunction1(layout, depth, lateral_context, **kwargs): + i = lateral_context["i"] + if depth == 2: + return newaxis(layout, i) + else: + return None + + def getfunction2(layout, depth, lateral_context, **kwargs): + i = lateral_context["i"] + if depth == posaxis: + inside = len(new_arrays) - i - 1 + outside = i + if ( + layout.parameter("__array__") == "string" + or layout.parameter("__array__") == "bytestring" + ): + raise ak._errors.wrap_error( + ValueError( + "ak.cartesian does not compute combinations of the " + "characters of a string; please split it into lists" ) - nextlayout = ak._do.recursively_apply( - layout, getgetfunction1(inside), behavior ) - return newaxis(nextlayout, outside) - else: - return None - - return getfunction2 - - def apply(x, i): - layout = ak.operations.to_layout(x, allow_record=False, allow_other=False) - return ak._do.recursively_apply(layout, getgetfunction2(i), behavior) + nextlayout = ak._do.recursively_apply( + layout, getfunction1, behavior, lateral_context={"i": inside} + ) + return newaxis(nextlayout, outside) + else: + return None toflatten = [] if nested is None or nested is False: nested = [] if isinstance(new_arrays, dict): - if nested is True: - nested = list(new_arrays.keys()) # last key is ignored below - if any(not (isinstance(n, str) and n in new_arrays) for x in nested): - raise ak._errors.wrap_error( - ValueError( - "the 'nested' parameter of cartesian must be dict keys " - "for a dict of arrays" - ) - ) fields = [] layouts = [] for i, (n, x) in enumerate(new_arrays.items()): fields.append(n) - layouts.append(apply(x, i)) + layouts.append( + ak._do.recursively_apply( + x, getfunction2, behavior, lateral_context={"i": i} + ) + ) if i < len(new_arrays) - 1 and n not in nested: toflatten.append(posaxis + i + 1) else: - if nested is True: - nested = list(range(len(new_arrays) - 1)) - if any( - not (isinstance(x, int) and 0 <= x < len(new_arrays) - 1) - for x in nested - ): - raise ak._errors.wrap_error( - ValueError( - "the 'nested' parameter of cartesian must be integers in " - "[0, len(arrays) - 1) for an iterable of arrays" - ) - ) fields = None layouts = [] for i, x in enumerate(new_arrays): - layouts.append(apply(x, i)) + layouts.append( + ak._do.recursively_apply( + x, getfunction2, behavior, lateral_context={"i": i} + ) + ) if i < len(new_arrays) - 1 and i not in nested: toflatten.append(posaxis + i + 1) From 7b2b348afe1bc9323d62dddee6e59239da3e45c5 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Tue, 21 Mar 2023 12:29:21 +0000 Subject: [PATCH 06/13] refactor: simplify logic --- src/awkward/operations/ak_cartesian.py | 105 ++++++++++++------------- 1 file changed, 49 insertions(+), 56 deletions(-) diff --git a/src/awkward/operations/ak_cartesian.py b/src/awkward/operations/ak_cartesian.py index 187151393c..7f7d12c42a 100644 --- a/src/awkward/operations/ak_cartesian.py +++ b/src/awkward/operations/ak_cartesian.py @@ -209,23 +209,23 @@ def _impl(arrays, axis, nested, parameters, with_name, highlevel, behavior): if isinstance(arrays, dict): backend = ak._backends.backend_of(*arrays.values(), default=cpu) behavior = behavior_of(*arrays.values(), behavior=behavior) - new_arrays = {} - for n, x in arrays.items(): - new_arrays[n] = ak.operations.to_layout( - x, allow_record=False, allow_other=False + array_layouts = { + name: ak.operations.to_layout( + layout, allow_record=False, allow_other=False ).to_backend(backend) + for name, layout in arrays.items() + } else: arrays = list(arrays) backend = ak._backends.backend_of(*arrays, default=cpu) behavior = behavior_of(*arrays, behavior=behavior) - new_arrays = [] - for x in arrays: - new_arrays.append( - ak.operations.to_layout( - x, allow_record=False, allow_other=False - ).to_backend(backend) - ) + array_layouts = [ + ak.operations.to_layout( + layout, allow_record=False, allow_other=False + ).to_backend(backend) + for layout in arrays + ] if with_name is not None: if parameters is None: @@ -234,18 +234,18 @@ def _impl(arrays, axis, nested, parameters, with_name, highlevel, behavior): parameters = dict(parameters) parameters["__record__"] = with_name - if isinstance(new_arrays, dict): - new_arrays_values = list(new_arrays.values()) + if isinstance(array_layouts, dict): + layouts = list(array_layouts.values()) else: - new_arrays_values = new_arrays + layouts = array_layouts - posaxis = maybe_posaxis(new_arrays_values[0], axis, 1) + posaxis = maybe_posaxis(layouts[0], axis, 1) # Validate `posaxis` if posaxis is None or posaxis < 0: raise ak._errors.wrap_error(ValueError("negative axis depth is ambiguous")) - for x in new_arrays_values[1:]: - if maybe_posaxis(x, axis, 1) != posaxis: + for layout in layouts[1:]: + if maybe_posaxis(layout, axis, 1) != posaxis: raise ak._errors.wrap_error( ValueError( "arrays to cartesian-product do not have the same depth for negative axis" @@ -256,20 +256,20 @@ def _impl(arrays, axis, nested, parameters, with_name, highlevel, behavior): if nested is None or nested is False: nested = [] elif nested is True: - if isinstance(new_arrays, dict): - nested = list(new_arrays.keys())[:-1] + if isinstance(array_layouts, dict): + nested = list(array_layouts.keys())[:-1] else: - nested = list(range(len(new_arrays))[:-1]) + nested = list(range(len(array_layouts))[:-1]) else: - if isinstance(new_arrays, dict): - if any(not (isinstance(x, str) and x in new_arrays) for x in nested): + if isinstance(array_layouts, dict): + if any(not (isinstance(x, str) and x in array_layouts) for x in nested): raise ak._errors.wrap_error( ValueError( "the 'nested' parameter of cartesian must be dict keys " "for a dict of arrays" ) ) - if len(nested) >= len(new_arrays): + if len(nested) >= len(array_layouts): raise ak._errors.wrap_error( ValueError( "the `nested` parameter of cartesian must contain " @@ -278,7 +278,7 @@ def _impl(arrays, axis, nested, parameters, with_name, highlevel, behavior): ) else: if any( - not (isinstance(x, int) and 0 <= x < len(new_arrays) - 1) + not (isinstance(x, int) and 0 <= x < len(array_layouts) - 1) for x in nested ): raise ak._errors.wrap_error( @@ -289,22 +289,17 @@ def _impl(arrays, axis, nested, parameters, with_name, highlevel, behavior): ) if posaxis == 0: - if isinstance(new_arrays, dict): + if isinstance(array_layouts, dict): fields = [] - layouts = [] tonested = [] - for i, (n, x) in enumerate(new_arrays.items()): - fields.append(n) - layouts.append(x) - if n in nested: + for i, (name, _) in enumerate(array_layouts.items()): + fields.append(name) + if name in nested: tonested.append(i) nested = tonested else: fields = None - layouts = [] - for x in new_arrays: - layouts.append(x) indexes = [ ak.index.Index64(backend.index_nplike.reshape(x, (-1,))) @@ -318,7 +313,7 @@ def _impl(arrays, axis, nested, parameters, with_name, highlevel, behavior): ] result = ak.contents.RecordArray(outs, fields, parameters=parameters) - for i in range(len(new_arrays) - 1, -1, -1): + for i in range(len(array_layouts))[::-1]: if i in nested: result = ak.contents.RegularArray(result, len(layouts[i + 1]), 0) @@ -340,7 +335,7 @@ def getfunction1(layout, depth, lateral_context, **kwargs): def getfunction2(layout, depth, lateral_context, **kwargs): i = lateral_context["i"] if depth == posaxis: - inside = len(new_arrays) - i - 1 + inside = len(array_layouts) - i - 1 outside = i if ( layout.parameter("__array__") == "string" @@ -359,37 +354,35 @@ def getfunction2(layout, depth, lateral_context, **kwargs): else: return None - toflatten = [] - if nested is None or nested is False: - nested = [] + axes_to_flatten = [] - if isinstance(new_arrays, dict): + if isinstance(array_layouts, dict): fields = [] - layouts = [] - for i, (n, x) in enumerate(new_arrays.items()): - fields.append(n) - layouts.append( + new_layouts = [] + for i, (name, layout) in enumerate(array_layouts.items()): + fields.append(name) + new_layouts.append( ak._do.recursively_apply( - x, getfunction2, behavior, lateral_context={"i": i} + layout, getfunction2, behavior, lateral_context={"i": i} ) ) - if i < len(new_arrays) - 1 and n not in nested: - toflatten.append(posaxis + i + 1) + if i < len(array_layouts) - 1 and name not in nested: + axes_to_flatten.append(posaxis + i + 1) else: fields = None - layouts = [] - for i, x in enumerate(new_arrays): - layouts.append( + new_layouts = [] + for i, layout in enumerate(array_layouts): + new_layouts.append( ak._do.recursively_apply( - x, getfunction2, behavior, lateral_context={"i": i} + layout, getfunction2, behavior, lateral_context={"i": i} ) ) - if i < len(new_arrays) - 1 and i not in nested: - toflatten.append(posaxis + i + 1) + if i < len(array_layouts) - 1 and i not in nested: + axes_to_flatten.append(posaxis + i + 1) def getfunction3(inputs, depth, **kwargs): - if depth == posaxis + len(new_arrays): + if depth == posaxis + len(array_layouts): if all(len(x) == 0 for x in inputs): inputs = [ x.content @@ -403,13 +396,13 @@ def getfunction3(inputs, depth, **kwargs): return None out = ak._broadcasting.broadcast_and_apply( - layouts, getfunction3, behavior, right_broadcast=False + new_layouts, getfunction3, behavior, right_broadcast=False ) assert isinstance(out, tuple) and len(out) == 1 result = out[0] - while len(toflatten) != 0: - flatten_axis = toflatten.pop() + while len(axes_to_flatten) != 0: + flatten_axis = axes_to_flatten.pop() result = ak.operations.flatten( result, axis=flatten_axis, highlevel=False, behavior=behavior ) From 013b2d567171bbdaa7c2ab1750ce11a124b87375 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Tue, 21 Mar 2023 13:19:54 +0000 Subject: [PATCH 07/13] refactor: explain logic --- src/awkward/operations/ak_cartesian.py | 97 ++++++++++++++++---------- 1 file changed, 61 insertions(+), 36 deletions(-) diff --git a/src/awkward/operations/ak_cartesian.py b/src/awkward/operations/ak_cartesian.py index 7f7d12c42a..a5b28f6e97 100644 --- a/src/awkward/operations/ak_cartesian.py +++ b/src/awkward/operations/ak_cartesian.py @@ -315,28 +315,43 @@ def _impl(arrays, axis, nested, parameters, with_name, highlevel, behavior): result = ak.contents.RecordArray(outs, fields, parameters=parameters) for i in range(len(array_layouts))[::-1]: if i in nested: - result = ak.contents.RegularArray(result, len(layouts[i + 1]), 0) + result = ak.contents.RegularArray(result, layouts[i + 1].length, 0) else: - def newaxis(layout, i): - if i == 0: + def add_outer_dimensions( + layout: ak.contents.Content, n: int + ) -> ak.contents.Content: + if n == 0: return layout else: - return ak.contents.RegularArray(newaxis(layout, i - 1), 1, 0) + return ak.contents.RegularArray( + add_outer_dimensions(layout, n - 1), 1, 0 + ) - def getfunction1(layout, depth, lateral_context, **kwargs): - i = lateral_context["i"] + def apply_pad_inner_list(layout, depth, lateral_context, **kwargs): + """ + Add new dimensions (given by lateral_context["n"]) above innermost list + """ + n = lateral_context["n"] + # We want to be above at least one dimension (list) if depth == 2: - return newaxis(layout, i) + return add_outer_dimensions(layout, n) else: return None - def getfunction2(layout, depth, lateral_context, **kwargs): + def apply_pad_inner_list_at_axis(layout, depth, lateral_context, **kwargs): + """ + Each array in arrays contributes to one of these new dimensions. + To make the cartesian product of the given arrays broadcastable, + each array is padded by (n, m) new length-1 regular dimensions + (above, below) the target depth. The values of (n, m) are given by + the position of the array; the first array is the outermost axis. + """ i = lateral_context["i"] if depth == posaxis: - inside = len(array_layouts) - i - 1 - outside = i + n_inside = len(array_layouts) - i - 1 + n_outside = i if ( layout.parameter("__array__") == "string" or layout.parameter("__array__") == "bytestring" @@ -348,40 +363,50 @@ def getfunction2(layout, depth, lateral_context, **kwargs): ) ) nextlayout = ak._do.recursively_apply( - layout, getfunction1, behavior, lateral_context={"i": inside} + layout, + apply_pad_inner_list, + behavior, + lateral_context={"n": n_inside}, ) - return newaxis(nextlayout, outside) + return add_outer_dimensions(nextlayout, n_outside) else: return None - axes_to_flatten = [] + # New _interior_ axes are added to the result layout, but + # unless explicitly named, these axes should be flattened. + axes_to_flatten = [ + posaxis + i + 1 + for i, _ in enumerate(array_layouts) + if i < len(array_layouts) - 1 and i not in nested + ] + # This list *must* be sorted in reverse order + axes_to_flatten.reverse() if isinstance(array_layouts, dict): - fields = [] - new_layouts = [] - for i, (name, layout) in enumerate(array_layouts.items()): - fields.append(name) - new_layouts.append( - ak._do.recursively_apply( - layout, getfunction2, behavior, lateral_context={"i": i} - ) + fields = list(array_layouts.keys()) + new_layouts = [ + ak._do.recursively_apply( + layout, + apply_pad_inner_list_at_axis, + behavior, + lateral_context={"i": i}, ) - if i < len(array_layouts) - 1 and name not in nested: - axes_to_flatten.append(posaxis + i + 1) + for i, (_, layout) in enumerate(array_layouts.items()) + ] else: fields = None - new_layouts = [] - for i, layout in enumerate(array_layouts): - new_layouts.append( - ak._do.recursively_apply( - layout, getfunction2, behavior, lateral_context={"i": i} - ) + new_layouts = [ + ak._do.recursively_apply( + layout, + apply_pad_inner_list_at_axis, + behavior, + lateral_context={"i": i}, ) - if i < len(array_layouts) - 1 and i not in nested: - axes_to_flatten.append(posaxis + i + 1) + for i, layout in enumerate(array_layouts) + ] - def getfunction3(inputs, depth, **kwargs): + def apply_build_record(inputs, depth, **kwargs): if depth == posaxis + len(array_layouts): if all(len(x) == 0 for x in inputs): inputs = [ @@ -396,15 +421,15 @@ def getfunction3(inputs, depth, **kwargs): return None out = ak._broadcasting.broadcast_and_apply( - new_layouts, getfunction3, behavior, right_broadcast=False + new_layouts, apply_build_record, behavior, right_broadcast=False ) assert isinstance(out, tuple) and len(out) == 1 result = out[0] - while len(axes_to_flatten) != 0: - flatten_axis = axes_to_flatten.pop() + # Remove surplus dimensions, iterating from smallest to greatest + for axis in axes_to_flatten: result = ak.operations.flatten( - result, axis=flatten_axis, highlevel=False, behavior=behavior + result, axis=axis, highlevel=False, behavior=behavior ) return wrap_layout(result, behavior, highlevel) From 9071194a681176d043c7e2f92fcd525df361e1be Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Tue, 21 Mar 2023 13:57:06 +0000 Subject: [PATCH 08/13] fix: support regular-array in broadcasting branch --- src/awkward/_broadcasting.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/awkward/_broadcasting.py b/src/awkward/_broadcasting.py index d34c01d628..fb717b86bd 100644 --- a/src/awkward/_broadcasting.py +++ b/src/awkward/_broadcasting.py @@ -841,9 +841,14 @@ def continuation(): else: lencontent = backend.index_nplike.max(stops) nextinputs.append(x.content[:lencontent]) - + elif isinstance(x, RegularArray): + nextinputs.append(x.content[: x.size * x.length]) else: - nextinputs.append(x) + raise ak._errors.wrap_error( + AssertionError( + "encountered non list-type despite all_same_offsets requiring lists" + ) + ) outcontent = apply_step( backend, From daee0a939c9c64a150c57d54e1b49a483ca51a33 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Tue, 21 Mar 2023 13:57:51 +0000 Subject: [PATCH 09/13] fix: drop special case in cartesian --- src/awkward/operations/ak_cartesian.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/awkward/operations/ak_cartesian.py b/src/awkward/operations/ak_cartesian.py index a5b28f6e97..fca63ee6be 100644 --- a/src/awkward/operations/ak_cartesian.py +++ b/src/awkward/operations/ak_cartesian.py @@ -408,13 +408,6 @@ def apply_pad_inner_list_at_axis(layout, depth, lateral_context, **kwargs): def apply_build_record(inputs, depth, **kwargs): if depth == posaxis + len(array_layouts): - if all(len(x) == 0 for x in inputs): - inputs = [ - x.content - if isinstance(x, ak.contents.RegularArray) and x.size == 1 - else x - for x in inputs - ] return (ak.contents.RecordArray(inputs, fields, parameters=parameters),) else: From 417cf5aa0093a768cad3836da20557e15ce4ebca Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Tue, 21 Mar 2023 13:59:34 +0000 Subject: [PATCH 10/13] fix: revert strict assertion (mistake) --- src/awkward/_broadcasting.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/awkward/_broadcasting.py b/src/awkward/_broadcasting.py index fb717b86bd..eb9a09e6fb 100644 --- a/src/awkward/_broadcasting.py +++ b/src/awkward/_broadcasting.py @@ -844,11 +844,7 @@ def continuation(): elif isinstance(x, RegularArray): nextinputs.append(x.content[: x.size * x.length]) else: - raise ak._errors.wrap_error( - AssertionError( - "encountered non list-type despite all_same_offsets requiring lists" - ) - ) + nextinputs.append(x) outcontent = apply_step( backend, From 1488c101c314abe79f230e075705a710fe01afd3 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Tue, 21 Mar 2023 14:05:42 +0000 Subject: [PATCH 11/13] test: add missing test branches --- .../test_2329_cartesian_broadcasting_fixes.py | 66 +++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 tests/test_2329_cartesian_broadcasting_fixes.py diff --git a/tests/test_2329_cartesian_broadcasting_fixes.py b/tests/test_2329_cartesian_broadcasting_fixes.py new file mode 100644 index 0000000000..6b6a228779 --- /dev/null +++ b/tests/test_2329_cartesian_broadcasting_fixes.py @@ -0,0 +1,66 @@ +# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE + +import numpy as np +import pytest # noqa: F401 + +import awkward as ak + + +def test_nested_exis_0(): + arrays = {"x": np.arange(4), "y": ["this", "that", "foo", "bar!"]} + + result = ak.cartesian(arrays, nested=True, axis=0) + assert result.to_list() == [ + [ + {"x": 0, "y": "this"}, + {"x": 0, "y": "that"}, + {"x": 0, "y": "foo"}, + {"x": 0, "y": "bar!"}, + ], + [ + {"x": 1, "y": "this"}, + {"x": 1, "y": "that"}, + {"x": 1, "y": "foo"}, + {"x": 1, "y": "bar!"}, + ], + [ + {"x": 2, "y": "this"}, + {"x": 2, "y": "that"}, + {"x": 2, "y": "foo"}, + {"x": 2, "y": "bar!"}, + ], + [ + {"x": 3, "y": "this"}, + {"x": 3, "y": "that"}, + {"x": 3, "y": "foo"}, + {"x": 3, "y": "bar!"}, + ], + ] + + result = ak.cartesian(arrays, nested=["x"], axis=0) + assert result.to_list() == [ + [ + {"x": 0, "y": "this"}, + {"x": 0, "y": "that"}, + {"x": 0, "y": "foo"}, + {"x": 0, "y": "bar!"}, + ], + [ + {"x": 1, "y": "this"}, + {"x": 1, "y": "that"}, + {"x": 1, "y": "foo"}, + {"x": 1, "y": "bar!"}, + ], + [ + {"x": 2, "y": "this"}, + {"x": 2, "y": "that"}, + {"x": 2, "y": "foo"}, + {"x": 2, "y": "bar!"}, + ], + [ + {"x": 3, "y": "this"}, + {"x": 3, "y": "that"}, + {"x": 3, "y": "foo"}, + {"x": 3, "y": "bar!"}, + ], + ] From e534d0bfd80c4f9c3de33b6a4d8fb1e0a3b611f9 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Tue, 21 Mar 2023 14:13:20 +0000 Subject: [PATCH 12/13] fix: include endpoint --- src/awkward/_broadcasting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/awkward/_broadcasting.py b/src/awkward/_broadcasting.py index eb9a09e6fb..baf0d940e6 100644 --- a/src/awkward/_broadcasting.py +++ b/src/awkward/_broadcasting.py @@ -160,7 +160,7 @@ def all_same_offsets(backend: ak._backends.Backend, inputs: list) -> bool: my_offsets = index_nplike.empty(0, dtype=np.int64) else: my_offsets = index_nplike.arange( - 0, x.content.length, x.size, dtype=np.int64 + 0, x.content.length + 1, x.size, dtype=np.int64 ) if offsets is None: From da5c4faf12b52793650ab9feddb11348a9ab6d05 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Tue, 21 Mar 2023 14:36:28 +0000 Subject: [PATCH 13/13] chore: run pylint --- src/awkward/operations/ak_cartesian.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/awkward/operations/ak_cartesian.py b/src/awkward/operations/ak_cartesian.py index fca63ee6be..e700a884be 100644 --- a/src/awkward/operations/ak_cartesian.py +++ b/src/awkward/operations/ak_cartesian.py @@ -420,9 +420,9 @@ def apply_build_record(inputs, depth, **kwargs): result = out[0] # Remove surplus dimensions, iterating from smallest to greatest - for axis in axes_to_flatten: + for axis_to_flatten in axes_to_flatten: result = ak.operations.flatten( - result, axis=axis, highlevel=False, behavior=behavior + result, axis=axis_to_flatten, highlevel=False, behavior=behavior ) return wrap_layout(result, behavior, highlevel)