From 038fd92d8b307ffbdf31dc7b5a415df2271b6f20 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Wed, 1 Feb 2023 11:43:33 +0000 Subject: [PATCH 01/16] wip: initial stab at `merge_union_of_records` --- src/awkward/operations/__init__.py | 1 + .../operations/ak_merge_union_of_records.py | 154 ++++++++++++++++++ 2 files changed, 155 insertions(+) create mode 100644 src/awkward/operations/ak_merge_union_of_records.py diff --git a/src/awkward/operations/__init__.py b/src/awkward/operations/__init__.py index c5976176e8..05270d41fc 100644 --- a/src/awkward/operations/__init__.py +++ b/src/awkward/operations/__init__.py @@ -47,6 +47,7 @@ from awkward.operations.ak_mask import mask from awkward.operations.ak_max import max, nanmax from awkward.operations.ak_mean import mean, nanmean +from awkward.operations.ak_merge_union_of_records import merge_union_of_records from awkward.operations.ak_metadata_from_parquet import metadata_from_parquet from awkward.operations.ak_min import min, nanmin from awkward.operations.ak_moment import moment diff --git a/src/awkward/operations/ak_merge_union_of_records.py b/src/awkward/operations/ak_merge_union_of_records.py new file mode 100644 index 0000000000..30ec65b4e8 --- /dev/null +++ b/src/awkward/operations/ak_merge_union_of_records.py @@ -0,0 +1,154 @@ +# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE + + +import awkward as ak +from awkward._nplikes.numpylike import NumpyMetadata + +np = NumpyMetadata.instance() +cpu = ak._backends.NumpyBackend.instance() + + +def merge_union_of_records(array, axis=-1, *, highlevel=True, behavior=None): + """ + Args: + array: Array-like data (anything #ak.to_layout recognizes). + axis (int): The dimension at which this operation is applied. + The outermost dimension is `0`, followed by `1`, etc., and negative + values count backward from the innermost: `-1` is the innermost + dimension, `-2` is the next level up, etc. 
+ highlevel (bool): If True, return an #ak.Array; otherwise, return + a low-level #ak.contents.Content subclass. + behavior (None or dict): Custom #ak.behavior for the output array, if + high-level. + + Simplifies unions of records, e.g. + + >>> ak.concatenate(([{"a": 1}], [{"b": 2}])) + + + into records of options, i.e. + + >>> array = ak.Array([[1.1, None, 2.2], [], [None, 3.3, 4.4]]) + + """ + with ak._errors.OperationErrorContext( + "ak.merge_union_of_records", + dict(array=array, axis=axis, highlevel=highlevel, behavior=behavior), + ): + return _impl(array, axis, highlevel, behavior) + + +def _impl(array, axis, highlevel, behavior): + behavior = ak._util.behavior_of(array, behavior=behavior) + layout = ak.to_layout(array, allow_record=False) + + def apply_displace_index(layout, backend, **kwargs): + if layout.is_record: + return layout + elif (layout.is_option or layout.is_indexed) and layout.content.is_record: + # Transpose option-of-record to record-of-option + return ak.contents.RecordArray( + [layout.copy(content=c) for c in layout.content.contents], + layout.content.fields, + layout.content.length, + backend=backend, + ) + else: + raise ak._errors.wrap_error(TypeError(layout)) + + def apply(layout, depth, backend, **kwargs): + posaxis = ak._util.maybe_posaxis(layout, axis, depth) + if posaxis + 1 == depth and layout.is_union: + # First, find all ordered fields, regularising any index-of-record + # such that we have record-of-index + seen_fields = set() + all_fields = [] + regularised_contents = [] + for content in layout.contents: + # Ensure that we have record-of-index + regularised_content = ak._do.recursively_apply( + content, apply_displace_index + ) + regularised_contents.append(regularised_content) + + # Find new fields + for field in regularised_content.fields: + if field not in seen_fields: + seen_fields.add(field) + all_fields.append(field) + + # Build unions for each field + outer_field_contents = [] + for field in all_fields: + field_tags = 
backend.index_nplike.asarray(layout.tags, copy=True) + field_index = backend.index_nplike.asarray(layout.index, copy=True) + + # Build contents for union representing current field + field_contents = [ + c.content(field) for c in regularised_contents if c.has_field(field) + ] + + # Find the best location for option type. + # We will potentially have fewer contents in this per-field union + # than the original outer union-of-records, because some recordarrays + # may not have the given field. + tag_for_missing = 0 + for i, content in enumerate(field_contents): + if content.is_option: + tag_for_missing = i + break + + # If at least one recordarray doesn't have this field, we add + # a special option + if len(field_contents) < len(regularised_contents): + # Make the tagged content an option, growing by one to ensure we + # have a known `None` value to index into + tagged_content = field_contents[tag_for_missing] + indexedoption_index = backend.index_nplike.arange( + tagged_content.length + 1, dtype=np.int64 + ) + indexedoption_index[tagged_content.length] = -1 + field_contents[ + tag_for_missing + ] = ak.contents.IndexedOptionArray.simplified( + ak.index.Index64(indexedoption_index), tagged_content + ) + + # Now build contents for union, by looping over outermost index + # Overwrite tags to adjust for new contents length + # and use the tagged content for any missing values + k = 0 + for j, content in enumerate(regularised_contents): + tag_is_j = field_tags == j + + if content.has_field(field): + # Rewrite tags to account for missing fields + field_tags[tag_is_j] = k + k += 1 + + else: + # Rewrite tags to point to option content + field_tags[tag_is_j] = tag_for_missing + # Point each value to missing value + field_index[tag_is_j] = ( + field_contents[tag_for_missing].length - 1 + ) + + outer_field_contents.append( + ak.contents.UnionArray.simplified( + ak.index.Index8(field_tags), + ak.index.Index64(field_index), + field_contents, + ) + ) + return 
ak.contents.RecordArray( + outer_field_contents, all_fields, backend=backend + ) + + elif layout.is_leaf: + raise ak._errors.wrap_error( + np.AxisError(f"axis={axis} exceeds the depth of this array ({depth})") + ) + + out = ak._do.recursively_apply(layout, apply) + return ak._util.wrap(out, highlevel=highlevel, behavior=behavior) From 3e859e05d57db6af6af2a53486a5bda74b568435 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Wed, 1 Feb 2023 11:45:13 +0000 Subject: [PATCH 02/16] docs: fix docstring --- src/awkward/operations/ak_merge_union_of_records.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/awkward/operations/ak_merge_union_of_records.py b/src/awkward/operations/ak_merge_union_of_records.py index 30ec65b4e8..e068bed61d 100644 --- a/src/awkward/operations/ak_merge_union_of_records.py +++ b/src/awkward/operations/ak_merge_union_of_records.py @@ -23,12 +23,13 @@ def merge_union_of_records(array, axis=-1, *, highlevel=True, behavior=None): Simplifies unions of records, e.g. - >>> ak.concatenate(([{"a": 1}], [{"b": 2}])) + >>> array = ak.concatenate(([{"a": 1}], [{"b": 2}])) + >>> array into records of options, i.e. 
- >>> array = ak.Array([[1.1, None, 2.2], [], [None, 3.3, 4.4]]) + >>> ak.merge_union_of_records(array) """ with ak._errors.OperationErrorContext( From 75de7da9f8d70e1f1ecdf8ad6ffc017cf5ab87db Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 1 Feb 2023 11:45:36 +0000 Subject: [PATCH 03/16] style: pre-commit fixes --- src/awkward/operations/ak_merge_union_of_records.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/awkward/operations/ak_merge_union_of_records.py b/src/awkward/operations/ak_merge_union_of_records.py index e068bed61d..0d66cfb9d5 100644 --- a/src/awkward/operations/ak_merge_union_of_records.py +++ b/src/awkward/operations/ak_merge_union_of_records.py @@ -34,7 +34,7 @@ def merge_union_of_records(array, axis=-1, *, highlevel=True, behavior=None): """ with ak._errors.OperationErrorContext( "ak.merge_union_of_records", - dict(array=array, axis=axis, highlevel=highlevel, behavior=behavior), + {"array": array, "axis": axis, "highlevel": highlevel, "behavior": behavior}, ): return _impl(array, axis, highlevel, behavior) From 0dc232c0047b612baec2fc9a67217eeae24a4642 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Wed, 1 Feb 2023 16:43:40 +0000 Subject: [PATCH 04/16] wip: add `ak.merge_option_of_records` --- src/awkward/operations/__init__.py | 1 + .../operations/ak_merge_option_of_records.py | 76 ++++++++ .../operations/ak_merge_union_of_records.py | 183 ++++++++++-------- 3 files changed, 176 insertions(+), 84 deletions(-) create mode 100644 src/awkward/operations/ak_merge_option_of_records.py diff --git a/src/awkward/operations/__init__.py b/src/awkward/operations/__init__.py index 05270d41fc..c6f8a511cc 100644 --- a/src/awkward/operations/__init__.py +++ b/src/awkward/operations/__init__.py @@ -47,6 +47,7 @@ from awkward.operations.ak_mask import mask from awkward.operations.ak_max import max, nanmax from awkward.operations.ak_mean import mean, nanmean +from 
awkward.operations.ak_merge_option_of_records import merge_option_of_records from awkward.operations.ak_merge_union_of_records import merge_union_of_records from awkward.operations.ak_metadata_from_parquet import metadata_from_parquet from awkward.operations.ak_min import min, nanmin diff --git a/src/awkward/operations/ak_merge_option_of_records.py b/src/awkward/operations/ak_merge_option_of_records.py new file mode 100644 index 0000000000..cfad371006 --- /dev/null +++ b/src/awkward/operations/ak_merge_option_of_records.py @@ -0,0 +1,76 @@ +# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE + + +import awkward as ak +from awkward._nplikes.numpylike import NumpyMetadata + +np = NumpyMetadata.instance() +cpu = ak._backends.NumpyBackend.instance() + + +def merge_option_of_records(array, axis=-1, *, highlevel=True, behavior=None): + """ + Args: + array: Array-like data (anything #ak.to_layout recognizes). + axis (int): The dimension at which this operation is applied. + The outermost dimension is `0`, followed by `1`, etc., and negative + values count backward from the innermost: `-1` is the innermost + dimension, `-2` is the next level up, etc. + highlevel (bool): If True, return an #ak.Array; otherwise, return + a low-level #ak.contents.Content subclass. + behavior (None or dict): Custom #ak.behavior for the output array, if + high-level. + + Simplifies options of records, e.g. + + >>> array = ak.Array([None, {"a": 1}, {"a": 2}]) + + into records of options, i.e. 
+ + >>> ak.merge_option_of_records(array) + + """ + with ak._errors.OperationErrorContext( + "ak.merge_option_of_records", + dict(array=array, axis=axis, highlevel=highlevel, behavior=behavior), + ): + return _impl(array, axis, highlevel, behavior) + + +def _impl(array, axis, highlevel, behavior): + behavior = ak._util.behavior_of(array, behavior=behavior) + layout = ak.to_layout(array, allow_record=False) + + # First, normalise type-invisible "index-of-records" to "record-of-index" + def apply_displace_index(layout, backend, **kwargs): + if (layout.is_indexed and not layout.is_option) and layout.content.is_record: + record = layout.content + + # Transpose index-of-record to record-of-index + return ak.contents.RecordArray( + [layout.copy(content=c) for c in record.contents], + record.fields, + record.length, + backend=backend, + ) + + layout = ak._do.recursively_apply(layout, apply_displace_index) + + def apply(layout, depth, backend, **kwargs): + posaxis = ak._util.maybe_posaxis(layout, axis, depth) + if depth < posaxis + 1 and layout.is_leaf: + raise ak._errors.wrap_error( + np.AxisError(f"axis={axis} exceeds the depth of this array ({depth})") + ) + elif depth == posaxis + 1 and layout.is_option and layout.content.is_record: + record = layout.content + # Transpose option-of-record to record-of-option + return ak.contents.RecordArray( + [layout.copy(content=c) for c in record.contents], + record.fields, + record.length, + backend=backend, + ) + + out = ak._do.recursively_apply(layout, apply) + return ak._util.wrap(out, highlevel=highlevel, behavior=behavior) diff --git a/src/awkward/operations/ak_merge_union_of_records.py b/src/awkward/operations/ak_merge_union_of_records.py index e068bed61d..4181fe506f 100644 --- a/src/awkward/operations/ak_merge_union_of_records.py +++ b/src/awkward/operations/ak_merge_union_of_records.py @@ -46,8 +46,15 @@ def _impl(array, axis, highlevel, behavior): def apply_displace_index(layout, backend, **kwargs): if layout.is_record: 
return layout - elif (layout.is_option or layout.is_indexed) and layout.content.is_record: - # Transpose option-of-record to record-of-option + elif layout.is_option and layout.content.is_record: + raise ak._errors.wrap_error( + TypeError( + "optional records cannot be merged by this function. First call `ak.merge_option_of_records` " + "to convert these into records of options." + ) + ) + elif layout.is_indexed and layout.content.is_record: + # Transpose index-of-record to record-of-index return ak.contents.RecordArray( [layout.copy(content=c) for c in layout.content.contents], layout.content.fields, @@ -57,95 +64,103 @@ def apply_displace_index(layout, backend, **kwargs): else: raise ak._errors.wrap_error(TypeError(layout)) - def apply(layout, depth, backend, **kwargs): + def apply(layout, depth, backend, continuation, **kwargs): posaxis = ak._util.maybe_posaxis(layout, axis, depth) - if posaxis + 1 == depth and layout.is_union: - # First, find all ordered fields, regularising any index-of-record - # such that we have record-of-index - seen_fields = set() - all_fields = [] - regularised_contents = [] - for content in layout.contents: - # Ensure that we have record-of-index - regularised_content = ak._do.recursively_apply( - content, apply_displace_index - ) - regularised_contents.append(regularised_content) - - # Find new fields - for field in regularised_content.fields: - if field not in seen_fields: - seen_fields.add(field) - all_fields.append(field) - - # Build unions for each field - outer_field_contents = [] - for field in all_fields: - field_tags = backend.index_nplike.asarray(layout.tags, copy=True) - field_index = backend.index_nplike.asarray(layout.index, copy=True) - - # Build contents for union representing current field - field_contents = [ - c.content(field) for c in regularised_contents if c.has_field(field) - ] - - # Find the best location for option type. 
- # We will potentially have fewer contents in this per-field union - # than the original outer union-of-records, because some recordarrays - # may not have the given field. - tag_for_missing = 0 - for i, content in enumerate(field_contents): - if content.is_option: - tag_for_missing = i - break - - # If at least one recordarray doesn't have this field, we add - # a special option - if len(field_contents) < len(regularised_contents): - # Make the tagged content an option, growing by one to ensure we - # have a known `None` value to index into - tagged_content = field_contents[tag_for_missing] - indexedoption_index = backend.index_nplike.arange( - tagged_content.length + 1, dtype=np.int64 - ) - indexedoption_index[tagged_content.length] = -1 - field_contents[ - tag_for_missing - ] = ak.contents.IndexedOptionArray.simplified( - ak.index.Index64(indexedoption_index), tagged_content + if posaxis + 1 == depth: + if layout.is_union: + # First, find all ordered fields, regularising any index-of-record + # such that we have record-of-index + seen_fields = set() + all_fields = [] + regularised_contents = [] + for content in layout.contents: + # Ensure that we have record-of-index + regularised_content = ak._do.recursively_apply( + content, apply_displace_index ) - - # Now build contents for union, by looping over outermost index - # Overwrite tags to adjust for new contents length - # and use the tagged content for any missing values - k = 0 - for j, content in enumerate(regularised_contents): - tag_is_j = field_tags == j - - if content.has_field(field): - # Rewrite tags to account for missing fields - field_tags[tag_is_j] = k - k += 1 - - else: - # Rewrite tags to point to option content - field_tags[tag_is_j] = tag_for_missing - # Point each value to missing value - field_index[tag_is_j] = ( - field_contents[tag_for_missing].length - 1 + regularised_contents.append(regularised_content) + + # Find new fields + for field in regularised_content.fields: + if field not in 
seen_fields: + seen_fields.add(field) + all_fields.append(field) + + # Build unions for each field + outer_field_contents = [] + for field in all_fields: + field_tags = backend.index_nplike.asarray(layout.tags, copy=True) + field_index = backend.index_nplike.asarray(layout.index, copy=True) + + # Build contents for union representing current field + field_contents = [ + c.content(field) + for c in regularised_contents + if c.has_field(field) + ] + + # Find the best location for option type. + # We will potentially have fewer contents in this per-field union + # than the original outer union-of-records, because some recordarrays + # may not have the given field. + tag_for_missing = 0 + for i, content in enumerate(field_contents): + if content.is_option: + tag_for_missing = i + break + + # If at least one recordarray doesn't have this field, we add + # a special option + if len(field_contents) < len(regularised_contents): + # Make the tagged content an option, growing by one to ensure we + # have a known `None` value to index into + tagged_content = field_contents[tag_for_missing] + indexedoption_index = backend.index_nplike.arange( + tagged_content.length + 1, dtype=np.int64 + ) + indexedoption_index[tagged_content.length] = -1 + field_contents[ + tag_for_missing + ] = ak.contents.IndexedOptionArray.simplified( + ak.index.Index64(indexedoption_index), tagged_content ) - outer_field_contents.append( - ak.contents.UnionArray.simplified( - ak.index.Index8(field_tags), - ak.index.Index64(field_index), - field_contents, + # Now build contents for union, by looping over outermost index + # Overwrite tags to adjust for new contents length + # and use the tagged content for any missing values + k = 0 + for j, content in enumerate(regularised_contents): + tag_is_j = field_tags == j + + if content.has_field(field): + # Rewrite tags to account for missing fields + field_tags[tag_is_j] = k + k += 1 + + else: + # Rewrite tags to point to option content + field_tags[tag_is_j] = 
tag_for_missing + # Point each value to missing value + field_index[tag_is_j] = ( + field_contents[tag_for_missing].length - 1 + ) + + outer_field_contents.append( + ak.contents.UnionArray.simplified( + ak.index.Index8(field_tags), + ak.index.Index64(field_index), + field_contents, + ) ) + return ak.contents.RecordArray( + outer_field_contents, all_fields, backend=backend ) - return ak.contents.RecordArray( - outer_field_contents, all_fields, backend=backend - ) + elif layout.is_option or layout.is_indexed or layout.is_record: + return continuation() + else: + return layout + # Can only hit this branch if we're above the action axis elif layout.is_leaf: raise ak._errors.wrap_error( np.AxisError(f"axis={axis} exceeds the depth of this array ({depth})") From d1c0420919efc7f66e707e22aa7bbfe8a19c4dcf Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 1 Feb 2023 16:45:49 +0000 Subject: [PATCH 05/16] style: pre-commit fixes --- src/awkward/operations/ak_merge_option_of_records.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/awkward/operations/ak_merge_option_of_records.py b/src/awkward/operations/ak_merge_option_of_records.py index cfad371006..46a35d90a1 100644 --- a/src/awkward/operations/ak_merge_option_of_records.py +++ b/src/awkward/operations/ak_merge_option_of_records.py @@ -32,7 +32,7 @@ def merge_option_of_records(array, axis=-1, *, highlevel=True, behavior=None): """ with ak._errors.OperationErrorContext( "ak.merge_option_of_records", - dict(array=array, axis=axis, highlevel=highlevel, behavior=behavior), + {"array": array, "axis": axis, "highlevel": highlevel, "behavior": behavior}, ): return _impl(array, axis, highlevel, behavior) From 22c01cd327f9cc056ef53f63eea121ab63396af9 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Wed, 1 Feb 2023 17:32:06 +0000 Subject: [PATCH 06/16] fix: handle `copy` simplification --- .../operations/ak_merge_option_of_records.py | 16 
++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/awkward/operations/ak_merge_option_of_records.py b/src/awkward/operations/ak_merge_option_of_records.py index cfad371006..69933f53a5 100644 --- a/src/awkward/operations/ak_merge_option_of_records.py +++ b/src/awkward/operations/ak_merge_option_of_records.py @@ -48,7 +48,12 @@ def apply_displace_index(layout, backend, **kwargs): # Transpose index-of-record to record-of-index return ak.contents.RecordArray( - [layout.copy(content=c) for c in record.contents], + [ + ak.contents.IndexedArray.simplified( + layout.index, c, parameters=layout._parameters + ) + for c in record.contents + ], record.fields, record.length, backend=backend, @@ -63,10 +68,17 @@ def apply(layout, depth, backend, **kwargs): np.AxisError(f"axis={axis} exceeds the depth of this array ({depth})") ) elif depth == posaxis + 1 and layout.is_option and layout.content.is_record: + layout = layout.to_IndexedOptionArray64() + record = layout.content # Transpose option-of-record to record-of-option return ak.contents.RecordArray( - [layout.copy(content=c) for c in record.contents], + [ + ak.contents.IndexedOptionArray.simplified( + layout.index, c, parameters=layout._parameters + ) + for c in record.contents + ], record.fields, record.length, backend=backend, From be0c80de5af1b5b2e4ff3d690cfce1f59cb5af39 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Wed, 1 Feb 2023 17:32:09 +0000 Subject: [PATCH 07/16] fix: handle `copy` simplification --- .../operations/ak_merge_union_of_records.py | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/src/awkward/operations/ak_merge_union_of_records.py b/src/awkward/operations/ak_merge_union_of_records.py index 6003b2af4c..9a9277ccd3 100644 --- a/src/awkward/operations/ak_merge_union_of_records.py +++ b/src/awkward/operations/ak_merge_union_of_records.py @@ -54,20 +54,30 @@ def apply_displace_index(layout, backend, **kwargs): ) ) elif layout.is_indexed 
and layout.content.is_record: + record = layout.content # Transpose index-of-record to record-of-index return ak.contents.RecordArray( - [layout.copy(content=c) for c in layout.content.contents], - layout.content.fields, - layout.content.length, + [ + ak.contents.IndexedArray.simplified( + layout.index, c, parameters=layout._parameters + ) + for c in record.contents + ], + record.fields, + record.length, backend=backend, ) else: raise ak._errors.wrap_error(TypeError(layout)) - def apply(layout, depth, backend, continuation, **kwargs): + def apply(layout, depth, backend, **kwargs): posaxis = ak._util.maybe_posaxis(layout, axis, depth) - if posaxis + 1 == depth: - if layout.is_union: + if depth < posaxis + 1 and layout.is_leaf: + raise ak._errors.wrap_error( + np.AxisError(f"axis={axis} exceeds the depth of this array ({depth})") + ) + elif depth == posaxis + 1 and layout.is_union: + if all(x.is_record for x in layout.contents): # First, find all ordered fields, regularising any index-of-record # such that we have record-of-index seen_fields = set() @@ -155,16 +165,6 @@ def apply(layout, depth, backend, continuation, **kwargs): return ak.contents.RecordArray( outer_field_contents, all_fields, backend=backend ) - elif layout.is_option or layout.is_indexed or layout.is_record: - return continuation() - else: - return layout - - # Can only hit this branch if we're above the action axis - elif layout.is_leaf: - raise ak._errors.wrap_error( - np.AxisError(f"axis={axis} exceeds the depth of this array ({depth})") - ) out = ak._do.recursively_apply(layout, apply) return ak._util.wrap(out, highlevel=highlevel, behavior=behavior) From 11d64d3e77ed5b3031978bbe1e620bf506fb5fee Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Wed, 1 Feb 2023 14:05:43 -0600 Subject: [PATCH 08/16] Added some tests. 
--- tests/test_2185_merge_union_of_records.py | 61 +++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 tests/test_2185_merge_union_of_records.py diff --git a/tests/test_2185_merge_union_of_records.py b/tests/test_2185_merge_union_of_records.py new file mode 100644 index 0000000000..81eab856a0 --- /dev/null +++ b/tests/test_2185_merge_union_of_records.py @@ -0,0 +1,61 @@ +# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE + +import numpy as np # noqa: F401 + +import awkward as ak + + +def test_merge_union_of_records(): + a1 = ak.Array([{"a": 1, "b": 2}]) + a2 = ak.Array([{"b": 3.3, "c": 4.4}]) + c = ak.concatenate((a1, a2)) + + assert c.tolist() == [{"a": 1, "b": 2}, {"b": 3.3, "c": 4.4}] + + assert str(c.type) == "2 * union[{a: int64, b: int64}, {b: float64, c: float64}]" + + d = ak.merge_union_of_records(c) + + assert d.tolist() == [{"a": 1, "b": 2, "c": None}, {"a": None, "b": 3.3, "c": 4.4}] + + assert str(d.type) == "2 * {a: ?int64, b: float64, c: ?float64}" + + +def test_merge_union_of_records_2(): + a1 = ak.Array([{"a": 1, "b": 2}]) + a2 = ak.Array([{"b": 3.3, "c": 4.4}, {"b": None, "c": None}]) + c = ak.concatenate((a1, a2)) + + assert c.tolist() == [{"a": 1, "b": 2}, {"b": 3.3, "c": 4.4}, {"b": None, "c": None}] + + assert str(c.type) == "3 * union[{a: int64, b: int64}, {b: ?float64, c: ?float64}]" + + d = ak.merge_union_of_records(c) + + assert d.tolist() == [{"a": 1, "b": 2, "c": None}, {"a": None, "b": 3.3, "c": 4.4}, {"a": None, "b": None, "c": None}] + + assert str(d.type) == "3 * {a: ?int64, b: ?float64, c: ?float64}" + + +def test_merge_option_of_records(): + a = ak.Array([None, {"a": 1, "b": 2}]) + + assert str(a.type) == "2 * ?{a: int64, b: int64}" + + b = ak.merge_option_of_records(a) + + assert b.tolist() == [{"a": None, "b": None}, {"a": 1, "b": 2}] + + assert str(b.type) == "2 * {a: ?int64, b: ?int64}" + + +def test_merge_option_of_records_2(): + a = ak.Array([None, {"a": 1, "b": 2}, 
{"a": None, "b": None}]) + + assert str(a.type) == "3 * ?{a: ?int64, b: ?int64}" + + b = ak.merge_option_of_records(a) + + assert b.tolist() == [{"a": None, "b": None}, {"a": 1, "b": 2}, {"a": None, "b": None}] + + assert str(b.type) == "3 * {a: ?int64, b: ?int64}" From 6c2fd8a17e69272089b830d621bd0c8754163bf1 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 1 Feb 2023 20:06:31 +0000 Subject: [PATCH 09/16] style: pre-commit fixes --- tests/test_2185_merge_union_of_records.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/tests/test_2185_merge_union_of_records.py b/tests/test_2185_merge_union_of_records.py index 81eab856a0..88bb22753e 100644 --- a/tests/test_2185_merge_union_of_records.py +++ b/tests/test_2185_merge_union_of_records.py @@ -26,13 +26,21 @@ def test_merge_union_of_records_2(): a2 = ak.Array([{"b": 3.3, "c": 4.4}, {"b": None, "c": None}]) c = ak.concatenate((a1, a2)) - assert c.tolist() == [{"a": 1, "b": 2}, {"b": 3.3, "c": 4.4}, {"b": None, "c": None}] + assert c.tolist() == [ + {"a": 1, "b": 2}, + {"b": 3.3, "c": 4.4}, + {"b": None, "c": None}, + ] assert str(c.type) == "3 * union[{a: int64, b: int64}, {b: ?float64, c: ?float64}]" d = ak.merge_union_of_records(c) - assert d.tolist() == [{"a": 1, "b": 2, "c": None}, {"a": None, "b": 3.3, "c": 4.4}, {"a": None, "b": None, "c": None}] + assert d.tolist() == [ + {"a": 1, "b": 2, "c": None}, + {"a": None, "b": 3.3, "c": 4.4}, + {"a": None, "b": None, "c": None}, + ] assert str(d.type) == "3 * {a: ?int64, b: ?float64, c: ?float64}" @@ -56,6 +64,10 @@ def test_merge_option_of_records_2(): b = ak.merge_option_of_records(a) - assert b.tolist() == [{"a": None, "b": None}, {"a": 1, "b": 2}, {"a": None, "b": None}] + assert b.tolist() == [ + {"a": None, "b": None}, + {"a": 1, "b": 2}, + {"a": None, "b": None}, + ] assert str(b.type) == "3 * {a: ?int64, b: ?int64}" From 
ea8018a3b14da598afdec907a108e62510db7ada Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Wed, 1 Feb 2023 14:16:36 -0600 Subject: [PATCH 10/16] Deep tests as well. --- tests/test_2185_merge_union_of_records.py | 28 +++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tests/test_2185_merge_union_of_records.py b/tests/test_2185_merge_union_of_records.py index 88bb22753e..f4a7522190 100644 --- a/tests/test_2185_merge_union_of_records.py +++ b/tests/test_2185_merge_union_of_records.py @@ -45,6 +45,22 @@ def test_merge_union_of_records_2(): assert str(d.type) == "3 * {a: ?int64, b: ?float64, c: ?float64}" +def test_merge_union_of_records_3(): + a1 = ak.Array([[[[{"a": 1, "b": 2}]]]]) + a2 = ak.Array([[[[{"b": 3.3, "c": 4.4}]]]]) + c = ak.concatenate((a1, a2), axis=-1) + + assert c.tolist() == [[[[{"a": 1, "b": 2}, {"b": 3.3, "c": 4.4}]]]] + + assert str(c.type) == "1 * var * var * var * union[{a: int64, b: int64}, {b: float64, c: float64}]" + + d = ak.merge_union_of_records(c, axis=-1) + + assert d.tolist() == [[[[{"a": 1, "b": 2, "c": None}, {"a": None, "b": 3.3, "c": 4.4}]]]] + + assert str(d.type) == "1 * var * var * var * {a: ?int64, b: float64, c: ?float64}" + + def test_merge_option_of_records(): a = ak.Array([None, {"a": 1, "b": 2}]) @@ -71,3 +87,15 @@ def test_merge_option_of_records_2(): ] assert str(b.type) == "3 * {a: ?int64, b: ?int64}" + + +def test_merge_option_of_records(): + a = ak.Array([[[[None, {"a": 1, "b": 2}]]]]) + + assert str(a.type) == "1 * var * var * var * ?{a: int64, b: int64}" + + b = ak.merge_option_of_records(a, axis=-1) + + assert b.tolist() == [[[[{"a": None, "b": None}, {"a": 1, "b": 2}]]]] + + assert str(b.type) == "1 * var * var * var * {a: ?int64, b: ?int64}" From e1cb6c875ead3246ff249e39193b61260019a023 Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Wed, 1 Feb 2023 14:17:03 -0600 Subject: [PATCH 11/16] Passing pre-commit. 
--- tests/test_2185_merge_union_of_records.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tests/test_2185_merge_union_of_records.py b/tests/test_2185_merge_union_of_records.py index f4a7522190..6c7d862616 100644 --- a/tests/test_2185_merge_union_of_records.py +++ b/tests/test_2185_merge_union_of_records.py @@ -52,11 +52,16 @@ def test_merge_union_of_records_3(): assert c.tolist() == [[[[{"a": 1, "b": 2}, {"b": 3.3, "c": 4.4}]]]] - assert str(c.type) == "1 * var * var * var * union[{a: int64, b: int64}, {b: float64, c: float64}]" + assert ( + str(c.type) + == "1 * var * var * var * union[{a: int64, b: int64}, {b: float64, c: float64}]" + ) d = ak.merge_union_of_records(c, axis=-1) - assert d.tolist() == [[[[{"a": 1, "b": 2, "c": None}, {"a": None, "b": 3.3, "c": 4.4}]]]] + assert d.tolist() == [ + [[[{"a": 1, "b": 2, "c": None}, {"a": None, "b": 3.3, "c": 4.4}]]] + ] assert str(d.type) == "1 * var * var * var * {a: ?int64, b: float64, c: ?float64}" @@ -89,7 +94,7 @@ def test_merge_option_of_records_2(): assert str(b.type) == "3 * {a: ?int64, b: ?int64}" -def test_merge_option_of_records(): +def test_merge_option_of_records_3(): a = ak.Array([[[[None, {"a": 1, "b": 2}]]]]) assert str(a.type) == "1 * var * var * var * ?{a: int64, b: int64}" From 41c98d203be749f195b0df33da00b9ff0c50f55d Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Wed, 1 Feb 2023 20:27:18 +0000 Subject: [PATCH 12/16] fix: use layout length, not content --- src/awkward/operations/ak_merge_option_of_records.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/awkward/operations/ak_merge_option_of_records.py b/src/awkward/operations/ak_merge_option_of_records.py index 7cc2c7b21c..37e050104f 100644 --- a/src/awkward/operations/ak_merge_option_of_records.py +++ b/src/awkward/operations/ak_merge_option_of_records.py @@ -55,7 +55,7 @@ def apply_displace_index(layout, backend, **kwargs): for c in record.contents ], record.fields, - record.length, 
+ layout.length, backend=backend, ) @@ -80,7 +80,7 @@ def apply(layout, depth, backend, **kwargs): for c in record.contents ], record.fields, - record.length, + layout.length, backend=backend, ) From 30aed4fa5cd4bb0eed98692452e2a8a7ee2ce535 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Wed, 1 Feb 2023 23:55:18 +0000 Subject: [PATCH 13/16] Update src/awkward/operations/ak_merge_option_of_records.py --- src/awkward/operations/ak_merge_option_of_records.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/awkward/operations/ak_merge_option_of_records.py b/src/awkward/operations/ak_merge_option_of_records.py index 37e050104f..aea1cd6de9 100644 --- a/src/awkward/operations/ak_merge_option_of_records.py +++ b/src/awkward/operations/ak_merge_option_of_records.py @@ -28,7 +28,7 @@ def merge_option_of_records(array, axis=-1, *, highlevel=True, behavior=None): into records of options, i.e. >>> ak.merge_option_of_records(array) - + """ with ak._errors.OperationErrorContext( "ak.merge_option_of_records", From 282722c76ac3893a6500d21a350918df78e46eab Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Wed, 1 Feb 2023 23:59:36 +0000 Subject: [PATCH 14/16] docs: refer to new functions --- docs/reference/toctree.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/reference/toctree.txt b/docs/reference/toctree.txt index 40dda46513..d36cbac1b9 100644 --- a/docs/reference/toctree.txt +++ b/docs/reference/toctree.txt @@ -165,6 +165,12 @@ generated/ak.local_index generated/ak.run_lengths + +.. toctree:: + :caption: Restructuring records + + generated/ak.merge_union_of_records + generated/ak.merge_option_of_records .. 
toctree:: :caption: Copying and packing arrays From 199118e539368e07eab40d32bad3142e78cac9ac Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Wed, 1 Feb 2023 23:59:48 +0000 Subject: [PATCH 15/16] docs: `to_packed` in reference --- docs/reference/toctree.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/toctree.txt b/docs/reference/toctree.txt index d36cbac1b9..66723257f3 100644 --- a/docs/reference/toctree.txt +++ b/docs/reference/toctree.txt @@ -176,7 +176,7 @@ :caption: Copying and packing arrays generated/ak.copy - generated/ak.packed + generated/ak.to_packed .. toctree:: :caption: Extracting metadata From f3ff3b35a2f4205e0d91ee15be462fff32dac3f5 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Thu, 2 Feb 2023 00:19:08 +0000 Subject: [PATCH 16/16] docs: simplify example --- src/awkward/operations/ak_merge_union_of_records.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/awkward/operations/ak_merge_union_of_records.py b/src/awkward/operations/ak_merge_union_of_records.py index 9a9277ccd3..c389dccb83 100644 --- a/src/awkward/operations/ak_merge_union_of_records.py +++ b/src/awkward/operations/ak_merge_union_of_records.py @@ -23,9 +23,7 @@ def merge_union_of_records(array, axis=-1, *, highlevel=True, behavior=None): Simplifies unions of records, e.g. - >>> array = ak.concatenate(([{"a": 1}], [{"b": 2}])) - >>> array - + >>> array = ak.Array([{"a": 1}, {"b": 2}]) into records of options, i.e.