From e394623bedbb96df25577913ac6839282ae1ef4e Mon Sep 17 00:00:00 2001 From: Ianna Osborne Date: Wed, 25 Oct 2023 13:11:24 +0200 Subject: [PATCH 01/10] fix: handle unhashable behaviour type --- src/awkward/_connect/numba/builder.py | 2 +- src/awkward/_connect/numba/layoutbuilder.py | 10 +++++++ src/awkward/highlevel.py | 2 +- tests/test_2408_layoutbuilder_in_numba.py | 4 ++- ...ize_and_deserialize_behaviour_for_numba.py | 27 +++++++++++++++++++ 5 files changed, 42 insertions(+), 3 deletions(-) create mode 100644 tests/test_2763_serialize_and_deserialize_behaviour_for_numba.py diff --git a/src/awkward/_connect/numba/builder.py b/src/awkward/_connect/numba/builder.py index a0a668ef19..e15705cc31 100644 --- a/src/awkward/_connect/numba/builder.py +++ b/src/awkward/_connect/numba/builder.py @@ -29,7 +29,7 @@ def __init__(self, behavior): ak._connect.numba.arrayview.repr_behavior(behavior) ) ) - self.behavior = behavior + self.behavior = tuple(behavior.items()) @numba.extending.register_model(ArrayBuilderType) diff --git a/src/awkward/_connect/numba/layoutbuilder.py b/src/awkward/_connect/numba/layoutbuilder.py index 6dbe6070b0..0ae772c911 100644 --- a/src/awkward/_connect/numba/layoutbuilder.py +++ b/src/awkward/_connect/numba/layoutbuilder.py @@ -302,6 +302,16 @@ def getter(builder): return getter +@numba.extending.overload_method(EmptyType, "append") +def Empty_append(builder, datum): + if isinstance(builder, EmptyType): + + def append(builder, datum): + raise NumbaTypeError("Empty cannot append data") + + return append + + ########## ListOffset ######################################################### diff --git a/src/awkward/highlevel.py b/src/awkward/highlevel.py index e455b49289..ba6734a88c 100644 --- a/src/awkward/highlevel.py +++ b/src/awkward/highlevel.py @@ -2373,7 +2373,7 @@ def _wrap(cls, layout, behavior=None): assert isinstance(layout, _ext.ArrayBuilder) out = cls.__new__(cls) out._layout = layout - out._behavior = behavior + out._behavior = behavior if isinstance(behavior, dict) else dict(behavior) return out @property diff --git a/tests/test_2408_layoutbuilder_in_numba.py b/tests/test_2408_layoutbuilder_in_numba.py index fb336aa422..f412e7408b 100644 --- a/tests/test_2408_layoutbuilder_in_numba.py +++ b/tests/test_2408_layoutbuilder_in_numba.py @@ -7,6 +7,8 @@ numba = pytest.importorskip("numba") +from numba.core.errors import NumbaTypeError # noqa: E402 + import awkward.numba.layoutbuilder as lb # noqa: E402 ak.numba.register_and_check() @@ -593,7 +595,7 @@ def f2(x): builder = lb.Empty() # Unknown attribute 'append' of type lb.Empty - with pytest.raises(numba.core.errors.TypingError): + with pytest.raises(NumbaTypeError): f2(builder) diff --git a/tests/test_2763_serialize_and_deserialize_behaviour_for_numba.py b/tests/test_2763_serialize_and_deserialize_behaviour_for_numba.py new file mode 100644 index 0000000000..d1deef239d --- /dev/null +++ b/tests/test_2763_serialize_and_deserialize_behaviour_for_numba.py @@ -0,0 +1,27 @@ +# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE + +import pytest + +import awkward as ak + +numba = pytest.importorskip("numba") + + +def test_ArrayBuilder_inNumba(): + SOME_ATTRS = {"FOO": "BAR"} + builder = ak.ArrayBuilder(behavior=SOME_ATTRS) + + @numba.njit + def func(array): + return array + + assert builder.behavior is SOME_ATTRS + # In Python, when we create a dictionary literal like {'FOO': 'BAR'}, it + # creates a new dictionary object. If we serialize this dictionary to + # a JSON string, or to a tuple and then deserialize it, we get a new dictionary + # object that is structurally identical to the original one, but it is not + # the same object in terms of identity. + + # To check if two dictionaries are equal in terms of their contents, + # we should use the == operator instead of is. + assert func(builder).behavior == SOME_ATTRS From a6650d6d339af786ebe71221557239ea1b462a16 Mon Sep 17 00:00:00 2001 From: Ianna Osborne Date: Wed, 25 Oct 2023 13:46:59 +0200 Subject: [PATCH 02/10] fix: add checks for None --- src/awkward/_connect/numba/builder.py | 2 +- src/awkward/highlevel.py | 6 ++++- ...ize_and_deserialize_behaviour_for_numba.py | 27 ------------------- 3 files changed, 6 insertions(+), 29 deletions(-) delete mode 100644 tests/test_2763_serialize_and_deserialize_behaviour_for_numba.py diff --git a/src/awkward/_connect/numba/builder.py b/src/awkward/_connect/numba/builder.py index e15705cc31..b47a656973 100644 --- a/src/awkward/_connect/numba/builder.py +++ b/src/awkward/_connect/numba/builder.py @@ -29,7 +29,7 @@ def __init__(self, behavior): ak._connect.numba.arrayview.repr_behavior(behavior) ) ) - self.behavior = tuple(behavior.items()) + self.behavior = behavior if behavior is None else tuple(behavior.items()) @numba.extending.register_model(ArrayBuilderType) diff --git a/src/awkward/highlevel.py b/src/awkward/highlevel.py index ba6734a88c..00e8523943 100644 --- a/src/awkward/highlevel.py +++ b/src/awkward/highlevel.py @@ -2373,7 +2373,11 @@ def _wrap(cls, layout, behavior=None): assert isinstance(layout, _ext.ArrayBuilder) out = cls.__new__(cls) out._layout = layout - out._behavior = behavior if isinstance(behavior, dict) else dict(behavior) + out._behavior = ( + behavior + if behavior is None or isinstance(behavior, dict) + else dict(behavior) + ) return out @property diff --git a/tests/test_2763_serialize_and_deserialize_behaviour_for_numba.py b/tests/test_2763_serialize_and_deserialize_behaviour_for_numba.py deleted file mode 100644 index d1deef239d..0000000000 --- a/tests/test_2763_serialize_and_deserialize_behaviour_for_numba.py +++ /dev/null @@ -1,27 +0,0 @@ -# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE - -import pytest - -import awkward as ak - -numba = pytest.importorskip("numba") - - -def test_ArrayBuilder_inNumba(): - SOME_ATTRS = {"FOO": "BAR"} - builder = ak.ArrayBuilder(behavior=SOME_ATTRS) - - @numba.njit - def func(array): - return array - - assert builder.behavior is SOME_ATTRS - # In Python, when we create a dictionary literal like {'FOO': 'BAR'}, it - # creates a new dictionary object. If we serialize this dictionary to - # a JSON string, or to a tuple and then deserialize it, we get a new dictionary - # object that is structurally identical to the original one, but it is not - # the same object in terms of identity. - - # To check if two dictionaries are equal in terms of their contents, - # we should use the == operator instead of is. - assert func(builder).behavior == SOME_ATTRS From 01bd008de49786c07bb16215a9498ffdbf20e6ba Mon Sep 17 00:00:00 2001 From: Ianna Osborne Date: Thu, 26 Oct 2023 09:10:55 +0200 Subject: [PATCH 03/10] fix: use arrayview functions - Jim's suggestion --- src/awkward/_connect/numba/builder.py | 14 +++++++++++--- src/awkward/highlevel.py | 6 +----- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/awkward/_connect/numba/builder.py b/src/awkward/_connect/numba/builder.py index b47a656973..2e1d998f17 100644 --- a/src/awkward/_connect/numba/builder.py +++ b/src/awkward/_connect/numba/builder.py @@ -29,7 +29,7 @@ def __init__(self, behavior): ak._connect.numba.arrayview.repr_behavior(behavior) ) ) - self.behavior = behavior if behavior is None else tuple(behavior.items()) + self.behavior = behavior # if behavior is None else tuple(behavior.items()) @numba.extending.register_model(ArrayBuilderType) @@ -74,8 +74,16 @@ def box_ArrayBuilder(arraybuildertype, arraybuilderval, c): ArrayBuilder_obj = c.pyapi.unserialize( c.pyapi.serialize_object(ak.highlevel.ArrayBuilder) ) - behavior_obj = c.pyapi.unserialize( - c.pyapi.serialize_object(arraybuildertype.behavior) + serializable2dict_obj = c.pyapi.unserialize( + c.pyapi.serialize_object(ak._connect.numba.arrayview.serializable2dict) + ) + behavior2_obj = c.pyapi.unserialize( + c.pyapi.serialize_object( + ak._connect.numba.arrayview.dict2serializable(arraybuildertype.behavior) + ) + ) + behavior_obj = c.pyapi.call_function_objargs( + serializable2dict_obj, (behavior2_obj,) ) proxyin = c.context.make_helper(c.builder, arraybuildertype, arraybuilderval) diff --git a/src/awkward/highlevel.py b/src/awkward/highlevel.py index 00e8523943..340bdf70f1 100644 --- a/src/awkward/highlevel.py +++ b/src/awkward/highlevel.py @@ -2373,11 +2373,7 @@ def _wrap(cls, layout, behavior=None): assert isinstance(layout, _ext.ArrayBuilder) out = cls.__new__(cls) out._layout = layout - out._behavior = ( - behavior - if behavior is None or isinstance(behavior, dict) - else dict(behavior) - ) + out._behavior = behavior # ( return out @property From 625b2d326d1505ddd01cede2996b960724246778 Mon Sep 17 00:00:00 2001 From: Ianna Osborne Date: Thu, 26 Oct 2023 09:13:57 +0200 Subject: [PATCH 04/10] fix: cleanup --- src/awkward/_connect/numba/builder.py | 2 +- src/awkward/highlevel.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/awkward/_connect/numba/builder.py b/src/awkward/_connect/numba/builder.py index 2e1d998f17..a7eafcd463 100644 --- a/src/awkward/_connect/numba/builder.py +++ b/src/awkward/_connect/numba/builder.py @@ -29,7 +29,7 @@ def __init__(self, behavior): ak._connect.numba.arrayview.repr_behavior(behavior) ) ) - self.behavior = behavior # if behavior is None else tuple(behavior.items()) + self.behavior = behavior @numba.extending.register_model(ArrayBuilderType) diff --git a/src/awkward/highlevel.py b/src/awkward/highlevel.py index 340bdf70f1..e455b49289 100644 --- a/src/awkward/highlevel.py +++ b/src/awkward/highlevel.py @@ -2373,7 +2373,7 @@ def _wrap(cls, layout, behavior=None): assert isinstance(layout, _ext.ArrayBuilder) out = cls.__new__(cls) out._layout = layout - out._behavior = behavior # ( + out._behavior = behavior return out @property From 94e1427d24e7336589b785bb46f6d48429ea3830 Mon Sep 17 00:00:00 2001 From: Ianna Osborne Date: Thu, 26 Oct 2023 09:15:14 +0200 Subject: [PATCH 05/10] test: add a test for a non-pickleable behaviour --- ...ize_and_deserialize_behaviour_for_numba.py | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 tests/test_2770_serialize_and_deserialize_behaviour_for_numba.py diff --git a/tests/test_2770_serialize_and_deserialize_behaviour_for_numba.py b/tests/test_2770_serialize_and_deserialize_behaviour_for_numba.py new file mode 100644 index 0000000000..d1deef239d --- /dev/null +++ b/tests/test_2770_serialize_and_deserialize_behaviour_for_numba.py @@ -0,0 +1,27 @@ +# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE + +import pytest + +import awkward as ak + +numba = pytest.importorskip("numba") + + +def test_ArrayBuilder_inNumba(): + SOME_ATTRS = {"FOO": "BAR"} + builder = ak.ArrayBuilder(behavior=SOME_ATTRS) + + @numba.njit + def func(array): + return array + + assert builder.behavior is SOME_ATTRS + # In Python, when we create a dictionary literal like {'FOO': 'BAR'}, it + # creates a new dictionary object. If we serialize this dictionary to + # a JSON string, or to a tuple and then deserialize it, we get a new dictionary + # object that is structurally identical to the original one, but it is not + # the same object in terms of identity. + + # To check if two dictionaries are equal in terms of their contents, + # we should use the == operator instead of is. + assert func(builder).behavior == SOME_ATTRS From 4f9eeba703136d23a319a1930631486c15391c36 Mon Sep 17 00:00:00 2001 From: Ianna Osborne Date: Thu, 26 Oct 2023 09:18:12 +0200 Subject: [PATCH 06/10] test: rename the test --- ...ize_and_deserialize_behaviour_for_numba.py | 37 ++++++++++++++----- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/tests/test_2770_serialize_and_deserialize_behaviour_for_numba.py b/tests/test_2770_serialize_and_deserialize_behaviour_for_numba.py index d1deef239d..e6381db80a 100644 --- a/tests/test_2770_serialize_and_deserialize_behaviour_for_numba.py +++ b/tests/test_2770_serialize_and_deserialize_behaviour_for_numba.py @@ -7,7 +7,7 @@ numba = pytest.importorskip("numba") -def test_ArrayBuilder_inNumba(): +def test_ArrayBuilder_behavior(): SOME_ATTRS = {"FOO": "BAR"} builder = ak.ArrayBuilder(behavior=SOME_ATTRS) @@ -16,12 +16,29 @@ def func(array): return array assert builder.behavior is SOME_ATTRS - # In Python, when we create a dictionary literal like {'FOO': 'BAR'}, it - # creates a new dictionary object. If we serialize this dictionary to - # a JSON string, or to a tuple and then deserialize it, we get a new dictionary - # object that is structurally identical to the original one, but it is not - # the same object in terms of identity. - - # To check if two dictionaries are equal in terms of their contents, - # we should use the == operator instead of is. - assert func(builder).behavior == SOME_ATTRS + assert func(builder).behavior is SOME_ATTRS + + def make_add_xyr(): + def add_xyr(left, right): + x = left.x + right.x + y = left.y + right.y + return ak.zip( + { + "x": x, + "y": y, + "r": np.sqrt(x**2 + y**2), + }, + with_name="xyr", + ) + + return add_xyr + + + behavior = {(np.add, "xyr", "xyr"): make_add_xyr()} + + + a = ak.Array([{"x": 3, "y": 4, "r": 5}], with_name="xyr", behavior=behavior) + b = ak.Array([{"x": 3, "y": 4, "r": 5}], with_name="xyr", behavior=behavior) + + builder = ak.ArrayBuilder(behavior=behavior) + assert func(builder).behavior is behavior From fc1c306d5cdc09e872e30bc29fc0f5d8430a164b Mon Sep 17 00:00:00 2001 From: Ianna Osborne Date: Thu, 26 Oct 2023 09:21:15 +0200 Subject: [PATCH 07/10] fix: clenup --- ...st_2770_serialize_and_deserialize_behaviour_for_numba.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/tests/test_2770_serialize_and_deserialize_behaviour_for_numba.py b/tests/test_2770_serialize_and_deserialize_behaviour_for_numba.py index e6381db80a..6673935111 100644 --- a/tests/test_2770_serialize_and_deserialize_behaviour_for_numba.py +++ b/tests/test_2770_serialize_and_deserialize_behaviour_for_numba.py @@ -1,5 +1,6 @@ # BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE +import numpy as np import pytest import awkward as ak @@ -33,12 +34,7 @@ def add_xyr(left, right): return add_xyr - behavior = {(np.add, "xyr", "xyr"): make_add_xyr()} - - a = ak.Array([{"x": 3, "y": 4, "r": 5}], with_name="xyr", behavior=behavior) - b = ak.Array([{"x": 3, "y": 4, "r": 5}], with_name="xyr", behavior=behavior) - builder = ak.ArrayBuilder(behavior=behavior) assert func(builder).behavior is behavior From 8b28efee0610b25181f8c4ad0c5237d5d3fb384b Mon Sep 17 00:00:00 2001 From: Ianna Osborne Date: Thu, 26 Oct 2023 10:23:09 +0200 Subject: [PATCH 08/10] test: compare behaviours --- ...ize_and_deserialize_behaviour_for_numba.py | 51 ++++++++++++++++--- 1 file changed, 44 insertions(+), 7 deletions(-) diff --git a/tests/test_2770_serialize_and_deserialize_behaviour_for_numba.py b/tests/test_2770_serialize_and_deserialize_behaviour_for_numba.py index 6673935111..956c048622 100644 --- a/tests/test_2770_serialize_and_deserialize_behaviour_for_numba.py +++ b/tests/test_2770_serialize_and_deserialize_behaviour_for_numba.py @@ -8,17 +8,20 @@ numba = pytest.importorskip("numba") +@numba.njit +def func(array): + return array + + def test_ArrayBuilder_behavior(): SOME_ATTRS = {"FOO": "BAR"} builder = ak.ArrayBuilder(behavior=SOME_ATTRS) - @numba.njit - def func(array): - return array - assert builder.behavior is SOME_ATTRS - assert func(builder).behavior is SOME_ATTRS + assert func(builder).behavior == SOME_ATTRS + +def test_ArrayBuilder_non_picklable_behavior(): def make_add_xyr(): def add_xyr(left, right): x = left.x + right.x @@ -35,6 +38,40 @@ def add_xyr(left, right): return add_xyr behavior = {(np.add, "xyr", "xyr"): make_add_xyr()} - builder = ak.ArrayBuilder(behavior=behavior) - assert func(builder).behavior is behavior + behavior_out = func(builder).behavior + + # Compare the dictionaries themselves + # Note: 'behavior_out' is not 'behavior' + if behavior_out == behavior: + print("behavior_out is behavior") + else: + print("behavior_out is not behavior") + + # Define ufuncs + ufunc_add = np.add + + # Compare the identity of the ufunc within the dictionaries + # Note: 'ufunc_behavior_out[0]' is 'ufunc_add' + ufunc_behavior_out = next(iter(behavior_out)) + ufunc_behavior = next(iter(behavior)) + + if ufunc_behavior_out[0] is ufunc_add: + print("ufunc_behavior_out[0] is ufunc_add") + else: + print("ufunc_behavior_out[0] is not ufunc_add") + + if ufunc_behavior[0] is ufunc_add: + print("ufunc_behavior[0] is ufunc_add") + else: + print("ufunc_behavior[0] is not ufunc_add") + + # Compare the unique identifiers of the lambda functions + # Note: Lambda functions have different identities + lambda_behavior_out = next(iter(behavior_out.values())) + lambda_behavior = next(iter(behavior.values())) + + if id(lambda_behavior_out) == id(lambda_behavior): + print("Lambda functions have the same identity") + else: + print("Lambda functions have different identities") From a6dbb356761b1a2c3c2cb3edd380cdd10d0e7948 Mon Sep 17 00:00:00 2001 From: Ianna Osborne Date: Thu, 26 Oct 2023 10:31:27 +0200 Subject: [PATCH 09/10] test: add more checks --- ...ize_and_deserialize_behaviour_for_numba.py | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tests/test_2770_serialize_and_deserialize_behaviour_for_numba.py b/tests/test_2770_serialize_and_deserialize_behaviour_for_numba.py index 956c048622..5b5a407c3b 100644 --- a/tests/test_2770_serialize_and_deserialize_behaviour_for_numba.py +++ b/tests/test_2770_serialize_and_deserialize_behaviour_for_numba.py @@ -75,3 +75,23 @@ def add_xyr(left, right): print("Lambda functions have the same identity") else: print("Lambda functions have different identities") + + @numba.njit + def make_ab(builder): + builder.begin_record("xyz") + builder.field("x").integer(3) + builder.field("y").integer(4) + builder.field("z").integer(3) + builder.end_record() + + builder.begin_record("xyz") + builder.field("x").integer(3) + builder.field("y").integer(4) + builder.field("z").integer(3) + builder.end_record() + return builder + + result = make_ab(builder).snapshot() + + print(result) + assert result.behavior == make_ab(builder).behavior From 2716f8f20840ef8b162efb39b3b2045de8b7ed4d Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Thu, 26 Oct 2023 10:53:17 +0100 Subject: [PATCH 10/10] test: just check keys --- ...ize_and_deserialize_behaviour_for_numba.py | 58 +------------------ 1 file changed, 2 insertions(+), 56 deletions(-) diff --git a/tests/test_2770_serialize_and_deserialize_behaviour_for_numba.py b/tests/test_2770_serialize_and_deserialize_behaviour_for_numba.py index 5b5a407c3b..42e27e691f 100644 --- a/tests/test_2770_serialize_and_deserialize_behaviour_for_numba.py +++ b/tests/test_2770_serialize_and_deserialize_behaviour_for_numba.py @@ -39,59 +39,5 @@ def add_xyr(left, right): behavior = {(np.add, "xyr", "xyr"): make_add_xyr()} builder = ak.ArrayBuilder(behavior=behavior) - behavior_out = func(builder).behavior - - # Compare the dictionaries themselves - # Note: 'behavior_out' is not 'behavior' - if behavior_out == behavior: - print("behavior_out is behavior") - else: - print("behavior_out is not behavior") - - # Define ufuncs - ufunc_add = np.add - - # Compare the identity of the ufunc within the dictionaries - # Note: 'ufunc_behavior_out[0]' is 'ufunc_add' - ufunc_behavior_out = next(iter(behavior_out)) - ufunc_behavior = next(iter(behavior)) - - if ufunc_behavior_out[0] is ufunc_add: - print("ufunc_behavior_out[0] is ufunc_add") - else: - print("ufunc_behavior_out[0] is not ufunc_add") - - if ufunc_behavior[0] is ufunc_add: - print("ufunc_behavior[0] is ufunc_add") - else: - print("ufunc_behavior[0] is not ufunc_add") - - # Compare the unique identifiers of the lambda functions - # Note: Lambda functions have different identities - lambda_behavior_out = next(iter(behavior_out.values())) - lambda_behavior = next(iter(behavior.values())) - - if id(lambda_behavior_out) == id(lambda_behavior): - print("Lambda functions have the same identity") - else: - print("Lambda functions have different identities") - - @numba.njit - def make_ab(builder): - builder.begin_record("xyz") - builder.field("x").integer(3) - builder.field("y").integer(4) - builder.field("z").integer(3) - builder.end_record() - - builder.begin_record("xyz") - builder.field("x").integer(3) - builder.field("y").integer(4) - builder.field("z").integer(3) - builder.end_record() - return builder - - result = make_ab(builder).snapshot() - - print(result) - assert result.behavior == make_ab(builder).behavior + result = func(builder) + assert result.behavior.keys() == behavior.keys()