From e897a0374096dabcbe8805157dd57749a5fe589d Mon Sep 17 00:00:00 2001 From: pfackeldey Date: Thu, 12 Dec 2024 15:18:00 -0500 Subject: [PATCH 1/2] force touch columns needed to build internal vector classes --- src/vector/backends/awkward.py | 52 +++++++++++++++-------------- tests/backends/test_dask_awkward.py | 14 ++++++++ 2 files changed, 41 insertions(+), 25 deletions(-) diff --git a/src/vector/backends/awkward.py b/src/vector/backends/awkward.py index a4195a1e..f57e7586 100644 --- a/src/vector/backends/awkward.py +++ b/src/vector/backends/awkward.py @@ -85,6 +85,8 @@ behavior: typing.Any = {} +_touch = ak.typetracer.touch_data + # coordinates classes are a formality for Awkward ############################# @@ -126,9 +128,9 @@ def from_fields(cls, array: ak.Array) -> AzimuthalAwkward: """ fields = ak.fields(array) if "x" in fields and "y" in fields: - return AzimuthalAwkwardXY(array["x"], array["y"]) + return AzimuthalAwkwardXY(_touch(array["x"]), _touch(array["y"])) elif "rho" in fields and "phi" in fields: - return AzimuthalAwkwardRhoPhi(array["rho"], array["phi"]) + return AzimuthalAwkwardRhoPhi(_touch(array["rho"]), _touch(array["phi"])) else: raise ValueError( "array does not have azimuthal coordinates (x, y or rho, phi): " @@ -154,17 +156,17 @@ def from_momentum_fields(cls, array: ak.Array) -> AzimuthalAwkward: """ fields = ak.fields(array) if "x" in fields and "y" in fields: - return AzimuthalAwkwardXY(array["x"], array["y"]) + return AzimuthalAwkwardXY(_touch(array["x"]), _touch(array["y"])) elif "x" in fields and "py" in fields: - return AzimuthalAwkwardXY(array["x"], array["py"]) + return AzimuthalAwkwardXY(_touch(array["x"]), _touch(array["py"])) elif "px" in fields and "y" in fields: - return AzimuthalAwkwardXY(array["px"], array["y"]) + return AzimuthalAwkwardXY(_touch(array["px"]), _touch(array["y"])) elif "px" in fields and "py" in fields: - return AzimuthalAwkwardXY(array["px"], array["py"]) + return AzimuthalAwkwardXY(_touch(array["px"]), 
_touch(array["py"])) elif "rho" in fields and "phi" in fields: - return AzimuthalAwkwardRhoPhi(array["rho"], array["phi"]) + return AzimuthalAwkwardRhoPhi(_touch(array["rho"]), _touch(array["phi"])) elif "pt" in fields and "phi" in fields: - return AzimuthalAwkwardRhoPhi(array["pt"], array["phi"]) + return AzimuthalAwkwardRhoPhi(_touch(array["pt"]), _touch(array["phi"])) else: raise ValueError( "array does not have azimuthal coordinates (x/px, y/py or rho/pt, phi): " @@ -206,11 +208,11 @@ def from_fields(cls, array: ak.Array) -> LongitudinalAwkward: """ fields = ak.fields(array) if "z" in fields: - return LongitudinalAwkwardZ(array["z"]) + return LongitudinalAwkwardZ(_touch(array["z"])) elif "theta" in fields: - return LongitudinalAwkwardTheta(array["theta"]) + return LongitudinalAwkwardTheta(_touch(array["theta"])) elif "eta" in fields: - return LongitudinalAwkwardEta(array["eta"]) + return LongitudinalAwkwardEta(_touch(array["eta"])) else: raise ValueError( "array does not have longitudinal coordinates (z or theta or eta): " @@ -237,13 +239,13 @@ def from_momentum_fields(cls, array: ak.Array) -> LongitudinalAwkward: """ fields = ak.fields(array) if "z" in fields: - return LongitudinalAwkwardZ(array["z"]) + return LongitudinalAwkwardZ(_touch(array["z"])) elif "pz" in fields: - return LongitudinalAwkwardZ(array["pz"]) + return LongitudinalAwkwardZ(_touch(array["pz"])) elif "theta" in fields: - return LongitudinalAwkwardTheta(array["theta"]) + return LongitudinalAwkwardTheta(_touch(array["theta"])) elif "eta" in fields: - return LongitudinalAwkwardEta(array["eta"]) + return LongitudinalAwkwardEta(_touch(array["eta"])) else: raise ValueError( "array does not have longitudinal coordinates (z/pz or theta or eta): " @@ -284,9 +286,9 @@ def from_fields(cls, array: ak.Array) -> TemporalAwkward: """ fields = ak.fields(array) if "t" in fields: - return TemporalAwkwardT(array["t"]) + return TemporalAwkwardT(_touch(array["t"])) elif "tau" in fields: - return 
TemporalAwkwardTau(array["tau"]) + return TemporalAwkwardTau(_touch(array["tau"])) else: raise ValueError( "array does not have temporal coordinates (t or tau): " @@ -312,21 +314,21 @@ def from_momentum_fields(cls, array: ak.Array) -> TemporalAwkward: """ fields = ak.fields(array) if "t" in fields: - return TemporalAwkwardT(array["t"]) + return TemporalAwkwardT(_touch(array["t"])) elif "E" in fields: - return TemporalAwkwardT(array["E"]) + return TemporalAwkwardT(_touch(array["E"])) elif "e" in fields: - return TemporalAwkwardT(array["e"]) + return TemporalAwkwardT(_touch(array["e"])) elif "energy" in fields: - return TemporalAwkwardT(array["energy"]) + return TemporalAwkwardT(_touch(array["energy"])) elif "tau" in fields: - return TemporalAwkwardTau(array["tau"]) + return TemporalAwkwardTau(_touch(array["tau"])) elif "M" in fields: - return TemporalAwkwardTau(array["M"]) + return TemporalAwkwardTau(_touch(array["M"])) elif "m" in fields: - return TemporalAwkwardTau(array["m"]) + return TemporalAwkwardTau(_touch(array["m"])) elif "mass" in fields: - return TemporalAwkwardTau(array["mass"]) + return TemporalAwkwardTau(_touch(array["mass"])) else: raise ValueError( "array does not have temporal coordinates (t/E/e/energy or tau/M/m/mass): " diff --git a/tests/backends/test_dask_awkward.py b/tests/backends/test_dask_awkward.py index b556a2c1..c6ff23e3 100644 --- a/tests/backends/test_dask_awkward.py +++ b/tests/backends/test_dask_awkward.py @@ -23,3 +23,17 @@ def test_constructor(): assert isinstance(vec.compute(), vector.backends.awkward.VectorAwkward2D) assert ak.all(vec.x.compute() == ak.Array([1, 1.1])) assert ak.all(vec.y.compute() == ak.Array([2, 2.2])) + + +def test_necessary_columns(): + vec = vector.Array([[{"pt": 1, "phi": 2}], [], [{"pt": 3, "phi": 4}]]) + dak_vec = dak.from_awkward(vec, npartitions=1) + + cols = next(iter(dak.report_necessary_columns(dak_vec).values())) + + # this may seem weird at first: why would you need "phi" and "rho", if you asked for 
"pt"? + # the reason is that vector will build internally a class with "phi" and "rho", + # see: https://github.com/scikit-hep/vector/blob/main/src/vector/backends/awkward.py#L166-L167 + # So, even if you ask for "pt", you will need "phi" and "rho" as well in order to build the vector class in the first place. + # (the same argument holds true for all other vector classes) + assert cols == frozenset({"phi", "rho"}) From e41c7ed06e76e2519ed8e411e9e59d4e2b8f13dd Mon Sep 17 00:00:00 2001 From: pfackeldey Date: Thu, 12 Dec 2024 16:14:20 -0500 Subject: [PATCH 2/2] touch only typetracer arrays, improve comment for test --- src/vector/backends/awkward.py | 8 +++++++- tests/backends/test_dask_awkward.py | 6 +++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/vector/backends/awkward.py b/src/vector/backends/awkward.py index f57e7586..68281080 100644 --- a/src/vector/backends/awkward.py +++ b/src/vector/backends/awkward.py @@ -82,10 +82,16 @@ vector._import_awkward() ArrayOrRecord = typing.TypeVar("ArrayOrRecord", bound=typing.Union[ak.Array, ak.Record]) +Array = typing.TypeVar("Array") behavior: typing.Any = {} -_touch = ak.typetracer.touch_data + +def _touch(array: Array) -> Array: + # make sure that touching is only done on Awkward arrays + if isinstance(array, (ak.Array, ak.Record)) and ak.backend(array) == "typetracer": + return ak.typetracer.touch_data(array) + return array # coordinates classes are a formality for Awkward ############################# diff --git a/tests/backends/test_dask_awkward.py b/tests/backends/test_dask_awkward.py index c6ff23e3..7e2d0417 100644 --- a/tests/backends/test_dask_awkward.py +++ b/tests/backends/test_dask_awkward.py @@ -31,9 +31,9 @@ def test_necessary_columns(): cols = next(iter(dak.report_necessary_columns(dak_vec).values())) - # this may seem weird at first: why would you need "phi" and "rho", if you asked for "pt"? + # this may seem weird at first: why would one need "phi" and "rho", if one asked for "pt"? 
# the reason is that vector will build internally a class with "phi" and "rho", - # see: https://github.com/scikit-hep/vector/blob/main/src/vector/backends/awkward.py#L166-L167 - # So, even if you ask for "pt", you will need "phi" and "rho" as well in order to build the vector class in the first place. + # see: https://github.com/scikit-hep/vector/blob/608da2d55a74eed25635fd408d1075b568773c99/src/vector/backends/awkward.py#L166-L167 + # So, even if one asks for "pt", "phi" and "rho" are needed as well in order to build the vector class in the first place. # (the same argument holds true for all other vector classes) assert cols == frozenset({"phi", "rho"})