From 11e6975b502b43c2417b53bdc9bb6c2790b9d34b Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Fri, 26 Jun 2020 19:51:54 -0500 Subject: [PATCH] AsObjects.simplify, leaf-lists, and test Double32. (#33) * AsObjects.simplify, leaf-lists, and test Double32. * Refactor: AsString and AsStrings take length_bytes in ('1-5', '4'), rather than two booleans. * AsObject.simplify maps AsString -> AsStrings. * AsObject.simplify maps AsVector(dtype) -> AsJagged(AsDtype). * Strings in Awkward1. * Strings in Pandas. * Mapped leaflist onto Awkward RecordArrays. * Mapped leaflist onto Pandas columns. * Mapped fixed-size arrays onto Pandas columns. * Implemented Double32 and Float16, but truncated and Float16 are not being tested. * Implemented TLeafD32 and TLeafF16, but they're not reading back correctly yet. * Finished TLeafD32 and TLeafF16. --- tests/test_0014-all-ttree-versions.py | 8 +- tests/test_0023-more-interpretations-1.py | 296 +++++++++++++++++++++- tests/test_0029-more-string-types.py | 6 +- tests/test_0031-test-stl-containers.py | 6 +- tests/test_0033-more-interpretations-2.py | 144 +++++++++++ uproot4/__init__.py | 34 ++- uproot4/behaviors/TBranch.py | 3 + uproot4/compute/python.py | 5 +- uproot4/interpretation/identify.py | 40 ++- uproot4/interpretation/library.py | 106 ++++++-- uproot4/interpretation/numerical.py | 190 ++++++++++++-- uproot4/interpretation/objects.py | 24 ++ uproot4/interpretation/strings.py | 36 ++- uproot4/models/TAtt.py | 5 - uproot4/models/TBranch.py | 7 - uproot4/models/TLeaf.py | 50 +++- uproot4/models/TTree.py | 5 - uproot4/reading.py | 14 +- uproot4/source/cursor.py | 31 +++ uproot4/stl_containers.py | 31 ++- uproot4/streamers.py | 47 ++-- 21 files changed, 928 insertions(+), 160 deletions(-) create mode 100644 tests/test_0033-more-interpretations-2.py diff --git a/tests/test_0014-all-ttree-versions.py b/tests/test_0014-all-ttree-versions.py index c917b8dea..cfaedea54 100644 --- a/tests/test_0014-all-ttree-versions.py +++ b/tests/test_0014-all-ttree-versions.py @@ -234,11 +234,11 @@ def test_list_streamers(): == """TString (v2) TObject (v1) - fUniqueID: unsigned int - fBits: unsigned int + fUniqueID: unsigned int (TStreamerBasicType) + fBits: unsigned int (TStreamerBasicType) TNamed (v1): TObject (v1) - fName: TString - fTitle: TString + fName: TString (TStreamerString) + fTitle: TString (TStreamerString) """ ) diff --git a/tests/test_0023-more-interpretations-1.py b/tests/test_0023-more-interpretations-1.py index 14307f4dc..f67f24afd 100644 --- a/tests/test_0023-more-interpretations-1.py +++ b/tests/test_0023-more-interpretations-1.py @@ -84,19 +84,8 @@ def test_strings4(): ] -@pytest.mark.skip(reason="FIXME: implement Double32") def test_double32(): - del uproot4.classes["TBranch"] - del uproot4.classes["TBranchElement"] - with uproot4.open(skhep_testdata.data_path("uproot-demo-double32.root"))["T"] as t: - - print(t["fD64"].interpretation) - print(t["fF32"].interpretation) - print(t["fI32"].interpretation) - print(t["fI30"].interpretation) - print(t["fI28"].interpretation) - fD64 = t["fD64"].array(library="np") fF32 = t["fF32"].array(library="np") fI32 = t["fI32"].array(library="np") @@ -110,3 +99,288 @@ def test_double32(): assert ratio_fI32.min() > 0.9999 and ratio_fI32.max() < 1.0001 assert ratio_fI30.min() > 0.9999 and ratio_fI30.max() < 1.0001 assert ratio_fI28.min() > 0.9999 and ratio_fI28.max() < 1.0001 + + +def test_double32_2(): + with uproot4.open(skhep_testdata.data_path("uproot-issue187.root"))["fTreeV0"] as t: + assert numpy.all(t["fMultiplicity"].array(library="np") == -1) + assert t["V0s.fEtaPos"].array(library="np")[-3].tolist() == [ + -0.390625, + 0.046875, + ] + + +def test_double32_3(): + with uproot4.open(skhep_testdata.data_path("uproot-issue232.root"))["fTreeV0"] as t: + assert t["V0Hyper.fNsigmaHe3Pos"].array(library="np")[-1].tolist() == [ + 19.38658905029297, + 999.0, + ] + assert t["V0Hyper.fDcaPos2PrimaryVertex"].array(library="np")[-1].tolist() == [ + 0.256, + 0.256, + ] + + +def test_double32_float16(): + with uproot4.open(skhep_testdata.data_path("uproot-double32-float16.root"))[ + "tree" + ] as t: + assert repr(t["double32_32"].interpretation) == "AsDouble32(-2.71, 10.0, 32)" + assert repr(t["double32_30"].interpretation) == "AsDouble32(-2.71, 10.0, 30)" + assert repr(t["double32_20"].interpretation) == "AsDouble32(-2.71, 10.0, 20)" + assert repr(t["double32_10"].interpretation) == "AsDouble32(-2.71, 10.0, 10)" + assert repr(t["double32_5"].interpretation) == "AsDouble32(-2.71, 10.0, 5)" + assert repr(t["double32_3"].interpretation) == "AsDouble32(-2.71, 10.0, 3)" + assert repr(t["float16_16"].interpretation) == "AsFloat16(-2.71, 10.0, 16)" + assert repr(t["float16_10"].interpretation) == "AsFloat16(-2.71, 10.0, 10)" + assert repr(t["float16_5"].interpretation) == "AsFloat16(-2.71, 10.0, 5)" + assert repr(t["float16_3"].interpretation) == "AsFloat16(-2.71, 10.0, 3)" + assert ( + repr(t["array_30"].interpretation) + == "AsDouble32(-2.71, 10.0, 30, to_dims=(3,))" + ) + assert ( + repr(t["array_10"].interpretation) + == "AsFloat16(-2.71, 10.0, 10, to_dims=(3,))" + ) + + assert t["double32_32"].array(library="np").tolist() == [ + -1.9999999994342215, + -1.4999999998277052, + -1.0000000002211891, + -0.50000000061467276, + -0.10000000329688152, + -1.0081566692576871e-09, + 0.10000000128056863, + 0.49999999859835986, + 0.99999999820487595, + 2.0000000003771863, + 2.9999999995902185, + 3.9999999988032506, + 4.9999999980162837, + 5.9999999972293159, + 6.9999999964423489, + 7.9999999986146593, + 8.9999999978276897, + ] + + assert t["double32_30"].array(library="np").tolist() == [ + -2.0000000023934987, + -1.5000000057462601, + -0.99999999726191158, + -0.50000000061467276, + -0.10000000329688152, + -3.9674339369355494e-09, + 0.10000000719912361, + 0.50000000451691484, + 1.0000000011641537, + 1.9999999944586309, + 2.9999999995902185, + 4.0000000047218061, + 4.9999999980162837, + 6.0000000031478704, + 6.9999999964423489, + 8.0000000015739374, + 8.9999999948684142, + ] + + assert t["double32_20"].array(library="np").tolist() == [ + -2.0000006771087646, + -1.5000011539459228, + -1.0000016307830808, + -0.50000210762023922, + -0.10000248908996578, + -2.5844573974254104e-06, + 0.099997320175170934, + 0.49999693870544437, + 0.99999646186828661, + 1.9999955081939698, + 2.9999945545196534, + 4.0000057220458993, + 5.0000047683715829, + 6.0000038146972665, + 7.0000028610229501, + 8.0000019073486328, + 9.0000009536743164, + ] + + assert t["double32_10"].array(library="np").tolist() == [ + -2.0025097656249997, + -1.5060253906249998, + -0.99712890624999995, + -0.50064453124999986, + -0.10345703124999961, + -0.0041601562499997691, + 0.095136718750000071, + 0.50473632812500036, + 1.0012207031250004, + 1.9941894531250002, + 2.9995703125000004, + 4.0049511718750006, + 4.9979199218750008, + 6.0033007812500001, + 6.9962695312500012, + 8.0016503906250023, + 8.9946191406250016, + ] + + assert t["double32_5"].array(library="np").tolist() == [ + -1.9156249999999999, + -1.5184374999999999, + -1.1212499999999999, + -0.3268749999999998, + 0.0703125, + 0.0703125, + 0.0703125, + 0.46750000000000025, + 0.8646875000000005, + 2.0562500000000004, + 2.850625, + 4.0421875000000007, + 4.8365625000000003, + 6.0281250000000002, + 6.8225000000000007, + 8.0140625000000014, + 8.8084375000000001, + ] + + assert t["double32_3"].array(library="np").tolist() == [ + -2.71, + -1.1212499999999999, + -1.1212499999999999, + -1.1212499999999999, + 0.46750000000000025, + 0.46750000000000025, + 0.46750000000000025, + 0.46750000000000025, + 0.46750000000000025, + 2.0562500000000004, + 3.6450000000000005, + 3.6450000000000005, + 5.233750000000001, + 5.233750000000001, + 6.822500000000001, + 8.411249999999999, + 8.411249999999999, + ] + + assert t["float16_16"].array(library="np").tolist() == [ + -1.9999885559082031, + -1.5000133514404297, + -1.0000380277633667, + -0.50006270408630371, + -0.099966049194335938, + -8.7499618530273438e-05, + 0.099985122680664062, + 0.50008177757263184, + 1.0000569820404053, + 2.0000076293945312, + 2.9999580383300781, + 3.9999089241027832, + 5.0000534057617188, + 6.0000038146972656, + 6.9999542236328125, + 7.9999046325683594, + 9.0000495910644531, + ] + + assert t["float16_10"].array(library="np").tolist() == [ + -2.0025098323822021, + -1.5060254335403442, + -0.99712896347045898, + -0.50064444541931152, + -0.10345697402954102, + -0.0041601657867431641, + 0.095136642456054688, + 0.50473618507385254, + 1.001220703125, + 1.9941892623901367, + 2.999570369720459, + 4.004951000213623, + 4.997920036315918, + 6.003300666809082, + 6.9962692260742188, + 8.0016508102416992, + 8.9946193695068359, + ] + + assert t["float16_5"].array(library="np").tolist() == [ + -1.9156250953674316, + -1.5184375047683716, + -1.1212500333786011, + -0.32687497138977051, + 0.0703125, + 0.0703125, + 0.0703125, + 0.46749997138977051, + 0.86468744277954102, + 2.0562500953674316, + 2.8506250381469727, + 4.0421876907348633, + 4.8365626335144043, + 6.0281248092651367, + 6.8225002288818359, + 8.0140628814697266, + 8.8084373474121094, + ] + + assert t["float16_3"].array(library="np").tolist() == [ + -2.7100000381469727, + -1.1212500333786011, + -1.1212500333786011, + -1.1212500333786011, + 0.46749997138977051, + 0.46749997138977051, + 0.46749997138977051, + 0.46749997138977051, + 0.46749997138977051, + 2.0562500953674316, + 3.6449999809265137, + 3.6449999809265137, + 5.2337498664855957, + 5.2337498664855957, + 6.8225002288818359, + 8.411250114440918, + 8.411250114440918, + ] + + assert t["array_30"].array(library="np").tolist() == [ + [-2.0000000023934987, -2.0000000023934987, -2.0000000023934987], + [-1.5000000057462601, -1.5000000057462601, -1.5000000057462601], + [-0.99999999726191158, -0.99999999726191158, -0.99999999726191158], + [-0.50000000061467276, -0.50000000061467276, -0.50000000061467276], + [-0.10000000329688152, -0.10000000329688152, -0.10000000329688152], + [-3.9674339369355494e-09, -3.9674339369355494e-09, -3.9674339369355494e-09], + [0.10000000719912361, 0.10000000719912361, 0.10000000719912361], + [0.50000000451691484, 0.50000000451691484, 0.50000000451691484], + [1.0000000011641537, 1.0000000011641537, 1.0000000011641537], + [1.9999999944586309, 1.9999999944586309, 1.9999999944586309], + [2.9999999995902185, 2.9999999995902185, 2.9999999995902185], + [4.0000000047218061, 4.0000000047218061, 4.0000000047218061], + [4.9999999980162837, 4.9999999980162837, 4.9999999980162837], + [6.0000000031478704, 6.0000000031478704, 6.0000000031478704], + [6.9999999964423489, 6.9999999964423489, 6.9999999964423489], + [8.0000000015739374, 8.0000000015739374, 8.0000000015739374], + [8.9999999948684142, 8.9999999948684142, 8.9999999948684142], + ] + + assert t["array_10"].array(library="np").tolist() == [ + [-2.0025098323822021, -2.0025098323822021, -2.0025098323822021], + [-1.5060254335403442, -1.5060254335403442, -1.5060254335403442], + [-0.99712896347045898, -0.99712896347045898, -0.99712896347045898], + [-0.50064444541931152, -0.50064444541931152, -0.50064444541931152], + [-0.10345697402954102, -0.10345697402954102, -0.10345697402954102], + [-0.0041601657867431641, -0.0041601657867431641, -0.0041601657867431641], + [0.095136642456054688, 0.095136642456054688, 0.095136642456054688], + [0.50473618507385254, 0.50473618507385254, 0.50473618507385254], + [1.001220703125, 1.001220703125, 1.001220703125], + [1.9941892623901367, 1.9941892623901367, 1.9941892623901367], + [2.999570369720459, 2.999570369720459, 2.999570369720459], + [4.004951000213623, 4.004951000213623, 4.004951000213623], + [4.997920036315918, 4.997920036315918, 4.997920036315918], + [6.003300666809082, 6.003300666809082, 6.003300666809082], + [6.9962692260742188, 6.9962692260742188, 6.9962692260742188], + [8.0016508102416992, 8.0016508102416992, 8.0016508102416992], + [8.9946193695068359, 8.9946193695068359, 8.9946193695068359], + ] diff --git a/tests/test_0029-more-string-types.py b/tests/test_0029-more-string-types.py index 59646e200..0067aaf9f 100644 --- a/tests/test_0029-more-string-types.py +++ b/tests/test_0029-more-string-types.py @@ -22,8 +22,8 @@ def test_parse_typename(): assert parse_typename("string") == AsString(False) assert parse_typename("std::string") == AsString(False) assert parse_typename("std :: string") == AsString(False) - assert parse_typename("char*") == AsString(False) - assert parse_typename("char *") == AsString(False) + assert parse_typename("char*") == AsString(False, length_bytes="4") + assert parse_typename("char *") == AsString(False, length_bytes="4") assert parse_typename("TString") == AsString(False) assert parse_typename("vector") == AsVector(True, uproot4.classes["TTree"]) assert parse_typename("vector") == AsVector(True, ">i4") @@ -38,7 +38,7 @@ def test_parse_typename(): True, AsVector(False, AsString(False)) ) assert parse_typename("vector>") == AsVector( - True, AsVector(False, AsString(False)) + True, AsVector(False, AsString(False, length_bytes="4")) ) assert parse_typename("set") == AsSet(True, ">u2") assert parse_typename("std::set") == AsSet(True, ">u2") diff --git a/tests/test_0031-test-stl-containers.py b/tests/test_0031-test-stl-containers.py index 67322240d..5eccf4de9 100644 --- a/tests/test_0031-test-stl-containers.py +++ b/tests/test_0031-test-stl-containers.py @@ -10,6 +10,8 @@ import skhep_testdata import uproot4 +from uproot4.interpretation.numerical import AsDtype +from uproot4.interpretation.jagged import AsJagged from uproot4.interpretation.objects import AsObjects from uproot4.stl_containers import AsString from uproot4.stl_containers import AsVector @@ -21,9 +23,7 @@ def test_typename(): with uproot4.open(skhep_testdata.data_path("uproot-stl_containers.root"))[ "tree" ] as tree: - assert tree["vector_int32"].interpretation == AsObjects( - AsVector(True, numpy.dtype(">i4")) - ) + assert tree["vector_int32"].interpretation == AsJagged(AsDtype(">i4"), 10) assert tree["vector_string"].interpretation == AsObjects( AsVector(True, AsString(False)) ) diff --git a/tests/test_0033-more-interpretations-2.py b/tests/test_0033-more-interpretations-2.py new file mode 100644 index 000000000..7ac7b7abc --- /dev/null +++ b/tests/test_0033-more-interpretations-2.py @@ -0,0 +1,144 @@ +# BSD 3-Clause License; see https://github.com/scikit-hep/uproot4/blob/master/LICENSE + +from __future__ import absolute_import + +import sys +import json + +import numpy +import pytest +import skhep_testdata + +import uproot4 + + +def test_awkward_strings(): + awkward1 = pytest.importorskip("awkward1") + with uproot4.open(skhep_testdata.data_path("uproot-stl_containers.root"))[ + "tree" + ] as tree: + assert awkward1.to_list(tree["string"].array(library="ak")) == [ + "one", + "two", + "three", + "four", + "five", + ] + + +def test_pandas_strings(): + pandas = pytest.importorskip("pandas") + with uproot4.open(skhep_testdata.data_path("uproot-stl_containers.root"))[ + "tree" + ] as tree: + assert tree["string"].array(library="pd").values.tolist() == [ + "one", + "two", + "three", + "four", + "five", + ] + + +def test_leaflist_numpy(): + with uproot4.open(skhep_testdata.data_path("uproot-leaflist.root"))[ + "tree/leaflist" + ] as branch: + result = branch.array(library="np") + assert result.dtype.names == ("x", "y", "z") + assert result.tolist() == [ + (1.1, 1, 97), + (2.2, 2, 98), + (3.3, 3, 99), + (4.0, 4, 100), + (5.5, 5, 101), + ] + assert branch.typename == "struct {double x; int32_t y; int8_t z;}" + + +def test_leaflist_awkward(): + awkward1 = pytest.importorskip("awkward1") + with uproot4.open(skhep_testdata.data_path("uproot-leaflist.root"))[ + "tree/leaflist" + ] as branch: + result = branch.array(library="ak") + assert str(awkward1.type(result)) == '5 * {"x": float64, "y": int32, "z": int8}' + assert awkward1.to_list(result) == [ + {"x": 1.1, "y": 1, "z": 97}, + {"x": 2.2, "y": 2, "z": 98}, + {"x": 3.3, "y": 3, "z": 99}, + {"x": 4.0, "y": 4, "z": 100}, + {"x": 5.5, "y": 5, "z": 101}, + ] + + +def test_leaflist_pandas(): + pandas = pytest.importorskip("pandas") + with uproot4.open(skhep_testdata.data_path("uproot-leaflist.root"))["tree"] as tree: + result = tree["leaflist"].array(library="pd") + assert list(result.columns) == [":x", ":y", ":z"] + assert result[":x"].values.tolist() == [1.1, 2.2, 3.3, 4.0, 5.5] + assert result[":y"].values.tolist() == [1, 2, 3, 4, 5] + assert result[":z"].values.tolist() == [97, 98, 99, 100, 101] + + result = tree.arrays("leaflist", library="pd") + assert list(result.columns) == ["leaflist:x", "leaflist:y", "leaflist:z"] + assert result["leaflist:x"].values.tolist() == [1.1, 2.2, 3.3, 4.0, 5.5] + assert result["leaflist:y"].values.tolist() == [1, 2, 3, 4, 5] + assert result["leaflist:z"].values.tolist() == [97, 98, 99, 100, 101] + + +def test_fixed_width(): + with uproot4.open( + skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root") + )["sample"] as tree: + assert tree["ai4"].array(library="np").tolist() == [ + [i, i + 1, i + 2] for i in range(-14, 16) + ] + assert tree["ai4"].typename == "int32_t[3]" + + +def test_fixed_width_awkward(): + awkward1 = pytest.importorskip("awkward1") + with uproot4.open( + skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root") + )["sample"] as tree: + assert awkward1.to_list(tree["ai4"].array(library="ak")) == [ + [i, i + 1, i + 2] for i in range(-14, 16) + ] + + +def test_fixed_width_pandas(): + pandas = pytest.importorskip("pandas") + with uproot4.open( + skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root") + )["sample"] as tree: + result = tree["ai4"].array(library="pd") + assert list(result.columns) == ["[0]", "[1]", "[2]"] + assert result["[0]"].values.tolist() == list(range(-14, 16)) + assert result["[1]"].values.tolist() == list(range(-13, 17)) + assert result["[2]"].values.tolist() == list(range(-12, 18)) + + result = tree.arrays("ai4", library="pd") + assert list(result.columns) == ["ai4[0]", "ai4[1]", "ai4[2]"] + assert result["ai4[0]"].values.tolist() == list(range(-14, 16)) + assert result["ai4[1]"].values.tolist() == list(range(-13, 17)) + assert result["ai4[2]"].values.tolist() == list(range(-12, 18)) + + +def test_fixed_width_pandas_2(): + pandas = pytest.importorskip("pandas") + with uproot4.open( + skhep_testdata.data_path("uproot-small-evnt-tree-fullsplit.root") + )["tree"] as tree: + result = tree["ArrayI32[10]"].array(library="pd") + assert list(result.columns) == ["[" + str(i) + "]" for i in range(10)] + for i in range(10): + assert result["[" + str(i) + "]"].values.tolist() == list(range(100)) + + result = tree.arrays( + "xyz", aliases={"xyz": "get('ArrayI32[10]')"}, library="pd" + ) + assert list(result.columns) == ["xyz[" + str(i) + "]" for i in range(10)] + for i in range(10): + assert result["xyz[" + str(i) + "]"].values.tolist() == list(range(100)) diff --git a/uproot4/__init__.py b/uproot4/__init__.py index ae8a166fb..625136797 100644 --- a/uproot4/__init__.py +++ b/uproot4/__init__.py @@ -100,15 +100,18 @@ def behavior_of(classname): class KeyInFileError(KeyError): - __slots__ = ["key", "because", "cycle", "file_path", "object_path"] + __slots__ = ["key", "because", "cycle", "keys", "file_path", "object_path"] - def __init__(self, key, because="", cycle=None, file_path=None, object_path=None): + def __init__( + self, key, because="", cycle=None, keys=None, file_path=None, object_path=None + ): super(KeyInFileError, self).__init__(key) self.key = key self.because = because self.cycle = cycle self.file_path = file_path self.object_path = object_path + self.keys = keys def __str__(self): if self.because == "": @@ -116,6 +119,21 @@ def __str__(self): else: because = " because " + self.because + with_keys = "" + if self.keys is not None: + to_show = None + for key in self.keys: + if to_show is None: + to_show = key + else: + to_show += ", " + key + if len(to_show) > 200: + to_show += "..." + break + if to_show is None: + to_show = "(none!)" + with_keys = "\n\n Known keys: {0}\n".format(to_show) + in_file = "" if self.file_path is not None: in_file = "\nin file {0}".format(self.file_path) @@ -125,16 +143,16 @@ def __str__(self): in_object = "\nin object {0}".format(self.object_path) if self.cycle == "any": - return """not found: {0} (with any cycle number){1}{2}{3}""".format( - repr(self.key), because, in_file, in_object + return """not found: {0} (with any cycle number){1}{2}{3}{4}""".format( + repr(self.key), because, with_keys, in_file, in_object ) elif self.cycle is None: - return """not found: {0}{1}{2}{3}""".format( - repr(self.key), because, in_file, in_object + return """not found: {0}{1}{2}{3}{4}""".format( + repr(self.key), because, with_keys, in_file, in_object ) else: - return """not found: {0} with cycle {1}{2}{3}{4}""".format( - repr(self.key), self.cycle, because, in_file, in_object + return """not found: {0} with cycle {1}{2}{3}{4}{5}""".format( + repr(self.key), self.cycle, because, with_keys, in_file, in_object ) diff --git a/uproot4/behaviors/TBranch.py b/uproot4/behaviors/TBranch.py index fcbd71cce..0ed82e751 100644 --- a/uproot4/behaviors/TBranch.py +++ b/uproot4/behaviors/TBranch.py @@ -528,6 +528,7 @@ def __getitem__(self, where): else: raise uproot4.KeyInFileError( original_where, + keys=self.keys(recursive=recursive), file_path=self._file.file_path, object_path=self.object_path, ) @@ -541,6 +542,7 @@ def __getitem__(self, where): raise uproot4.KeyInFileError( original_where, file_path=self._file.file_path, + keys=self.keys(recursive=recursive), object_path=self.object_path, ) @@ -552,6 +554,7 @@ def __getitem__(self, where): else: raise uproot4.KeyInFileError( original_where, + keys=self.keys(recursive=recursive), file_path=self._file.file_path, object_path=self.object_path, ) diff --git a/uproot4/compute/python.py b/uproot4/compute/python.py index 57ce868e8..dd0f56206 100644 --- a/uproot4/compute/python.py +++ b/uproot4/compute/python.py @@ -156,7 +156,10 @@ def _expression_to_function( ) except KeyError as err: raise uproot4.KeyInFileError( - err.args[0], file_path=file_path, object_path=object_path + err.args[0], + keys=sorted(keys) + list(aliases), + file_path=file_path, + object_path=object_path, ) function = ast.parse("lambda: None").body[0].value diff --git a/uproot4/interpretation/identify.py b/uproot4/interpretation/identify.py index 3e4b255a2..f1dd31961 100644 --- a/uproot4/interpretation/identify.py +++ b/uproot4/interpretation/identify.py @@ -301,7 +301,7 @@ def _parse_node(tokens, i, typename, file, quote, header, inner_header): return ( i + 1, _parse_maybe_quote( - "uproot4.stl_containers.AsString(False, size_1to5_bytes=False, typename='char*')", + "uproot4.stl_containers.AsString(False, length_bytes='4', typename='char*')", quote, ), ) @@ -313,7 +313,7 @@ def _parse_node(tokens, i, typename, file, quote, header, inner_header): return ( i + 2, _parse_maybe_quote( - "uproot4.stl_containers.AsString(False, size_1to5_bytes=False, typename='char*')", + "uproot4.stl_containers.AsString(False, length_bytes='4', typename='char*')", quote, ), ) @@ -524,9 +524,15 @@ def _float16_double32_walk_ast(node, branch, source): if ( isinstance(node, ast.Name) and isinstance(node.ctx, ast.Load) - and node.id == "pi" + and node.id.lower() == "pi" ): out = ast.Num(3.141592653589793) # TMath::Pi() + elif ( + isinstance(node, ast.Name) + and isinstance(node.ctx, ast.Load) + and node.id.lower() == "twopi" + ): + out = ast.Num(6.283185307179586) # TMath::TwoPi() elif isinstance(node, ast.Num): out = ast.Num(float(node.n)) elif isinstance(node, ast.BinOp) and isinstance( @@ -585,15 +591,20 @@ def _float16_double32_walk_ast(node, branch, source): def _float16_or_double32(branch, context, leaf, is_float16, dims): + if leaf.classname in ("TLeafF16", "TLeafD32"): + title = leaf.member("fTitle") + else: + title = branch.streamer.title + try: - left = branch.streamer.title.index("[") - right = branch.streamer.title.index("]") + left = title.index("[") + right = title.index("]") except (ValueError, AttributeError): low, high, num_bits = 0, 0, 0 else: - source = branch.streamer.title[left : right + 1] + source = title[left : right + 1] try: parsed = ast.parse(source).body[0].value except SyntaxError: @@ -604,8 +615,7 @@ def _float16_or_double32(branch, context, leaf, is_float16, dims): ) transformed = ast.Expression(_float16_double32_walk_ast(parsed, branch, source)) - spec = eval(compile(transformed, repr(branch.streamer.title), "eval")) - + spec = eval(compile(transformed, repr(title), "eval")) if ( len(spec) == 2 and uproot4._util.isnum(spec[0]) @@ -618,7 +628,7 @@ def _float16_or_double32(branch, context, leaf, is_float16, dims): len(spec) == 3 and uproot4._util.isnum(spec[0]) and uproot4._util.isnum(spec[1]) - and uproot4._util.isint(spec[1]) + and uproot4._util.isint(spec[2]) ): low, high, num_bits = spec @@ -665,8 +675,12 @@ def interpretation_of(branch, context): if leaf.classname == "TLeafElement": leaftype = _normalize_ftype(leaf.member("fType")) - is_float16 = leaftype == uproot4.const.kFloat16 - is_double32 = leaftype == uproot4.const.kDouble32 + is_float16 = ( + leaftype == uproot4.const.kFloat16 or leaf.classname == "TLeafF16" + ) + is_double32 = ( + leaftype == uproot4.const.kDouble32 or leaf.classname == "TLeafD32" + ) if is_float16 or is_double32: out = _float16_or_double32(branch, context, leaf, is_float16, dims) @@ -741,7 +755,7 @@ def interpretation_of(branch, context): inner_header=False, string_header=True, ) - return uproot4.interpretation.objects.AsObjects(model_cls) + return uproot4.interpretation.objects.AsObjects(model_cls).simplify() if branch.streamer is not None: model_cls = parse_typename( @@ -751,7 +765,7 @@ def interpretation_of(branch, context): inner_header=False, string_header=False, ) - return uproot4.interpretation.objects.AsObjects(model_cls) + return uproot4.interpretation.objects.AsObjects(model_cls).simplify() if leaf.classname == "TLeafElement": raise NotImplementedError diff --git a/uproot4/interpretation/library.py b/uproot4/interpretation/library.py index c2681b4fe..27e3e5a7e 100644 --- a/uproot4/interpretation/library.py +++ b/uproot4/interpretation/library.py @@ -2,6 +2,8 @@ from __future__ import absolute_import +import itertools + import numpy import uproot4.interpretation.jagged @@ -100,22 +102,50 @@ def finalize(self, array, branch): awkward1 = self.imported if isinstance(array, uproot4.interpretation.jagged.JaggedArray): - array_content = array.content.astype( - array.content.dtype.newbyteorder("="), copy=False - ) - content = awkward1.from_numpy(array_content, highlevel=False) + content = awkward1.from_numpy(array.content, highlevel=False) offsets = awkward1.layout.Index32(array.offsets) layout = awkward1.layout.ListOffsetArray32(offsets, content) return awkward1.Array(layout) elif isinstance(array, uproot4.interpretation.strings.StringArray): - raise NotImplementedError + content = awkward1.layout.NumpyArray( + numpy.frombuffer(array.content, dtype=numpy.dtype(numpy.uint8)), + parameters={"__array__": "char"}, + ) + if issubclass(array.offsets.dtype.type, numpy.int32): + offsets = awkward1.layout.Index32(array.offsets) + layout = awkward1.layout.ListOffsetArray32( + offsets, content, parameters={"__array__": "string"} + ) + elif issubclass(array.offsets.dtype.type, numpy.uint32): + offsets = awkward1.layout.IndexU32(array.offsets) + layout = awkward1.layout.ListOffsetArrayU32( + offsets, content, parameters={"__array__": "string"} + ) + elif issubclass(array.offsets.dtype.type, numpy.int64): + offsets = awkward1.layout.Index64(array.offsets) + layout = awkward1.layout.ListOffsetArray64( + offsets, content, parameters={"__array__": "string"} + ) + else: + raise AssertionError(repr(array.offsets.dtype)) + return awkward1.Array(layout) elif isinstance(array, uproot4.interpretation.objects.ObjectArray): raise NotImplementedError + elif array.dtype.names is not None: + length, shape = array.shape[0], array.shape[1:] + array = array.reshape(-1) + contents = [] + for name in array.dtype.names: + contents.append(awkward1.layout.NumpyArray(numpy.array(array[name]))) + out = awkward1.layout.RecordArray(contents, array.dtype.names, length) + for size in shape[::-1]: + out = awkward1.layout.RegularArray(out, size) + return awkward1.Array(out) + else: - array = array.astype(array.dtype.newbyteorder("="), copy=False) return awkward1.from_numpy(array) def group(self, arrays, expression_context, how): @@ -212,21 +242,17 @@ def imported(self): def finalize(self, array, branch): pandas = self.imported - if isinstance( - array, - ( - uproot4.interpretation.jagged.JaggedArray, - uproot4.interpretation.strings.StringArray, - uproot4.interpretation.objects.ObjectArray, - ), - ): + if isinstance(array, uproot4.interpretation.jagged.JaggedArray): index = pandas.MultiIndex.from_arrays( array.parents_localindex(), names=["entry", "subentry"] ) - content = array.content.astype( - array.content.dtype.newbyteorder("="), copy=False - ) - return pandas.Series(content, index=index) + return pandas.Series(array.content, index=index) + + elif isinstance(array, uproot4.interpretation.strings.StringArray): + out = numpy.zeros(len(array), dtype=numpy.object) + for i, x in enumerate(array): + out[i] = x + return pandas.Series(out) elif isinstance(array, uproot4.interpretation.objects.ObjectArray): out = numpy.zeros(len(array), dtype=numpy.object) @@ -234,13 +260,52 @@ def finalize(self, array, branch): out[i] = x return pandas.Series(out) + elif array.dtype.names is not None and len(array.shape) != 1: + names = [] + arrays = {} + for n in array.dtype.names: + for tup in itertools.product(*[range(d) for d in array.shape[1:]]): + name = ":" + n + "".join("[" + str(x) + "]" for x in tup) + names.append(name) + arrays[name] = array[n][(slice(None),) + tup] + return pandas.DataFrame(arrays, columns=names) + + elif array.dtype.names is not None: + names = [":" + x for x in array.dtype.names] + arrays = dict((":" + x, array[x]) for x in array.dtype.names) + return pandas.DataFrame(arrays, columns=names) + + elif len(array.shape) != 1: + names = [] + arrays = {} + for tup in itertools.product(*[range(d) for d in array.shape[1:]]): + name = "".join("[" + str(x) + "]" for x in tup) + names.append(name) + arrays[name] = array[(slice(None),) + tup] + return pandas.DataFrame(arrays, columns=names) + else: - array = array.astype(array.dtype.newbyteorder("="), copy=False) return pandas.Series(array) + def _only_series(self, original_arrays, original_names): + pandas = self.imported + arrays = {} + names = [] + for name in original_names: + if isinstance(original_arrays[name], pandas.Series): + arrays[name] = original_arrays[name] + names.append(name) + else: + df = original_arrays[name] + for subname in df.columns: + path = name + subname + arrays[path] = df[subname] + names.append(path) + return arrays, names + def group(self, arrays, expression_context, how): - names = [name for name, _ in expression_context] pandas = self.imported + names = [name for name, _ in expression_context] if how is tuple: return tuple(arrays[name] for name in names) elif how is list: @@ -248,6 +313,7 @@ def group(self, arrays, expression_context, how): elif how is dict: return dict((name, arrays[name]) for name in names) elif uproot4._util.isstr(how) or how is None: + arrays, names = self._only_series(arrays, names) if all(isinstance(x.index, pandas.RangeIndex) for x in arrays.values()): return pandas.DataFrame(data=arrays, columns=names) indexes = [] diff --git a/uproot4/interpretation/numerical.py b/uproot4/interpretation/numerical.py index dcd681c78..e008f5edb 100644 --- a/uproot4/interpretation/numerical.py +++ b/uproot4/interpretation/numerical.py @@ -16,22 +16,6 @@ def _dtype_shape(dtype): class Numerical(uproot4.interpretation.Interpretation): - @property - def itemsize(self): - raise AssertionError - - @property - def to_dtype(self): - return self._to_dtype - - @property - def numpy_dtype(self): - return self._to_dtype - - @property - def awkward_form(self): - raise NotImplementedError - def final_array( self, basket_arrays, entry_start, entry_stop, entry_offsets, library, branch ): @@ -164,10 +148,18 @@ def __eq__(self, other): def from_dtype(self): return self._from_dtype + @property + def to_dtype(self): + return self._to_dtype + @property def itemsize(self): return self._from_dtype.itemsize + @property + def numpy_dtype(self): + return self._to_dtype + @property def cache_key(self): def form(dtype, name): @@ -267,14 +259,168 @@ def __init__(self): raise NotImplementedError -class AsDouble32(Numerical): - def __init__(self): - raise NotImplementedError +class TruncatedNumerical(Numerical): + @property + def low(self): + return self._low + @property + def high(self): + return self._high -class AsFloat16(Numerical): - def __init__(self): - raise NotImplementedError + @property + def num_bits(self): + return self._num_bits + + @property + def truncated(self): + return self._low == 0.0 and self._high == 0.0 + + @property + def to_dims(self): + return self._to_dims + + @property + def from_dtype(self): + if self.truncated: + return numpy.dtype(({"exponent": (">u1", 0), "mantissa": (">u2", 1)}, ())) + else: + return numpy.dtype(">u4") + + def __repr__(self): + args = [repr(self._low), repr(self._high), repr(self._num_bits)] + if self._to_dims != (): + args.append("to_dims={0}".format(repr(self._to_dims))) + return "{0}({1})".format(type(self).__name__, ", ".join(args)) + + def __eq__(self, other): + return ( + type(self) == type(other) + and self._low == other._low + and self._high == other._high + and self._num_bits == other._num_bits + and self._to_dims == other._to_dims + ) + + @property + def itemsize(self): + return self.from_dtype.itemsize + + @property + def numpy_dtype(self): + return self.to_dtype + + @property + def cache_key(self): + return "{0}({1},{2},{3},{4})".format( + type(self).__name__, self._low, self._high, self._num_bits, self._to_dims + ) + + def basket_array(self, data, byte_offsets, basket, branch, context): + self.hook_before_basket_array( + data=data, + byte_offsets=byte_offsets, + basket=basket, + branch=branch, + context=context, + ) + + try: + raw = data.view(self.from_dtype) + except ValueError: + raise ValueError( + """basket {0} in tree/branch {1} has the wrong number of bytes ({2}) """ + """for interpretation {3} (expecting raw array of {4}) +in file {5}""".format( + basket.basket_num, + branch.object_path, + len(data), + self, + repr(self._from_dtype), + branch.file.file_path, + ) + ) + + if self.truncated: + exponent = raw["exponent"].astype(numpy.int32) + mantissa = raw["mantissa"].astype(numpy.int32) + + exponent <<= 23 + exponent |= (mantissa & ((1 << (self.num_bits + 1)) - 1)) << ( + 23 - self.num_bits + ) + sign = ((1 << (self.num_bits + 1)) & mantissa != 0) * -2 + 1 + + output = exponent.view(numpy.float32) * sign + output = output.astype(self.to_dtype) + + else: + d, s = _dtype_shape(self.to_dtype) + output = raw.astype(d).reshape((-1,) + s) + numpy.multiply( + output, + float(self._high - self._low) / (1 << self._num_bits), + out=output, + ) + numpy.add(output, self.low, out=output) + + self.hook_after_basket_array( + data=data, + byte_offsets=byte_offsets, + basket=basket, + branch=branch, + context=context, + raw=raw, + output=output, + ) + + return output + + +class AsDouble32(TruncatedNumerical): + def __init__(self, low, high, num_bits, to_dims=()): + self._low = low + self._high = high + self._num_bits = num_bits + self._to_dims = to_dims + + if not uproot4._util.isint(num_bits) or not 2 <= num_bits <= 32: + raise TypeError("num_bits must be an integer between 2 and 32 (inclusive)") + if high <= low and not self.truncated: + raise ValueError( + "high ({0}) must be strictly greater than low ({1})".format(high, low) + ) + + @property + def to_dtype(self): + return numpy.dtype((numpy.float64, self.to_dims)) + + @property + def typename(self): + return "Double32_t" + "".join("[" + str(dim) + "]" for dim in self._to_dims) + + +class AsFloat16(TruncatedNumerical): + def __init__(self, low, high, num_bits, to_dims=()): + self._low = low + self._high = high + self._num_bits = num_bits + self._to_dims = to_dims + + if not uproot4._util.isint(num_bits) or not 2 <= num_bits <= 16: + raise TypeError("num_bits must be an integer between 2 and 16 (inclusive)") + if high <= low and not self.truncated: + raise ValueError( + "high ({0}) must be strictly greater than low ({1})".format(high, low) + ) + + @property + def to_dtype(self): + return numpy.dtype((numpy.float32, self.to_dims)) + + @property + def typename(self): + return "Float16_t" + "".join("[" + str(dim) + "]" for dim in self._to_dims) class AsSTLBits(Numerical): diff --git a/uproot4/interpretation/objects.py b/uproot4/interpretation/objects.py index 88557cfe5..4a0231cf9 100644 --- a/uproot4/interpretation/objects.py +++ b/uproot4/interpretation/objects.py @@ -5,6 +5,9 @@ import numpy import uproot4.interpretation +import uproot4.interpretation.strings +import uproot4.interpretation.jagged +import uproot4.interpretation.numerical import uproot4.stl_containers import uproot4.model import uproot4.source.chunk @@ -199,3 +202,24 @@ def final_array( ) return output + + def simplify(self): + if isinstance(self._model, uproot4.stl_containers.AsString): + header_bytes = 0 + if self._model.header: + header_bytes = 6 + return uproot4.interpretation.strings.AsStrings( + header_bytes, self._model.length_bytes, self._model.typename + ) + + if isinstance(self._model, uproot4.stl_containers.AsVector): + if isinstance(self._model.values, numpy.dtype): + header_bytes = 0 + if self._model.header: + header_bytes = 10 + content = uproot4.interpretation.numerical.AsDtype(self._model.values) + return uproot4.interpretation.jagged.AsJagged( + content, header_bytes, self._model.typename + ) + + return self diff --git a/uproot4/interpretation/strings.py b/uproot4/interpretation/strings.py index c583f53dd..984f36098 100644 --- a/uproot4/interpretation/strings.py +++ b/uproot4/interpretation/strings.py @@ -29,16 +29,20 @@ def content(self): return self._content def __getitem__(self, where): - return self._content[self._offsets[where] : self._offsets[where + 1]] + data = self._content[self._offsets[where] : self._offsets[where + 1]] + return uproot4._util.ensure_str(data) def __len__(self): return len(self._offsets) - 1 class AsStrings(uproot4.interpretation.Interpretation): - def __init__(self, header_bytes=0, size_1to5_bytes=True, typename=None): + def __init__(self, header_bytes=0, length_bytes="1-5", typename=None): self._header_bytes = header_bytes - self._size_1to5_bytes = size_1to5_bytes + if length_bytes in ("1-5", "4"): + self._length_bytes = length_bytes + else: + raise ValueError("length_bytes must be '1-5' or '4'") self._typename = typename @property @@ -46,22 +50,22 @@ def header_bytes(self): return self._header_bytes @property - def size_1to5_bytes(self): - return self._size_1to5_bytes + def length_bytes(self): + return self._length_bytes def __repr__(self): args = [] if self._header_bytes != 0: args.append("header_bytes={0}".format(self._header_bytes)) - if self._size_1to5_bytes is not True: - args.append("size_1to5_bytes={0}".format(self._size_1to5_bytes)) + if self._length_bytes != "1-5": + args.append("length_bytes={0}".format(repr(self._length_bytes))) return "AsStrings({0})".format(", ".join(args)) def __eq__(self, other): return ( isinstance(other, AsStrings) and self._header_bytes == other._header_bytes - and self._size_1to5_bytes == other._size_1to5_bytes + and self._length_bytes == other._length_bytes ) @property @@ -82,7 +86,7 @@ def awkward_form(self): @property def cache_key(self): return "{0}({1},{2})".format( - type(self).__name__, self._header_bytes, self._size_1to5_bytes + type(self).__name__, self._header_bytes, repr(self._length_bytes) ) def basket_array(self, data, byte_offsets, basket, branch, context): @@ -99,10 +103,14 @@ def basket_array(self, data, byte_offsets, basket, branch, context): byte_starts = byte_offsets[:-1] + self._header_bytes byte_stops = byte_offsets[1:] - if self._size_1to5_bytes: + if self._length_bytes == "1-5": length_header_size = numpy.ones(len(byte_starts), dtype=numpy.int32) length_header_size[data[byte_starts] == 255] += 4 - byte_starts += length_header_size + elif self._length_bytes == "4": + length_header_size = numpy.full(len(byte_starts), 4, dtype=numpy.int32) + else: + raise AssertionError(repr(self._length_bytes)) + byte_starts += length_header_size mask = numpy.zeros(len(data), dtype=numpy.int8) mask[byte_starts[byte_starts < len(data)]] = 1 @@ -119,7 +127,7 @@ def basket_array(self, data, byte_offsets, basket, branch, context): data = data.tobytes() else: data = data.tostring() - output = StringArray(offsets, uproot4._util.ensure_str(data)) + output = StringArray(offsets, data) self.hook_after_basket_array( data=data, @@ -151,7 +159,7 @@ def final_array( basket_content[k] = v.content if entry_start >= entry_stop: - return StringArray(library.zeros((1,), numpy.int64), "") + return StringArray(library.zeros((1,), numpy.int64), b"") else: length = 0 @@ -209,7 +217,7 @@ def final_array( start = stop - output = StringArray(offsets, "".join(contents)) + output = StringArray(offsets, b"".join(contents)) self.hook_before_library_finalize( basket_arrays=basket_arrays, diff --git a/uproot4/models/TAtt.py b/uproot4/models/TAtt.py index 37402c8a0..35d3856b6 100644 --- a/uproot4/models/TAtt.py +++ b/uproot4/models/TAtt.py @@ -22,7 +22,6 @@ def read_members(self, chunk, cursor, context): base_names_versions = [] member_names = ["fLineColor", "fLineStyle", "fLineWidth"] class_flags = {} - hooks = None class_code = None @@ -37,7 +36,6 @@ def read_members(self, chunk, cursor, context): base_names_versions = [] member_names = ["fLineColor", "fLineStyle", "fLineWidth"] class_flags = {} - hooks = None class_code = None @@ -54,7 +52,6 @@ def read_members(self, chunk, cursor, context): base_names_versions = [] member_names = ["fFillColor", "fFillStyle"] class_flags = {} - hooks = None class_code = None @@ -67,7 +64,6 @@ def read_members(self, chunk, cursor, context): base_names_versions = [] member_names = ["fFillColor", "fFillStyle"] class_flags = {} - hooks = None class_code = None @@ -85,7 +81,6 @@ def read_members(self, chunk, cursor, context): base_names_versions = [] member_names = ["fMarkerColor", "fMarkerStyle", "fMarkserSize"] class_flags = {} - hooks = None class_code = None diff --git a/uproot4/models/TBranch.py b/uproot4/models/TBranch.py index 7043a3044..aba055616 100644 --- a/uproot4/models/TBranch.py +++ b/uproot4/models/TBranch.py @@ -112,7 +112,6 @@ def member_names(self): base_names_versions = [("TNamed", 1), ("TAttFill", 1)] class_flags = {} - hooks = None class_code = None @@ -219,7 +218,6 @@ def member_names(self): base_names_versions = [("TNamed", 1), ("TAttFill", 1)] class_flags = {} - hooks = None class_code = None @@ -326,7 +324,6 @@ def member_names(self): base_names_versions = [("TNamed", 1), ("TAttFill", 1)] class_flags = {} - hooks = None class_code = None @@ -440,7 +437,6 @@ def member_names(self): base_names_versions = [("TNamed", 1), ("TAttFill", 2)] class_flags = {} - hooks = None class_code = None @@ -505,7 +501,6 @@ def read_members(self, chunk, cursor, context): "fBranchCount2", ] class_flags = {"has_read_object_any": True} - hooks = None class_code = None @@ -561,7 +556,6 @@ def read_members(self, chunk, cursor, context): "fBranchCount2", ] class_flags = {"has_read_object_any": True} - hooks = None class_code = None @@ -617,7 +611,6 @@ def read_members(self, chunk, cursor, context): "fBranchCount2", ] class_flags = {"has_read_object_any": True} - hooks = None class_code = None diff --git a/uproot4/models/TLeaf.py b/uproot4/models/TLeaf.py index 509e768e9..aa8c3e193 100644 --- a/uproot4/models/TLeaf.py +++ b/uproot4/models/TLeaf.py @@ -39,7 +39,6 @@ def read_members(self, chunk, cursor, context): "fLeafCount", ] class_flags = {"has_read_object_any": True} - hooks = None class_code = None @@ -64,7 +63,6 @@ def read_members(self, chunk, cursor, context): base_names_versions = [("TLeaf", 2)] member_names = ["fMinimum", "fMaximum"] class_flags = {} - hooks = None class_code = None @@ -89,7 +87,6 @@ def read_members(self, chunk, cursor, context): base_names_versions = [("TLeaf", 2)] member_names = ["fMinimum", "fMaximum"] class_flags = {} - hooks = None class_code = None @@ -114,7 +111,6 @@ def read_members(self, chunk, cursor, context): base_names_versions = [("TLeaf", 2)] member_names = ["fMinimum", "fMaximum"] class_flags = {} - hooks = None class_code = None @@ -139,7 +135,6 @@ def read_members(self, chunk, cursor, context): base_names_versions = [("TLeaf", 2)] member_names = ["fMinimum", "fMaximum"] class_flags = {} - hooks = None class_code = None @@ -164,7 +159,6 @@ def read_members(self, chunk, cursor, context): base_names_versions = [("TLeaf", 2)] member_names = ["fMinimum", "fMaximum"] class_flags = {} - hooks = None class_code = None @@ -189,7 +183,6 @@ def read_members(self, chunk, cursor, context): base_names_versions = [("TLeaf", 2)] member_names = ["fMinimum", "fMaximum"] class_flags = {} - hooks = None class_code = None @@ -214,7 +207,6 @@ def read_members(self, chunk, cursor, context): base_names_versions = [("TLeaf", 2)] member_names = ["fMinimum", "fMaximum"] class_flags = {} - hooks = None class_code = None @@ -239,7 +231,6 @@ def read_members(self, chunk, cursor, context): base_names_versions = [("TLeaf", 2)] member_names = ["fMinimum", "fMaximum"] class_flags = {} - hooks = None class_code = None @@ -264,7 +255,6 @@ def read_members(self, chunk, cursor, context): base_names_versions = [("TLeaf", 2)] member_names = ["fID", "fType"] class_flags = {} - hooks = None class_code = None @@ -272,6 +262,44 @@ class Model_TLeafElement(uproot4.model.DispatchByVersion): known_versions = {1: Model_TLeafElement_v1} +class Model_TLeafD32_v1(uproot4.model.VersionedModel): + def read_members(self, chunk, cursor, context): + self._bases.append( + self.class_named("TLeaf", 2).read( + chunk, cursor, context, self._file, self._parent + ) + ) + self._members["fMinimum"] = cursor.double32(chunk, context) + self._members["fMaximum"] = cursor.double32(chunk, context) + + base_names_versions = [("TLeaf", 2)] + member_names = ["fMinimum", "fMaximum"] + class_flags = {} + + +class Model_TLeafD32(uproot4.model.DispatchByVersion): + known_versions = {1: Model_TLeafD32_v1} + + +class Model_TLeafF16_v1(uproot4.model.VersionedModel): + def read_members(self, chunk, cursor, context): + self._bases.append( + self.class_named("TLeaf", 2).read( + chunk, cursor, context, self._file, self._parent + ) + ) + self._members["fMinimum"] = cursor.float16(chunk, 12, context) + self._members["fMaximum"] = cursor.float16(chunk, 12, context) + + base_names_versions = [("TLeaf", 2)] + member_names = ["fMinimum", "fMaximum"] + class_flags = {} + + +class Model_TLeafF16(uproot4.model.DispatchByVersion): + known_versions = {1: Model_TLeafF16_v1} + + uproot4.classes["TLeaf"] = Model_TLeaf uproot4.classes["TLeafB"] = Model_TLeafB uproot4.classes["TLeafC"] = Model_TLeafC @@ -282,3 +310,5 @@ class Model_TLeafElement(uproot4.model.DispatchByVersion): uproot4.classes["TLeafO"] = Model_TLeafO uproot4.classes["TLeafS"] = Model_TLeafS uproot4.classes["TLeafElement"] = Model_TLeafElement +uproot4.classes["TLeafD32"] = Model_TLeafD32 +uproot4.classes["TLeafF16"] = Model_TLeafF16 diff --git a/uproot4/models/TTree.py b/uproot4/models/TTree.py index 9a937af39..38fafe4be 100644 --- a/uproot4/models/TTree.py +++ b/uproot4/models/TTree.py @@ -123,7 +123,6 @@ def member_names(self): ("TAttMarker", 2), ] class_flags = {"has_read_object_any": True} - hooks = None class_code = None @@ -240,7 +239,6 @@ def member_names(self): ("TAttMarker", 2), ] class_flags = {"has_read_object_any": True} - hooks = None class_code = None @@ -361,7 +359,6 @@ def member_values(self): ("TAttMarker", 2), ] class_flags = {"has_read_object_any": True} - hooks = None class_code = None @@ -500,7 +497,6 @@ def member_names(self): ("TAttMarker", 2), ] class_flags = {"has_read_object_any": True} - hooks = None class_code = None @@ -643,7 +639,6 @@ def member_names(self): ("TAttMarker", 2), ] class_flags = {"has_read_object_any": True} - hooks = None class_code = None diff --git a/uproot4/reading.py b/uproot4/reading.py index ca86807d3..da8c43636 100644 --- a/uproot4/reading.py +++ b/uproot4/reading.py @@ -1296,7 +1296,7 @@ def _ipython_key_completions_(self): def __getitem__(self, where): if "/" in where or ":" in where: items = where.split("/") - step = self + step = last = self for i, item in enumerate(items): if item != "": @@ -1304,6 +1304,7 @@ def __getitem__(self, where): if ":" in item and item not in step: index = item.index(":") head, tail = item[:index], item[index + 1 :] + last = step step = step[head] if isinstance(step, uproot4.behaviors.TBranch.HasBranches): return step["/".join([tail] + items[i + 1 :])] @@ -1312,9 +1313,11 @@ def __getitem__(self, where): where, repr(head) + " is not a TDirectory, TTree, or TBranch", + keys=[key.fName for key in last._keys], file_path=self._file.file_path, ) else: + last = step step = step[item] elif isinstance(step, uproot4.behaviors.TBranch.HasBranches): @@ -1324,6 +1327,7 @@ def __getitem__(self, where): raise uproot4.KeyInFileError( where, repr(item) + " is not a TDirectory, TTree, or TBranch", + keys=[key.fName for key in last._keys], file_path=self._file.file_path, ) @@ -1349,15 +1353,17 @@ def key(self, where): if "/" in where: items = where.split("/") - step = self + step = last = self for item in items[:-1]: if item != "": if isinstance(step, ReadOnlyDirectory): + last = step step = step[item] else: raise uproot4.KeyInFileError( where, repr(item) + " is not a TDirectory", + keys=[key.fName for key in last._keys], file_path=self._file.file_path, ) return step.key(items[-1]) @@ -1383,9 +1389,9 @@ def key(self, where): return last elif cycle is None: raise uproot4.KeyInFileError( - item, cycle="any", file_path=self._file.file_path + item, cycle="any", keys=self.keys(), file_path=self._file.file_path ) else: raise uproot4.KeyInFileError( - item, cycle=cycle, file_path=self._file.file_path + item, cycle=cycle, keys=self.keys(), file_path=self._file.file_path ) diff --git a/uproot4/source/cursor.py b/uproot4/source/cursor.py index bc98bd22c..c546d65b7 100644 --- a/uproot4/source/cursor.py +++ b/uproot4/source/cursor.py @@ -8,6 +8,7 @@ from __future__ import absolute_import import sys +import struct import numpy @@ -19,6 +20,8 @@ "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLM" "NOPQRSTUVWXYZ!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ " ) +_raw_double32 = struct.Struct(">f") +_raw_float16 = struct.Struct(">BH") class Cursor(object): @@ -177,6 +180,34 @@ def field(self, chunk, format, context, move=True): self._index = stop return format.unpack(chunk.get(start, stop, self, context))[0] + def double32(self, chunk, context, move=True): + # https://github.com/root-project/root/blob/e87a6311278f859ca749b491af4e9a2caed39161/io/io/src/TBufferFile.cxx#L448-L464 + start = self._index + stop = start + _raw_double32.size + if move: + self._index = stop + return _raw_double32.unpack(chunk.get(start, stop, self, context))[0] + + def float16(self, chunk, num_bits, context, move=True): + # https://github.com/root-project/root/blob/e87a6311278f859ca749b491af4e9a2caed39161/io/io/src/TBufferFile.cxx#L432-L442 + # https://github.com/root-project/root/blob/e87a6311278f859ca749b491af4e9a2caed39161/io/io/src/TBufferFile.cxx#L482-L499 + + start = self._index + stop = start + _raw_float16.size + if move: + self._index = stop + + exponent, mantissa = _raw_float16.unpack(chunk.get(start, stop, self, context)) + out = numpy.array([exponent], numpy.int32) + out <<= 23 + out |= (mantissa & ((1 << (num_bits + 1)) - 1)) << (23 - num_bits) + out = out.view(numpy.float32) + + if (1 << (num_bits + 1) & mantissa) != 0: + out = -out + + return out.item() + def bytes(self, chunk, length, context, move=True, copy_if_memmap=False): """ Interpret data at this index of the Chunk as raw bytes with a diff --git a/uproot4/stl_containers.py b/uproot4/stl_containers.py index 4b506ae16..57d98bc8c 100644 --- a/uproot4/stl_containers.py +++ b/uproot4/stl_containers.py @@ -159,27 +159,30 @@ def __ne__(self, other): class AsString(AsSTLContainer): - def __init__(self, header, size_1to5_bytes=True, typename=None): + def __init__(self, header, length_bytes="1-5", typename=None): self.header = header + if length_bytes in ("1-5", "4"): + self._length_bytes = length_bytes + else: + raise ValueError("length_bytes must be '1-5' or '4'") self._typename = typename - self._size_1to5_bytes = size_1to5_bytes @property - def size_1to5_bytes(self): - return self._size_1to5_bytes + def length_bytes(self): + return self._length_bytes def __hash__(self): - return hash((AsString, self._header, self._size_1to5_bytes)) + return hash((AsString, self._header, self._length_bytes)) def __repr__(self): args = [repr(self._header)] - if self._size_1to5_bytes is not True: - args.append("size_1to5_bytes={0}".format(self._size_1to5_bytes)) + if self._length_bytes != "1-5": + args.append("length_bytes={0}".format(repr(self._length_bytes))) return "AsString({0})".format(", ".join(args)) @property def cache_key(self): - return "AsString({0},{1})".format(self._header, self._size_1to5_bytes) + return "AsString({0},{1})".format(self._header, repr(self._length_bytes)) @property def typename(self): @@ -195,11 +198,13 @@ def read(self, chunk, cursor, context, file, parent, header=True): chunk, cursor, context ) - if self._size_1to5_bytes: + if self._length_bytes == "1-5": out = cursor.string(chunk, context) - else: + elif self._length_bytes == "4": length = cursor.field(chunk, _stl_container_size, context) out = cursor.string_with_length(chunk, context, length) + else: + raise AssertionError(repr(self._length_bytes)) if self._header and header: uproot4.deserialization.numbytes_check( @@ -215,7 +220,11 @@ def read(self, chunk, cursor, context, file, parent, header=True): return out def __eq__(self, other): - return isinstance(other, AsString) and self.header == other.header + return ( + isinstance(other, AsString) + and self.header == other.header + and self.length_bytes == other.length_bytes + ) class AsVector(AsSTLContainer): diff --git a/uproot4/streamers.py b/uproot4/streamers.py index 67e770859..596be97d3 100644 --- a/uproot4/streamers.py +++ b/uproot4/streamers.py @@ -27,9 +27,12 @@ (re.compile(r"\bLong_t\b"), "long"), (re.compile(r"\bULong_t\b"), "unsigned long"), (re.compile(r"\bFloat_t\b"), "float"), - (re.compile(r"\bFloat16_t\b"), "float"), # 32-bit, written as 16, trunc mantissa + ( + re.compile(r"\bFloat16_t\b"), + "Float16_t", + ), # 32-bit, written as 16, trunc mantissa (re.compile(r"\bDouble_t\b"), "double"), - (re.compile(r"\bDouble32_t\b"), "double"), # 64-bit, written as 32 + (re.compile(r"\bDouble32_t\b"), "Double32_t"), # 64-bit, written as 32 (re.compile(r"\bLongDouble_t\b"), "long double"), (re.compile(r"\bText_t\b"), "char"), (re.compile(r"\bBool_t\b"), "bool"), @@ -216,12 +219,6 @@ def class_code(self): member_names = [] class_flags = {} - read_members.append( - " if 0 in self.hooks: self.hooks[0](self, chunk, cursor, context)" - ) - - num_hooks = 0 - last_length = len(read_members) for i in range(len(self._members["fElements"])): self._members["fElements"][i].class_code( self, @@ -236,14 +233,9 @@ def class_code(self): member_names, class_flags, ) - if len(read_members) != last_length: - num_hooks += 1 - read_members.append( - " if {0} in self.hooks: self.hooks[{0}](self, " - "chunk, cursor, context)".format(num_hooks) - ) - last_length = len(read_members) + if len(read_members) == 1: + read_members.append(" pass") read_members.append("") class_data = [] @@ -278,8 +270,6 @@ def class_code(self): ) ) - class_data.append(" hooks = {}") - return "\n".join( [ "class {0}(uproot4.model.VersionedModel):".format( @@ -380,7 +370,13 @@ def show(self, stream=sys.stdout): Args: stream: Object with a `write` method for writing the output. """ - stream.write(u" {0}: {1}\n".format(self.name, self.typename)) + stream.write( + u" {0}: {1} ({2})\n".format( + self.name, + self.typename, + uproot4.model.classname_decode(type(self).__name__)[0], + ) + ) class Model_TStreamerArtificial(Model_TStreamerElement): @@ -608,11 +604,24 @@ def class_code( member_names, class_flags, ): - if self.array_length == 0: + if self.typename == "Double32_t": + read_members.append( + " self._members[{0}] = cursor.double32(chunk, " + "context)".format(repr(self.name)) + ) + + elif self.typename == "Float16_t": + read_members.append( + " self._members[{0}] = cursor.float16(chunk, 12, " + "context)".format(repr(self.name)) + ) + + elif self.array_length == 0: if ( i == 0 or not isinstance(elements[i - 1], Model_TStreamerBasicType) or elements[i - 1].array_length != 0 + or elements[i - 1].typename in ("Double32_t", "Float16_t") ): fields.append([]) formats.append([])