Skip to content

Commit 42fd67a

Browse files
ariostas, lgray, ikrommyd
authored
feat: add support for RNTuples (#1311)
* Started implementing RNTuple support
* Adapt to uproot changes
* Fixed form construction
* Added simple RNTuple test
* Fix for PHYSLITE RNTuples
* Ignore subfields and RNTupleImporter-made collections
* Added more RNTuple samples
* add tests
* fix xfailing
* baseschema test with treemaker
* better name in the test

---------

Co-authored-by: Lindsey Gray <[email protected]>
Co-authored-by: Iason Krommydas <[email protected]>
1 parent 5eb8fd4 commit 42fd67a

File tree

8 files changed

+180
-14
lines changed

8 files changed

+180
-14
lines changed

src/coffea/nanoevents/mapping/uproot.py

Lines changed: 43 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,12 @@ def _lazify_form(form, prefix, docstr=None, typestr=None):
7373
form["parameters"] = parameters
7474
elif form["class"] == "RecordArray":
7575
newfields, newcontents = [], []
76-
for field, value in zip(form["fields"], form["contents"]):
76+
fields = (
77+
form["fields"]
78+
if form["fields"] is not None
79+
else [str(i) for i in range(len(form["contents"]))]
80+
)
81+
for field, value in zip(fields, form["contents"]):
7782
if "," in field or "!" in field:
7883
# Could also skip here
7984
raise CannotBeNanoEvents(
@@ -157,7 +162,10 @@ def _extract_base_form(cls, tree, iteritems_options={}):
157162
continue
158163
if not _is_interpretable(branch):
159164
continue
160-
form = branch.interpretation.awkward_form(None)
165+
if isinstance(branch, uproot.behaviors.RNTuple.HasFields):
166+
form = branch.to_akform()[0].contents[0]
167+
else:
168+
form = branch.interpretation.awkward_form(None)
161169
# until awkward-forth is available, this fixer is necessary
162170
if cls._fix_awkward_form_of_iter:
163171
form = uproot._util.recursively_fix_awkward_form_of_iter(
@@ -168,7 +176,14 @@ def _extract_base_form(cls, tree, iteritems_options={}):
168176
) # normalizes form (expand NumpyArray classes)
169177
try:
170178
form = _lazify_form(
171-
form, f"{key},!load", docstr=branch.title, typestr=branch.typename
179+
form,
180+
f"{key},!load",
181+
docstr=(
182+
branch.description
183+
if isinstance(branch, uproot.behaviors.RNTuple.HasFields)
184+
else branch.title
185+
),
186+
typestr=branch.typename,
172187
)
173188
except CannotBeNanoEvents as ex:
174189
warnings.warn(
@@ -181,7 +196,13 @@ def _extract_base_form(cls, tree, iteritems_options={}):
181196
"class": "RecordArray",
182197
"contents": [item for item in branch_forms.values()],
183198
"fields": [key for key in branch_forms.keys()],
184-
"parameters": {"__doc__": tree.title},
199+
"parameters": {
200+
"__doc__": (
201+
tree.description
202+
if isinstance(tree, uproot.behaviors.RNTuple.HasFields)
203+
else tree.title
204+
)
205+
},
185206
"form_key": None,
186207
}
187208

@@ -219,16 +240,24 @@ def extract_column(
219240
):
220241
the_array = self.preloaded_arrays[columnhandle.name][start:stop]
221242
else:
222-
interp = columnhandle.interpretation
223-
interp._forth = use_ak_forth
224-
225-
the_array = columnhandle.array(
226-
interp,
227-
entry_start=start,
228-
entry_stop=stop,
229-
decompression_executor=self.decompression_executor,
230-
interpretation_executor=self.interpretation_executor,
231-
)
243+
if isinstance(columnhandle, uproot.behaviors.RNTuple.HasFields):
244+
the_array = columnhandle.array(
245+
entry_start=start,
246+
entry_stop=stop,
247+
decompression_executor=self.decompression_executor,
248+
interpretation_executor=self.interpretation_executor,
249+
)
250+
else:
251+
interp = columnhandle.interpretation
252+
interp._forth = use_ak_forth
253+
254+
the_array = columnhandle.array(
255+
interp,
256+
entry_start=start,
257+
entry_stop=stop,
258+
decompression_executor=self.decompression_executor,
259+
interpretation_executor=self.interpretation_executor,
260+
)
232261
if isinstance(the_array.layout, awkward.contents.ListOffsetArray):
233262
the_array = awkward.Array(the_array.layout.to_ListOffsetArray64(True))
234263

src/coffea/util.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -262,6 +262,17 @@ def decompress_form(form_compressedb64):
262262

263263

264264
def _is_interpretable(branch, emit_warning=True):
265+
if isinstance(branch, uproot.behaviors.RNTuple.HasFields):
266+
# These are collections made by the RNTuple Importer
267+
# Once "real" (i.e. non-converted) RNTuples start to be written,
268+
# these should not be here and this check can be removed
269+
if branch.path.startswith("_collection"):
270+
return False
271+
# Subfields should be accessed via the parent branch since
272+
# the way forms are set up for subfields
273+
if "." in branch.path:
274+
return False
275+
return True
265276
if isinstance(
266277
branch.interpretation, uproot.interpretation.identify.uproot.AsGrouped
267278
):
73.2 KB
Binary file not shown.

tests/samples/nano_dy_rntuple.root

84.1 KB
Binary file not shown.
36.1 KB
Binary file not shown.

tests/samples/pfnano_rntuple.root

317 KB
Binary file not shown.
345 KB
Binary file not shown.

tests/test_nanoevents_rntuple.py

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
import awkward as ak
2+
import pytest
3+
4+
from coffea.nanoevents import (
5+
BaseSchema,
6+
NanoAODSchema,
7+
NanoEventsFactory,
8+
PFNanoAODSchema,
9+
TreeMakerSchema,
10+
)
11+
12+
@pytest.mark.parametrize("mode", ["eager", "virtual"])
@pytest.mark.parametrize(
    "file", ["nano_dy", "nano_dimuon", "nano_tree", "pfnano", "treemaker"]
)
def test_base_schema(tests_directory, file, mode):
    """Compare BaseSchema events read from a TTree and from its RNTuple twin.

    For every sample, ``<file>.root`` and ``<file>_rntuple.root`` are opened
    with the same tree name, schema and ``mode``. The test first checks the
    materialization state expected for ``mode`` and then asserts that both
    event arrays hold equal data.
    """
    # Comparison options used by every ``ak.array_equal`` call in this test.
    compare_opts = dict(dtype_exact=False, check_parameters=False, equal_nan=True)

    is_treemaker = file == "treemaker"
    key = "PreSelection" if is_treemaker else "Events"
    sample = f"{tests_directory}/samples/{file}"

    def load(path):
        # Open ``path`` with the tree name / schema / mode under test.
        return NanoEventsFactory.from_root(
            {path: key}, schemaclass=BaseSchema, mode=mode
        ).events()

    ttree = load(f"{sample}.root")
    rntuple = load(f"{sample}_rntuple.root")

    if mode == "virtual":
        for events in (ttree, rntuple):
            assert not events.layout.is_any_materialized
    elif mode == "eager":
        for events in (ttree, rntuple):
            assert events.layout.is_all_materialized

    if not is_treemaker:
        # Non-treemaker samples are compared as whole event arrays.
        assert ak.array_equal(rntuple, ttree, **compare_opts)
        return

    # The treemaker sample is compared field by field: a record field on the
    # RNTuple side is matched against the TTree field named
    # "<field>.<subfield>.<subsubfield>".
    for field in rntuple.fields:
        subfields = rntuple[field].fields
        if not subfields:
            assert ak.array_equal(rntuple[field], ttree[field], **compare_opts)
            continue

        assert len(subfields) == 1
        subfield = subfields[0]
        nested = rntuple[field][subfield]
        for subsubfield in nested.fields:
            assert ak.array_equal(
                nested[subsubfield],
                ttree[field][f"{field}.{subfield}.{subsubfield}"],
                **compare_opts,
            )
63+
64+
@pytest.mark.parametrize("mode", ["eager", "virtual"])
@pytest.mark.parametrize("file", ["nano_dy", "nano_dimuon", "nano_tree"])
def test_nanoaod_schema(tests_directory, file, mode):
    """Compare NanoAODSchema events read from a TTree and from its RNTuple twin.

    ``<file>.root`` and ``<file>_rntuple.root`` are both opened on the
    ``Events`` tree; the materialization state expected for ``mode`` is
    checked before the two event arrays are asserted equal.
    """
    sample = f"{tests_directory}/samples/{file}"

    # The TTree file is opened first, then the RNTuple file.
    ttree, rntuple = (
        NanoEventsFactory.from_root(
            {f"{sample}{suffix}.root": "Events"},
            schemaclass=NanoAODSchema,
            mode=mode,
        ).events()
        for suffix in ("", "_rntuple")
    )

    for events in (ttree, rntuple):
        if mode == "virtual":
            assert not events.layout.is_any_materialized
        elif mode == "eager":
            assert events.layout.is_all_materialized

    assert ak.array_equal(
        rntuple,
        ttree,
        dtype_exact=False,
        check_parameters=False,
        equal_nan=True,
    )
84+
85+
@pytest.mark.parametrize("mode", ["eager", "virtual"])
def test_pfnano_schema(tests_directory, mode):
    """Compare PFNanoAODSchema events read from a TTree and from its RNTuple twin.

    Opens ``pfnano.root`` and ``pfnano_rntuple.root`` on the ``Events`` tree,
    checks the materialization state expected for ``mode`` and asserts that
    both event arrays hold equal data.
    """
    base = f"{tests_directory}/samples/pfnano"
    factory_kwargs = {"schemaclass": PFNanoAODSchema, "mode": mode}

    ttree_factory = NanoEventsFactory.from_root(
        {f"{base}.root": "Events"}, **factory_kwargs
    )
    ttree = ttree_factory.events()

    rntuple_factory = NanoEventsFactory.from_root(
        {f"{base}_rntuple.root": "Events"}, **factory_kwargs
    )
    rntuple = rntuple_factory.events()

    if mode == "virtual":
        # Nothing may have been read yet.
        assert not ttree.layout.is_any_materialized
        assert not rntuple.layout.is_any_materialized
    elif mode == "eager":
        # Everything must already be in memory.
        assert ttree.layout.is_all_materialized
        assert rntuple.layout.is_all_materialized

    same = ak.array_equal(
        rntuple, ttree, dtype_exact=False, check_parameters=False, equal_nan=True
    )
    assert same
104+
105+
@pytest.mark.xfail(
    reason="RNTuple version of the treemaker sample has different field structure"
)
@pytest.mark.parametrize("mode", ["eager", "virtual"])
def test_treemaker_schema(tests_directory, mode):
    """Compare TreeMakerSchema events read from a TTree and from its RNTuple twin.

    Opens ``treemaker.root`` and ``treemaker_rntuple.root`` on the
    ``PreSelection`` tree, checks the materialization state expected for
    ``mode`` and asserts that both event arrays hold equal data. The test is
    marked ``xfail``; the marker's reason states that the RNTuple version of
    the sample has a different field structure.
    """
    stem = f"{tests_directory}/samples/treemaker"

    def open_events(filename):
        # Build the events array for one sample file.
        factory = NanoEventsFactory.from_root(
            {filename: "PreSelection"}, schemaclass=TreeMakerSchema, mode=mode
        )
        return factory.events()

    ttree = open_events(f"{stem}.root")
    rntuple = open_events(f"{stem}_rntuple.root")

    layouts = (ttree.layout, rntuple.layout)
    if mode == "virtual":
        for layout in layouts:
            assert not layout.is_any_materialized
    elif mode == "eager":
        for layout in layouts:
            assert layout.is_all_materialized

    comparison = dict(dtype_exact=False, check_parameters=False, equal_nan=True)
    assert ak.array_equal(rntuple, ttree, **comparison)

0 commit comments

Comments
 (0)