Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 43 additions & 14 deletions src/coffea/nanoevents/mapping/uproot.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,12 @@ def _lazify_form(form, prefix, docstr=None, typestr=None):
form["parameters"] = parameters
elif form["class"] == "RecordArray":
newfields, newcontents = [], []
for field, value in zip(form["fields"], form["contents"]):
fields = (
form["fields"]
if form["fields"] is not None
else [str(i) for i in range(len(form["contents"]))]
)
for field, value in zip(fields, form["contents"]):
if "," in field or "!" in field:
# Could also skip here
raise CannotBeNanoEvents(
Expand Down Expand Up @@ -157,7 +162,10 @@ def _extract_base_form(cls, tree, iteritems_options={}):
continue
if not _is_interpretable(branch):
continue
form = branch.interpretation.awkward_form(None)
if isinstance(branch, uproot.behaviors.RNTuple.HasFields):
form = branch.to_akform()[0].contents[0]
else:
form = branch.interpretation.awkward_form(None)
# until awkward-forth is available, this fixer is necessary
if cls._fix_awkward_form_of_iter:
form = uproot._util.recursively_fix_awkward_form_of_iter(
Expand All @@ -168,7 +176,14 @@ def _extract_base_form(cls, tree, iteritems_options={}):
) # normalizes form (expand NumpyArray classes)
try:
form = _lazify_form(
form, f"{key},!load", docstr=branch.title, typestr=branch.typename
form,
f"{key},!load",
docstr=(
branch.description
if isinstance(branch, uproot.behaviors.RNTuple.HasFields)
else branch.title
),
typestr=branch.typename,
)
except CannotBeNanoEvents as ex:
warnings.warn(
Expand All @@ -181,7 +196,13 @@ def _extract_base_form(cls, tree, iteritems_options={}):
"class": "RecordArray",
"contents": [item for item in branch_forms.values()],
"fields": [key for key in branch_forms.keys()],
"parameters": {"__doc__": tree.title},
"parameters": {
"__doc__": (
tree.description
if isinstance(tree, uproot.behaviors.RNTuple.HasFields)
else tree.title
)
},
"form_key": None,
}

Expand Down Expand Up @@ -219,16 +240,24 @@ def extract_column(
):
the_array = self.preloaded_arrays[columnhandle.name][start:stop]
else:
interp = columnhandle.interpretation
interp._forth = use_ak_forth

the_array = columnhandle.array(
interp,
entry_start=start,
entry_stop=stop,
decompression_executor=self.decompression_executor,
interpretation_executor=self.interpretation_executor,
)
if isinstance(columnhandle, uproot.behaviors.RNTuple.HasFields):
the_array = columnhandle.array(
entry_start=start,
entry_stop=stop,
decompression_executor=self.decompression_executor,
interpretation_executor=self.interpretation_executor,
)
else:
interp = columnhandle.interpretation
interp._forth = use_ak_forth

the_array = columnhandle.array(
interp,
entry_start=start,
entry_stop=stop,
decompression_executor=self.decompression_executor,
interpretation_executor=self.interpretation_executor,
)
if isinstance(the_array.layout, awkward.contents.ListOffsetArray):
the_array = awkward.Array(the_array.layout.to_ListOffsetArray64(True))

Expand Down
11 changes: 11 additions & 0 deletions src/coffea/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,17 @@ def decompress_form(form_compressedb64):


def _is_interpretable(branch, emit_warning=True):
if isinstance(branch, uproot.behaviors.RNTuple.HasFields):
# These are collections made by the RNTuple Importer
# Once "real" (i.e. non-converted) RNTuples start to be written,
# these should not be here and this check can be removed
if branch.path.startswith("_collection"):
return False
# Subfields should be accessed via the parent branch because of
# the way forms are set up for subfields
if "." in branch.path:
return False
return True
if isinstance(
branch.interpretation, uproot.interpretation.identify.uproot.AsGrouped
):
Expand Down
Binary file added tests/samples/nano_dimuon_rntuple.root
Binary file not shown.
Binary file added tests/samples/nano_dy_rntuple.root
Binary file not shown.
Binary file added tests/samples/nano_tree_rntuple.root
Binary file not shown.
Binary file added tests/samples/pfnano_rntuple.root
Binary file not shown.
Binary file added tests/samples/treemaker_rntuple.root
Binary file not shown.
126 changes: 126 additions & 0 deletions tests/test_nanoevents_rntuple.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
import awkward as ak
import pytest

from coffea.nanoevents import (
BaseSchema,
NanoAODSchema,
NanoEventsFactory,
PFNanoAODSchema,
TreeMakerSchema,
)


@pytest.mark.parametrize("mode", ["eager", "virtual"])
@pytest.mark.parametrize(
    "file", ["nano_dy", "nano_dimuon", "nano_tree", "pfnano", "treemaker"]
)
def test_base_schema(tests_directory, file, mode):
    """Compare BaseSchema events read from a TTree file and its RNTuple twin.

    For every sample the events are loaded twice, once from ``<file>.root``
    and once from ``<file>_rntuple.root``.  In ``virtual`` mode nothing may be
    materialized after construction; in ``eager`` mode everything must be.

    All samples except ``treemaker`` are compared as whole arrays.  For
    ``treemaker`` (tree key ``PreSelection``) the comparison is done field by
    field: a field that has subfields must have exactly one, and each of its
    sub-subfields is compared against the TTree field named
    ``"<field>.<subfield>.<subsubfield>"`` under the same top-level field.
    """
    is_treemaker = file == "treemaker"
    key = "PreSelection" if is_treemaker else "Events"
    sample = f"{tests_directory}/samples/{file}"
    # Options shared by every array comparison in this test.
    compare_options = {
        "dtype_exact": False,
        "check_parameters": False,
        "equal_nan": True,
    }

    ttree = NanoEventsFactory.from_root(
        {f"{sample}.root": key}, schemaclass=BaseSchema, mode=mode
    ).events()
    rntuple = NanoEventsFactory.from_root(
        {f"{sample}_rntuple.root": key}, schemaclass=BaseSchema, mode=mode
    ).events()

    if mode == "virtual":
        assert not ttree.layout.is_any_materialized
        assert not rntuple.layout.is_any_materialized
    elif mode == "eager":
        assert ttree.layout.is_all_materialized
        assert rntuple.layout.is_all_materialized

    if not is_treemaker:
        assert ak.array_equal(rntuple, ttree, **compare_options)
        return

    for field in rntuple.fields:
        subfields = rntuple[field].fields
        if subfields == []:
            # Flat field: compare it directly with the TTree field of the same name.
            assert ak.array_equal(rntuple[field], ttree[field], **compare_options)
            continue

        assert len(subfields) == 1
        subfield = subfields[0]
        for subsubfield in rntuple[field][subfield].fields:
            from_rntuple = rntuple[field][subfield][subsubfield]
            from_ttree = ttree[field][f"{field}.{subfield}.{subsubfield}"]
            assert ak.array_equal(from_rntuple, from_ttree, **compare_options)


@pytest.mark.parametrize("mode", ["eager", "virtual"])
@pytest.mark.parametrize("file", ["nano_dy", "nano_dimuon", "nano_tree"])
def test_nanoaod_schema(tests_directory, file, mode):
    """Compare NanoAODSchema events read from a TTree file and its RNTuple twin.

    Both files are opened with tree key ``"Events"``.  In ``virtual`` mode no
    buffer may be materialized after construction; in ``eager`` mode all of
    them must be.  The two event arrays must then compare equal, ignoring
    exact dtypes and parameters and treating NaNs as equal.
    """
    sample = f"{tests_directory}/samples/{file}"
    # TTree first, RNTuple second, same load order as the unpacking below.
    ttree, rntuple = [
        NanoEventsFactory.from_root(
            {path: "Events"}, schemaclass=NanoAODSchema, mode=mode
        ).events()
        for path in (f"{sample}.root", f"{sample}_rntuple.root")
    ]

    if mode == "virtual":
        for events in (ttree, rntuple):
            assert not events.layout.is_any_materialized
    elif mode == "eager":
        for events in (ttree, rntuple):
            assert events.layout.is_all_materialized

    arrays_match = ak.array_equal(
        rntuple, ttree, dtype_exact=False, check_parameters=False, equal_nan=True
    )
    assert arrays_match


@pytest.mark.parametrize("mode", ["eager", "virtual"])
def test_pfnano_schema(tests_directory, mode):
    """Compare PFNanoAODSchema events from the pfnano TTree and RNTuple samples.

    Loads ``pfnano.root`` and ``pfnano_rntuple.root`` (tree key ``"Events"``),
    checks the materialization state expected for ``mode``, and asserts that
    the two event arrays are equal up to dtype and parameter differences.
    """

    def load_events(path):
        # Build the events array for one file with the schema under test.
        factory = NanoEventsFactory.from_root(
            {path: "Events"}, schemaclass=PFNanoAODSchema, mode=mode
        )
        return factory.events()

    sample = f"{tests_directory}/samples/pfnano"
    ttree = load_events(f"{sample}.root")
    rntuple = load_events(f"{sample}_rntuple.root")

    if mode == "virtual":
        assert not ttree.layout.is_any_materialized
        assert not rntuple.layout.is_any_materialized
    elif mode == "eager":
        assert ttree.layout.is_all_materialized
        assert rntuple.layout.is_all_materialized

    assert ak.array_equal(
        rntuple,
        ttree,
        dtype_exact=False,
        check_parameters=False,
        equal_nan=True,
    )


@pytest.mark.xfail(
    reason="RNTuple version of the treemaker sample has different field structure"
)
@pytest.mark.parametrize("mode", ["eager", "virtual"])
def test_treemaker_schema(tests_directory, mode):
    """Compare TreeMakerSchema events from the treemaker TTree and RNTuple samples.

    Marked ``xfail``: per the marker's reason, the RNTuple version of the
    sample has a different field structure.  The test still loads both files
    (tree key ``"PreSelection"``), checks the materialization state expected
    for ``mode``, and asserts whole-array equality.
    """
    sample = f"{tests_directory}/samples/treemaker"
    ttree_path = f"{sample}.root"
    rntuple_path = f"{sample}_rntuple.root"
    factory_options = {"schemaclass": TreeMakerSchema, "mode": mode}

    ttree = NanoEventsFactory.from_root(
        {ttree_path: "PreSelection"}, **factory_options
    ).events()
    rntuple = NanoEventsFactory.from_root(
        {rntuple_path: "PreSelection"}, **factory_options
    ).events()

    if mode == "virtual":
        assert not ttree.layout.is_any_materialized
        assert not rntuple.layout.is_any_materialized
    elif mode == "eager":
        assert ttree.layout.is_all_materialized
        assert rntuple.layout.is_all_materialized

    is_equal = ak.array_equal(
        rntuple, ttree, dtype_exact=False, check_parameters=False, equal_nan=True
    )
    assert is_equal
Loading