Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 21 additions & 13 deletions examples/bigger_uproot.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,18 +26,26 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from servicex import RucioDatasetIdentifier
from servicex import ResultFormat
from servicex import ServiceXClient
from servicex import ServiceXSpec, General, Sample
from servicex.func_adl.func_adl_dataset import FuncADLQuery
from servicex.servicex_client import deliver

sx = ServiceXClient(backend="uc-af")
# func_adl query: for each input record `e`, emit a dict holding only 'el_pt'.
query = FuncADLQuery().Select(lambda e: {'el_pt': e['el_pt']})

dataset_id = RucioDatasetIdentifier("user.kchoi:user.kchoi.fcnc_tHq_ML.ttH.v8")

ds = sx.func_adl_dataset(dataset_id, title="bigger_uproot")

q = ds.Select(lambda e: {'el_pt': e['el_pt']})\
.set_result_format(ResultFormat.parquet).set_tree("nominal")\
.as_signed_urls()

print(q)
# Declarative description of the request: a General section carrying the
# backend / code generator / output settings, and a single sample that is
# identified by a Rucio DID and runs `query` against the "nominal" tree.
spec = ServiceXSpec(
    General=General(
        ServiceX="testing1",
        Codegen="uproot",
        OutputFormat="parquet",
        Delivery="LocalCache",
    ),
    Sample=[
        Sample(
            Name="bigger_uproot",
            RucioDID="user.kchoi:user.kchoi.fcnc_tHq_ML.ttH.v8",
            Tree="nominal",
            Query=query,
        ),
    ],
)

# Hand the spec to deliver() and print whatever it returns.
print(deliver(spec))
19 changes: 19 additions & 0 deletions examples/config_databinder_func_adl.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Example request configuration with three top-level sections:
#   General    - backend, code generator, output format and delivery mode
#   Sample     - list of samples to process
#   Definition - values for the DEF_* names referenced above
#                (presumably substituted by the config loader - confirm)
General:
  ServiceX: DEF_backend
  Codegen: uproot
  OutputFormat: root-file
  Delivery: LocalCache

Sample:
  - Name: ggH
    XRootdFile: DEF_ggH_input
    NFiles: 5
    Query: DEF_ttH_nominal_query

Definition:
  DEF_backend: "servicex-uc-af"
  # Literal block scalar: the query text is kept verbatim on one line.
  DEF_ttH_nominal_query: |
    Select(lambda e: {'lep_pt': e['lep_pt']}).Where(lambda e: e['lep_pt'] > 1000)

  # The trailing backslash escapes the line break inside the double-quoted
  # scalar, so the two physical lines form a single URL.
  DEF_ggH_input: "root://eospublic.cern.ch//eos/opendata/atlas/OutreachDatasets\
      /2020-01-22/4lep/MC/mc_345060.ggH125_ZZ4lep.4lep.root"
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
General:
ServiceX: uc-af
ServiceX: DEF_backend
Codegen: python
OutputFormat: root
OutputFormat: root-file
Delivery: LocalCache

Sample:
Expand All @@ -17,14 +17,19 @@ Sample:
Function: DEF_function1

- Name: ggH
XRootDFiles: root://eospublic.cern.ch//eos/opendata/atlas/OutreachDatasets/2020-01-22/4lep/MC/mc_345060.ggH125_ZZ4lep.4lep.root
RootFile: DEF_ggH_input
Function: DEF_function2

Definition:
DEF_backend: "testing1"

DEF_ggH_input: "root://eospublic.cern.ch//eos/opendata/atlas/OutreachDatasets\
/2020-01-22/4lep/MC/mc_345060.ggH125_ZZ4lep.4lep.root"

DEF_function1: |
def run_query(input_filenames=None):
import uproot

with uproot.open({input_filenames:"nominal"}) as o:
br = o.arrays("mu_pt")
return br
Expand Down
21 changes: 17 additions & 4 deletions examples/databinder.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,24 @@
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import sys

from servicex.databinder import DataBinder
import yaml

sx = DataBinder("config_databinder.yaml")
from servicex import ServiceXSpec
from servicex.servicex_client import deliver

out_dict = sx.deliver()
# Exactly one command-line argument is required: the config file to load.
# Anything else prints the usage line and exits with status 1.
if len(sys.argv) != 2:
    print("Usage: python databinder.py <config_file>")
    sys.exit(1)

print(out_dict)
# Read the YAML document named by the first command-line argument.
# safe_load is used, so only plain YAML data types are constructed.
try:
    with open(sys.argv[1]) as f:
        data = yaml.safe_load(f)
except FileNotFoundError:
    # A missing file is reported with a short message and exit status 1.
    print(f"File {sys.argv[1]} not found")
    sys.exit(1)


# Build a ServiceXSpec from the loaded data, submit it, and print the result.
spec = ServiceXSpec.parse_obj(data)
print(deliver(spec))
32 changes: 19 additions & 13 deletions examples/python_codegen.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,19 +27,10 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from servicex import FileListDataset
from servicex import FileListDataset, ServiceXSpec, General, Sample
from servicex import ServiceXClient
from servicex import ResultFormat

sx = ServiceXClient(backend="uc-af")
dataset_id = FileListDataset("root://eospublic.cern.ch//eos/opendata/atlas/OutreachDatasets/2020-01-22/4lep/MC/mc_345060.ggH125_ZZ4lep.4lep.root") # NOQA 501

ds = sx.python_dataset(
dataset_id,
codegen="python",
title="Python",
result_format=ResultFormat.parquet
)
from servicex.servicex_client import deliver


def run_query(input_filenames=None):
Expand All @@ -49,5 +40,20 @@ def run_query(input_filenames=None):
return br


sx3 = ds.with_uproot_function(run_query).as_pandas()
print(sx3)
# Request description for the "python" code generator: instead of a query,
# the single sample passes the run_query function and reads one ROOT file.
spec = ServiceXSpec(
    General=General(
        ServiceX="testing1",
        Codegen="python",
        OutputFormat="parquet",
        Delivery="LocalCache",
    ),
    Sample=[
        Sample(
            Name="Python Codegen",
            RootFile="root://eospublic.cern.ch//eos/opendata/atlas/OutreachDatasets/2020-01-22/4lep/MC/mc_345060.ggH125_ZZ4lep.4lep.root",  # NOQA E501
            Function=run_query,
        ),
    ],
)

# Submit the spec and print whatever deliver() returns.
print(deliver(spec))

48 changes: 35 additions & 13 deletions examples/single_file_uproot.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,20 +26,42 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from servicex import FileListDataset
from servicex import ResultFormat
from servicex import ServiceXClient
import ast

sx = ServiceXClient(backend="uc-af")
dataset_id = FileListDataset("root://eospublic.cern.ch//eos/opendata/atlas/OutreachDatasets/2020-01-22/4lep/MC/mc_345060.ggH125_ZZ4lep.4lep.root") # NOQA 501
import qastle

ds = sx.func_adl_dataset(dataset_id, codegen="uproot",
title="Root",
result_format=ResultFormat.parquet)
from servicex import ServiceXSpec, General, Sample
from servicex.func_adl.func_adl_dataset import FuncADLQuery
from servicex.servicex_client import deliver

sx3 = ds.Select(lambda e: {'lep_pt': e['lep_pt']}). \
Where(lambda e: e['lep_pt'] > 1000). \
set_tree("mini"). \
as_pandas()
# Query built through the Python API: keep only 'lep_pt' from each record `e`,
# then apply the `> 1000` condition on it.
query = FuncADLQuery().Select(lambda e: {'lep_pt': e['lep_pt']}). \
Where(lambda e: e['lep_pt'] > 1000)

print(sx3)
# A Select/Where chain with the same lambdas, written as source text so it can
# be parsed instead of executed.
qstr = """
FuncADLDataset().Select(lambda e: {'lep_pt': e['lep_pt']}). \
Where(lambda e: e['lep_pt'] > 1000)
"""
# Parse the text into a Python AST, insert the LINQ nodes, and convert the
# result to qastle's text representation.
query_ast = ast.parse(qstr)
qastle_query = qastle.python_ast_to_text_ast(qastle.insert_linq_nodes(query_ast))
print("From str", qastle_query)
# Give the pre-built qastle text to a fresh FuncADLQuery.
q2 = FuncADLQuery()
q2.set_provided_qastle(qastle_query)
# Print the selection string from both routes (text vs. Python API) so they
# can be compared by eye.
print(q2.generate_selection_string())
print("From python", query.generate_selection_string())
# Request description: uproot code generator, parquet output, and one sample
# that runs `query` over a single ROOT file given by URL.
spec = ServiceXSpec(
    General=General(
        ServiceX="testing1",
        Codegen="uproot",
        OutputFormat="parquet",
        Delivery="LocalCache",
    ),
    Sample=[
        Sample(
            Name="mc_345060.ggH125_ZZ4lep.4lep",
            RootFile="root://eospublic.cern.ch//eos/opendata/atlas/OutreachDatasets/2020-01-22/4lep/MC/mc_345060.ggH125_ZZ4lep.4lep.root",  # NOQA E501
            Query=query,
        ),
    ],
)

# Submit the spec and print whatever deliver() returns.
print(deliver(spec))
Loading