Skip to content

Commit 0863fbc

Browse files
authored
Merge pull request #27 from ssl-hep/feat/ds_type_resolver
Added ds_type_resolver function
2 parents fd3de34 + 2b21ce7 commit 0863fbc

File tree

3 files changed

+129
-1
lines changed

3 files changed

+129
-1
lines changed

servicex_analysis_utils/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2828
from .materialization import to_awk
2929
from .file_peeking import get_structure
30+
from .dataset_resolver import ds_type_resolver
3031

3132
__version__ = "1.1.1"
32-
__all__ = ["to_awk", "get_structure"]
33+
__all__ = ["to_awk", "get_structure", "ds_type_resolver"]
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
# Copyright (c) 2025, IRIS-HEP
2+
# All rights reserved.
3+
#
4+
# Redistribution and use in source and binary forms, with or without
5+
# modification, are permitted provided that the following conditions are met:
6+
#
7+
# * Redistributions of source code must retain the above copyright notice, this
8+
# list of conditions and the following disclaimer.
9+
#
10+
# * Redistributions in binary form must reproduce the above copyright notice,
11+
# this list of conditions and the following disclaimer in the documentation
12+
# and/or other materials provided with the distribution.
13+
#
14+
# * Neither the name of the copyright holder nor the names of its
15+
# contributors may be used to endorse or promote products derived from
16+
# this software without specific prior written permission.
17+
#
18+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21+
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22+
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23+
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24+
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25+
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26+
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28+
import re
29+
from typing import Union
30+
from urllib.parse import urlparse
31+
32+
from servicex import dataset
33+
34+
35+
def ds_type_resolver(
    ds_name: Union[str, list[str]],
) -> Union[dataset.FileList, dataset.Rucio, dataset.XRootD, dataset.CERNOpenData]:
    """Build the ServiceX dataset object that matches the form of the input.

    The input is classified purely by its shape; the rules are tried in this
    order and the first match wins:

    * a list -> ``dataset.FileList`` built from the list as given;
    * an ``http://`` or ``https://`` URL -> ``dataset.FileList`` with that
      single entry. A URL whose host contains ``cernbox.cern.ch`` and whose
      path starts with ``/files/spaces`` is first rewritten to
      ``root://eospublic.cern.ch<rest of the path>``;
    * ``rucio://<did>`` -> ``dataset.Rucio`` with the scheme stripped;
    * exactly one ``:`` and no ``/`` -> ``dataset.Rucio`` with the string
      unchanged;
    * decimal digits only -> ``dataset.CERNOpenData`` with the integer value;
    * ``root://...`` ending in ``*`` -> ``dataset.XRootD``;
    * any other ``root://...`` -> ``dataset.FileList`` with that single entry.

    Args:
        ds_name: A dataset name or URL, or a list of file URLs.

    Returns:
        The ServiceX dataset object selected by the rules above.

    Raises:
        TypeError: If ``ds_name`` is neither a list nor a string.
        RuntimeError: If ``ds_name`` is a string that matches none of the
            supported forms.
    """
    if isinstance(ds_name, list):
        return dataset.FileList(ds_name)

    # Fail early with a readable message instead of an opaque error raised
    # from inside the string-matching calls below.
    if not isinstance(ds_name, str):
        raise TypeError(
            f"ds_name must be a str or a list of str, got {type(ds_name).__name__}"
        )

    if ds_name.startswith(("http://", "https://")):
        url = ds_name
        parsed_url = urlparse(url)
        cernbox_prefix = "/files/spaces"
        if "cernbox.cern.ch" in parsed_url.netloc and parsed_url.path.startswith(
            cernbox_prefix
        ):
            # Keep only the part of the path after the CERNBox prefix and
            # address it through the eospublic XRootD endpoint.
            eos_path = parsed_url.path.removeprefix(cernbox_prefix)
            url = f"root://eospublic.cern.ch{eos_path}"
        return dataset.FileList([url])

    if ds_name.startswith("rucio://"):
        return dataset.Rucio(ds_name.removeprefix("rucio://"))

    # A bare "scope:name" identifier: one colon and no path separator. This
    # check must stay after the URL checks, which also contain a colon.
    if ds_name.count(":") == 1 and "/" not in ds_name:
        return dataset.Rucio(ds_name)

    # isdecimal() rather than isdigit(): isdigit() also accepts characters
    # such as superscript digits that int() rejects with a ValueError.
    if ds_name.isdecimal():
        return dataset.CERNOpenData(int(ds_name))

    if ds_name.startswith("root://"):
        if ds_name.endswith("*"):
            # A trailing wildcard is handed to the XRootD dataset type.
            return dataset.XRootD(ds_name)
        # Single file: wrapped in a list, like the http(s) branch above.
        return dataset.FileList([ds_name])

    raise RuntimeError(
        f"Unable to find the type of input provided for dataset: {ds_name}"
    )

tests/test_dataset_resolver.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# Copyright (c) 2025, IRIS-HEP
2+
# All rights reserved.
3+
#
4+
# Redistribution and use in source and binary forms, with or without
5+
# modification, are permitted provided that the following conditions are met:
6+
#
7+
# * Redistributions of source code must retain the above copyright notice, this
8+
# list of conditions and the following disclaimer.
9+
#
10+
# * Redistributions in binary form must reproduce the above copyright notice,
11+
# this list of conditions and the following disclaimer in the documentation
12+
# and/or other materials provided with the distribution.
13+
#
14+
# * Neither the name of the copyright holder nor the names of its
15+
# contributors may be used to endorse or promote products derived from
16+
# this software without specific prior written permission.
17+
#
18+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21+
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22+
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23+
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24+
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25+
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26+
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28+
import pytest
29+
from servicex_analysis_utils import ds_type_resolver
30+
from servicex import dataset
31+
32+
33+
@pytest.mark.parametrize(
    "input_ds, expected_type",
    [
        # Plain web URL
        ("https://test.com", dataset.FileList),
        # Bare "scope:name" identifier
        ("test:data", dataset.Rucio),
        # Identifier with an explicit rucio:// scheme
        ("rucio://test:test", dataset.Rucio),
        # Digits-only string
        ("123", dataset.CERNOpenData),
        # root:// URL without a wildcard
        ("root://eosatlas.cern.ch//eos/", dataset.FileList),
        # root:// URL ending in a wildcard
        ("root://eosatlas.cern.ch//eos/*", dataset.XRootD),
        # List input
        (["root://eosatlas.cern.ch//eos/", "https://test.com"], dataset.FileList),
    ],
)
def test_find_dataset(input_ds, expected_type):
    """Check that each supported input form resolves to the expected dataset class."""
    # The result is not bound to a local called ``dataset`` so that the
    # imported ``servicex.dataset`` module is not shadowed inside the test.
    assert isinstance(ds_type_resolver(input_ds), expected_type)

0 commit comments

Comments
 (0)