Skip to content

Commit 84fc871

Browse files
authored
Add cli command to list datasets (#517)
* Add command line to list datasets * Add option to report only datasets for a specific DID Finder * Increase unit test coverage - Add a file for testing servicex_client. It's only at 92%, but we can add more tests - Test the case of auth error in get_datasets
1 parent 1c3d183 commit 84fc871

File tree

8 files changed

+227
-1
lines changed

8 files changed

+227
-1
lines changed

docs/command_line.rst

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,3 +76,14 @@ clear
7676
Clear all of the transforms from the cache. Add ``-y`` to force the
7777
operation without confirming with the console.
7878

79+
datasets
80+
~~~~~~~~
81+
82+
These commands interact with datasets cached on the server.
83+
84+
list
85+
^^^^
86+
List all of the datasets cached on the server. Accepts a command line argument
87+
of ``--did-finder`` to filter the list of datasets by a specific DID finder such
88+
as ``rucio`` or ``user``.
89+

servicex/app/datasets.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
# Copyright (c) 2024, IRIS-HEP
2+
# All rights reserved.
3+
#
4+
# Redistribution and use in source and binary forms, with or without
5+
# modification, are permitted provided that the following conditions are met:
6+
#
7+
# * Redistributions of source code must retain the above copyright notice, this
8+
# list of conditions and the following disclaimer.
9+
#
10+
# * Redistributions in binary form must reproduce the above copyright notice,
11+
# this list of conditions and the following disclaimer in the documentation
12+
# and/or other materials provided with the distribution.
13+
#
14+
# * Neither the name of the copyright holder nor the names of its
15+
# contributors may be used to endorse or promote products derived from
16+
# this software without specific prior written permission.
17+
#
18+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21+
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22+
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23+
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24+
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25+
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26+
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28+
import asyncio
29+
from typing import Optional
30+
31+
import rich
32+
33+
from servicex.app.cli_options import url_cli_option, backend_cli_option
34+
35+
import typer
36+
37+
from servicex.servicex_client import ServiceXClient
38+
from rich.table import Table
39+
40+
# Typer sub-application grouping the ``datasets`` commands. With
# ``no_args_is_help=True`` the group prints its help text when it is invoked
# without a sub-command.
datasets_app = typer.Typer(name="datasets", no_args_is_help=True)
41+
42+
43+
# Every option of this command is optional, so the command must run when it is
# invoked without arguments; ``no_args_is_help=True`` would print the help text
# and exit instead of listing the datasets.
@datasets_app.command(no_args_is_help=False)
def list(  # noqa: A001 - Typer derives the CLI command name from the function name
    url: Optional[str] = url_cli_option,
    backend: Optional[str] = backend_cli_option,
    did_finder: Optional[str] = typer.Option(
        None,
        help="Filter datasets by DID finder. Some useful values are 'rucio' or 'user'",
        show_default=False,
    ),
):
    """
    List the datasets.
    """
    # NOTE: the docstring above is shown by Typer as the command's help text,
    # so it is kept short; implementation notes live in comments instead.
    #
    # url / backend: forwarded unchanged to ServiceXClient to select the server.
    # did_finder:    when given, only datasets from that DID finder are requested.
    sx = ServiceXClient(url=url, backend=backend)

    table = Table(title="ServiceX Datasets")
    for heading in ("ID", "Name", "Files", "Size", "Status", "Created"):
        table.add_column(heading)

    # ServiceXClient.get_datasets hands back an awaitable, so drive it to
    # completion here.
    datasets = asyncio.run(sx.get_datasets(did_finder=did_finder))

    for d in datasets:
        # Format the CachedDataset object into a table row.
        # The last_updated field is what we should be displaying, but the
        # server currently leaves it at a 1970 placeholder and never updates
        # it. Stick with the last_used field until
        # https://github.com/ssl-hep/ServiceX/issues/906 is resolved.
        table.add_row(
            str(d.id),
            # Datasets from the "user" DID finder are shown under a fixed label
            # rather than their stored name.
            d.name if d.did_finder != "user" else "File list",
            str(d.n_files),
            f"{round(d.size / 1e6):,}MB",
            d.lookup_status,
            d.last_used.strftime('%Y-%m-%dT%H:%M:%S'),
        )

    rich.print(table)

servicex/app/main.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
import typer
3232

3333
from servicex._version import __version__
34+
from servicex.app.datasets import datasets_app
3435
from servicex.app.transforms import transforms_app
3536
from servicex.app.cache import cache_app
3637
from servicex.app.codegen import codegen_app
@@ -40,6 +41,7 @@
4041
app.add_typer(transforms_app)
4142
app.add_typer(cache_app)
4243
app.add_typer(codegen_app)
44+
app.add_typer(datasets_app)
4345

4446

4547
def show_version(show: bool):

servicex/models.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,3 +170,18 @@ class TransformedResults(BaseModel):
170170
files: int
171171
result_format: ResultFormat
172172
log_url: Optional[str] = None
173+
174+
175+
class CachedDataset(BaseModel):
    """
    Description of a single dataset cached by the ServiceX server.
    """
    # --- identity -----------------------------------------------------------
    id: int
    name: str
    did_finder: str  # e.g. "rucio" or "user"

    # --- contents -----------------------------------------------------------
    n_files: int
    size: int  # presumably bytes (the CLI divides by 1e6 to show MB) - TODO confirm
    events: int

    # --- bookkeeping --------------------------------------------------------
    last_used: datetime
    last_updated: datetime
    lookup_status: str

servicex/servicex_adapter.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
from google.auth import jwt
3535
from tenacity import AsyncRetrying, stop_after_attempt, wait_fixed, retry_if_not_exception_type
3636

37-
from servicex.models import TransformRequest, TransformStatus
37+
from servicex.models import TransformRequest, TransformStatus, CachedDataset
3838

3939

4040
class AuthorizationError(BaseException):
@@ -109,6 +109,22 @@ def get_code_generators(self):
109109
f"Not authorized to access serviceX at {self.url}")
110110
return r.json()
111111

112+
async def get_datasets(self, did_finder=None) -> List[CachedDataset]:
    """
    Fetch the datasets cached on the ServiceX server.

    :param did_finder: Optional DID finder name; when truthy it is sent as the
        ``did-finder`` query parameter so the server filters the result.
    :return: One ``CachedDataset`` per entry of the ``datasets`` list in the
        JSON response.
    :raises AuthorizationError: if the server answers with HTTP 403.
    :raises RuntimeError: if the server answers with any other error status
        (>= 400), instead of failing later while decoding the body.
    """
    headers = await self._get_authorization()
    params = {"did-finder": did_finder} if did_finder else {}

    with httpx.Client() as client:
        r = client.get(headers=headers,
                       url=f"{self.url}/servicex/datasets",
                       params=params)

        if r.status_code == 403:
            raise AuthorizationError(
                f"Not authorized to access serviceX at {self.url}")

        # Any other error response carries no "datasets" payload; report the
        # status explicitly rather than surfacing a KeyError/JSON decode error.
        # (raise_for_status() is avoided: it needs a request bound to the
        # response object.)
        if r.status_code >= 400:
            raise RuntimeError(
                f"ServiceX WebAPI error while listing datasets at {self.url}: "
                f"{r.status_code} - {r.text}")

        return [CachedDataset(**d) for d in r.json()['datasets']]
127+
112128
async def submit_transform(self, transform_request: TransformRequest):
113129
headers = await self._get_authorization()
114130
retry_options = ExponentialRetry(attempts=3, start_timeout=30)

servicex/servicex_client.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,13 @@ async def get_transform_status_async(self, transform_id) -> TransformStatus:
280280

281281
get_transform_status = make_sync(get_transform_status_async)
282282

283+
def get_datasets(self, did_finder=None):
    r"""
    Retrieve the datasets cached on the ServiceX server.

    This is a thin pass-through: the call is delegated to
    ``self.servicex.get_datasets`` and its result is returned unchanged.
    NOTE(review): the adapter's ``get_datasets`` is a coroutine function, so
    callers are expected to await the result (the CLI wraps it in
    ``asyncio.run``).

    :param did_finder: Optional DID finder name (e.g. ``rucio`` or ``user``)
        used to filter the datasets; ``None`` requests all of them.
    :return: Whatever ``self.servicex.get_datasets`` returns - an awaitable
        that resolves to a list of ``CachedDataset`` objects.
    """
    return self.servicex.get_datasets(did_finder)
289+
283290
def get_code_generators(self, backend=None):
284291
r"""
285292
Retrieve the code generators deployed with the serviceX instance

tests/test_servicex_adapter.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,39 @@ def test_get_codegens_error(get, servicex):
129129
assert "Not authorized to access serviceX at" in str(err.value)
130130

131131

132+
@pytest.mark.asyncio
@patch('servicex.servicex_adapter.httpx.Client.get')
async def test_get_datasets(get, servicex):
    """A successful response is parsed into a list of dataset models."""
    # The id is deliberately sent as a string; the test expects it to come
    # back as the integer 123.
    server_payload = {
        "datasets": [
            {
                "id": "123",
                "name": "dataset1",
                "events": 100,
                "size": 1000,
                "n_files": 1,
                "last_used": "2022-01-01T00:00:00.000000Z",
                "last_updated": "2022-01-01T00:00:00.000000Z",
                "lookup_status": "looking",
                "did_finder": "rucio"
            }
        ]
    }
    get.return_value = httpx.Response(200, json=server_payload)

    found = await servicex.get_datasets()

    assert len(found) == 1
    assert found[0].id == 123
155+
156+
@pytest.mark.asyncio
@patch('servicex.servicex_adapter.httpx.Client.get')
async def test_get_datasets_auth_error(get, servicex):
    """An HTTP 403 from the server is reported as an AuthorizationError."""
    forbidden = httpx.Response(403)
    get.return_value = forbidden

    with pytest.raises(AuthorizationError) as err:
        await servicex.get_datasets()

    # Checked after the ``with`` block so the assertion is actually executed.
    assert "Not authorized to access serviceX at" in str(err.value)
163+
164+
132165
@pytest.mark.asyncio
133166
@patch('servicex.servicex_adapter.RetryClient.post')
134167
async def test_submit(post, servicex):

tests/test_servicex_client.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
# Copyright (c) 2024, IRIS-HEP
2+
# All rights reserved.
3+
#
4+
# Redistribution and use in source and binary forms, with or without
5+
# modification, are permitted provided that the following conditions are met:
6+
#
7+
# * Redistributions of source code must retain the above copyright notice, this
8+
# list of conditions and the following disclaimer.
9+
#
10+
# * Redistributions in binary form must reproduce the above copyright notice,
11+
# this list of conditions and the following disclaimer in the documentation
12+
# and/or other materials provided with the distribution.
13+
#
14+
# * Neither the name of the copyright holder nor the names of its
15+
# contributors may be used to endorse or promote products derived from
16+
# this software without specific prior written permission.
17+
#
18+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21+
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22+
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23+
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24+
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25+
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26+
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28+
from unittest.mock import MagicMock
29+
30+
from pytest_asyncio import fixture
31+
32+
from servicex.query_cache import QueryCache
33+
from servicex.servicex_adapter import ServiceXAdapter
34+
from servicex.servicex_client import ServiceXClient
35+
36+
37+
@fixture
def servicex_adaptor(mocker):
    """
    Replace the ServiceXAdapter used by servicex_client with a mock.

    Returns the mock instance that the patched class hands out, so tests can
    assert on the adapter calls made by the client.
    """
    adapter_instance = MagicMock(spec=ServiceXAdapter)
    patched_class = mocker.patch('servicex.servicex_client.ServiceXAdapter')
    patched_class.return_value = adapter_instance
    return adapter_instance
44+
45+
46+
@fixture
def mock_cache(mocker):
    """
    Replace the QueryCache used by servicex_client with a mock.

    The mock instance answers ``get_codegen_by_backend`` with a fixed pair of
    code generators. The patched class (not the instance) is returned.
    """
    canned_codegens = {
        "codegens": {
            "ROOT": "my_root_generator",
            "UPROOT": "my_uproot_generator"
        }
    }
    cache_instance = MagicMock(spec=QueryCache)
    cache_instance.get_codegen_by_backend.return_value = canned_codegens

    patched_class = mocker.patch('servicex.servicex_client.QueryCache')
    patched_class.return_value = cache_instance
    return patched_class
58+
59+
60+
def test_get_datasets(mock_cache, servicex_adaptor):
    """ServiceXClient.get_datasets delegates to the adapter exactly once."""
    client = ServiceXClient(config_path="tests/example_config.yaml")

    client.get_datasets()

    servicex_adaptor.get_datasets.assert_called_once()

0 commit comments

Comments
 (0)