From 030e0ed6a9d51828a9fa62a5c5fdca5bfd09da39 Mon Sep 17 00:00:00 2001
From: Gordon Watts
Date: Tue, 5 Aug 2025 18:15:15 -0400
Subject: [PATCH] feat: show human readable cache sizes

---
 servicex/app/cache.py   | 38 +++++++++-
 tests/app/test_cache.py | 162 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 196 insertions(+), 4 deletions(-)
 create mode 100644 tests/app/test_cache.py

diff --git a/servicex/app/cache.py b/servicex/app/cache.py
index 76be0811..ec8af4e6 100644
--- a/servicex/app/cache.py
+++ b/servicex/app/cache.py
@@ -29,11 +29,29 @@
 
 import rich
 import typer
+from pathlib import Path
 from rich.prompt import Confirm
+from typing import List
 
 from servicex.app import pipeable_table
+from servicex.models import TransformedResults
 from servicex.servicex_client import ServiceXClient
 
+
+def _format_size(size_bytes: int) -> str:
+    """Return human readable string for size in bytes."""
+    if size_bytes >= 1024**4:
+        size = size_bytes / (1024**4)
+        unit = "TB"
+    elif size_bytes >= 1024**3:
+        size = size_bytes / (1024**3)
+        unit = "GB"
+    else:
+        size = size_bytes / (1024**2)
+        unit = "MB"
+    return f"{size:,.2f} {unit}"
+
+
 cache_app = typer.Typer(name="cache", no_args_is_help=True)
 force_opt = typer.Option(False, "-y", help="Force, don't ask for permission")
 transform_id_arg = typer.Argument(help="Transform ID")
@@ -48,7 +66,9 @@ def cache():
 
 
 @cache_app.command()
-def list():
+def list(
+    show_size: bool = typer.Option(False, "--size", help="Include size of cached files")
+) -> None:
     """
     List the cached queries
     """
@@ -61,16 +81,26 @@ def list():
     table.add_column("Run Date")
     table.add_column("Files")
     table.add_column("Format")
-    runs = cache.cached_queries()
+    if show_size:
+        table.add_column("Size")
+
+    runs: List[TransformedResults] = cache.cached_queries()
     for r in runs:
-        table.add_row(
+        row = [
             r.title,
             r.codegen,
             r.request_id,
             r.submit_time.astimezone().strftime("%a, %Y-%m-%d %H:%M"),
             str(r.files),
             r.result_format,
-        )
+        ]
+        if show_size:
+            total_size: int = sum(
+                Path(f).stat().st_size for f in r.file_list if Path(f).exists()
+            )
+            # Convert to human readable string, keeping two decimal places
+            row.append(_format_size(total_size))
+        table.add_row(*row)
 
     rich.print(table)
 
diff --git a/tests/app/test_cache.py b/tests/app/test_cache.py
new file mode 100644
index 00000000..745a0be9
--- /dev/null
+++ b/tests/app/test_cache.py
@@ -0,0 +1,162 @@
+# Copyright (c) 2022, IRIS-HEP
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+from datetime import datetime, timezone
+import os
+from pathlib import Path
+from unittest.mock import Mock, patch
+
+from servicex.models import ResultFormat, TransformedResults
+
+
+def test_cache_list_size(script_runner, tmp_path) -> None:
+    dummy_file: Path = tmp_path / "data.parquet"
+    dummy_file.write_bytes(b"0" * (5 * 1024 * 1024))
+
+    record = TransformedResults(
+        hash="hash",
+        title="Test",
+        codegen="code",
+        request_id="id",
+        submit_time=datetime.now(timezone.utc),
+        data_dir=str(tmp_path),
+        file_list=[str(dummy_file)],
+        signed_url_list=[],
+        files=1,
+        result_format=ResultFormat.parquet,
+    )
+
+    with patch("servicex.app.cache.ServiceXClient") as mock_servicex:
+        cache_mock = Mock()
+        cache_mock.cached_queries.return_value = [record]
+        mock_servicex.return_value.query_cache = cache_mock
+        result = script_runner.run(["servicex", "cache", "list", "--size"])
+
+    assert result.returncode == 0
+    expected_size: float = os.path.getsize(dummy_file) / (1024 * 1024)
+    result_row = result.stdout.split(" ")
+    assert len(result_row) == 7
+    assert result_row[-1].strip() == f"{expected_size:,.2f} MB"
+
+
+def test_cache_list_without_size(script_runner, tmp_path) -> None:
+    dummy_file: Path = tmp_path / "data.parquet"
+    dummy_file.write_bytes(b"0" * (5 * 1024 * 1024))
+
+    record = TransformedResults(
+        hash="hash",
+        title="Test",
+        codegen="code",
+        request_id="id",
+        submit_time=datetime.now(timezone.utc),
+        data_dir=str(tmp_path),
+        file_list=[str(dummy_file)],
+        signed_url_list=[],
+        files=1,
+        result_format=ResultFormat.parquet,
+    )
+
+    with patch("servicex.app.cache.ServiceXClient") as mock_servicex:
+        cache_mock = Mock()
+        cache_mock.cached_queries.return_value = [record]
+        mock_servicex.return_value.query_cache = cache_mock
+        result = script_runner.run(["servicex", "cache", "list"])
+
+    assert result.returncode == 0
+    result_row = result.stdout.split(" ")
+    # Without the --size option, the output should have only six columns
+    assert len(result_row) == 6
+
+
+def test_cache_list_size_gb(script_runner, tmp_path) -> None:
+    dummy_file: Path = tmp_path / "data.parquet"
+    dummy_file.write_bytes(b"0")
+
+    record = TransformedResults(
+        hash="hash",
+        title="Test",
+        codegen="code",
+        request_id="id",
+        submit_time=datetime.now(timezone.utc),
+        data_dir=str(tmp_path),
+        file_list=[str(dummy_file)],
+        signed_url_list=[],
+        files=1,
+        result_format=ResultFormat.parquet,
+    )
+
+    size_bytes: int = 2 * 1024**3
+    with (
+        patch("servicex.app.cache.ServiceXClient") as mock_servicex,
+        patch(
+            "servicex.app.cache.Path.stat",
+            return_value=Mock(st_size=size_bytes),
+        ),
+    ):
+        cache_mock = Mock()
+        cache_mock.cached_queries.return_value = [record]
+        mock_servicex.return_value.query_cache = cache_mock
+        result = script_runner.run(["servicex", "cache", "list", "--size"])
+
+    assert result.returncode == 0
+    result_row = result.stdout.split(" ")
+    assert result_row[-1].strip() == "2.00 GB"
+
+
+def test_cache_list_size_tb(script_runner, tmp_path) -> None:
+    dummy_file: Path = tmp_path / "data.parquet"
+    dummy_file.write_bytes(b"0")
+
+    record = TransformedResults(
+        hash="hash",
+        title="Test",
+        codegen="code",
+        request_id="id",
+        submit_time=datetime.now(timezone.utc),
+        data_dir=str(tmp_path),
+        file_list=[str(dummy_file)],
+        signed_url_list=[],
+        files=1,
+        result_format=ResultFormat.parquet,
+    )
+
+    size_bytes: int = 3 * 1024**4
+    with (
+        patch("servicex.app.cache.ServiceXClient") as mock_servicex,
+        patch(
+            "servicex.app.cache.Path.stat",
+            return_value=Mock(st_size=size_bytes),
+        ),
+    ):
+        cache_mock = Mock()
+        cache_mock.cached_queries.return_value = [record]
+        mock_servicex.return_value.query_cache = cache_mock
+        result = script_runner.run(["servicex", "cache", "list", "--size"])
+
+    assert result.returncode == 0
+    result_row = result.stdout.split(" ")
+    assert result_row[-1].strip() == "3.00 TB"