From f3c84427c6894dcc40be723a722f938892d72707 Mon Sep 17 00:00:00 2001 From: Peter Onyisi Date: Fri, 4 Jul 2025 23:56:16 +0000 Subject: [PATCH 1/2] Determine sample name length limit from server capability --- servicex/databinder_models.py | 23 ++++++++++++++--------- servicex/servicex_adapter.py | 17 +++++++++++++++++ servicex/servicex_client.py | 2 ++ 3 files changed, 33 insertions(+), 9 deletions(-) diff --git a/servicex/databinder_models.py b/servicex/databinder_models.py index 8be5c397..1c86f401 100644 --- a/servicex/databinder_models.py +++ b/servicex/databinder_models.py @@ -143,17 +143,22 @@ def validate_nfiles_is_not_zero(self): raise ValueError("NFiles cannot be set to zero for a dataset.") return self - @field_validator("Name", mode="before") - @classmethod - def truncate_long_sample_name(cls, v): + def validate_title(self, length: int) -> None: """ - Truncate sample name to 512 characters if exceed - Print warning message + Logic for adjusting length of the title """ - if len(v) > 512: - logger.warning(f"Truncating Sample name to 512 characters for {v}") - v = v[0:512] - return v + if length is None: + # we adopt pre-3.2.0 behavior: truncate to 128 characters + if len(self.Name) > 128: + logger.warning( + f"Truncating Sample name to 128 characters for {self.Name}" + ) + self.Name = self.Name[:128] + logger.warning(f"New name is {self.Name}") + else: + if len(self.Name) > length: + logger.error(f"Sample name {self.Name} over the limit ({length})") + raise ValueError(f"Sample name {self.Name} length too long") @property def hash(self): diff --git a/servicex/servicex_adapter.py b/servicex/servicex_adapter.py index 7489340c..f24f3fd5 100644 --- a/servicex/servicex_adapter.py +++ b/servicex/servicex_adapter.py @@ -79,6 +79,7 @@ def __init__(self, url: str, refresh_token: Optional[str] = None): # interact with _servicex_info via get_servicex_info self._servicex_info: Optional[ServiceXInfo] = None + self._sample_title_limit: Optional[int] = None async def _get_token(self): url = f"{self.url}/token/refresh" @@ -162,6 +163,22 @@ async def get_servicex_info(self) -> ServiceXInfo: async def get_servicex_capabilities(self) -> List[str]: return (await self.get_servicex_info()).capabilities + async def get_servicex_sample_title_limit(self) -> Optional[int]: + # check if the capability is defined + capabilities = await self.get_servicex_capabilities() + for capability in capabilities: + if capability.startswith("long_sample_titles_"): + try: + # hope capability is of the form long_sample_titles_NNNN + return int(capability[19:]) + except ValueError: + raise RuntimeError( + "Unable to determine allowed sample title length\n" + f"Server capability is: {capability}" + ) + + return None + async def get_transforms(self) -> List[TransformStatus]: headers = await self._get_authorization() retry_options = Retry(total=3, backoff_factor=10) diff --git a/servicex/servicex_client.py b/servicex/servicex_client.py index 08b9b365..9f8ded44 100644 --- a/servicex/servicex_client.py +++ b/servicex/servicex_client.py @@ -183,7 +183,9 @@ def get_codegen(_sample: Sample, _general: General): sx = ServiceXClient(backend=servicex_name, config_path=config_path) datasets = [] + title_length_limit = make_sync(sx.servicex.get_servicex_sample_title_limit)() for sample in config.Sample: + sample.validate_title(title_length_limit) query = sx.generic_query( dataset_identifier=sample.dataset_identifier, title=sample.Name, From 6e1986ec91f3f50f0348b7202988d1dd358a12ca Mon Sep 17 00:00:00 2001 From: Peter Onyisi Date: Sat, 5 Jul 2025 02:10:38 +0000 Subject: [PATCH 2/2] Fix tests + add tests for sample title limit capability checks --- servicex/databinder_models.py | 2 +- tests/test_databinder.py | 267 ++++++++++++++------------------- tests/test_servicex_adapter.py | 15 ++ 3 files changed, 128 insertions(+), 156 deletions(-) diff --git a/servicex/databinder_models.py b/servicex/databinder_models.py index 1c86f401..3278e52d 100644 --- a/servicex/databinder_models.py +++ b/servicex/databinder_models.py @@ -143,7 +143,7 @@ def validate_nfiles_is_not_zero(self): raise ValueError("NFiles cannot be set to zero for a dataset.") return self - def validate_title(self, length: int) -> None: + def validate_title(self, length: Optional[int]) -> None: """ Logic for adjusting length of the title """ diff --git a/tests/test_databinder.py b/tests/test_databinder.py index 3644a807..2bce6f78 100644 --- a/tests/test_databinder.py +++ b/tests/test_databinder.py @@ -1,4 +1,5 @@ import pytest +from pytest_asyncio import fixture from unittest.mock import patch from pydantic import ValidationError @@ -8,6 +9,36 @@ from servicex.dataset import FileList, Rucio +@fixture +def network_patches(codegen_list): + import contextlib + + with contextlib.ExitStack() as _fixture: + _fixture.enter_context( + patch( + "servicex.servicex_adapter.ServiceXAdapter.get_servicex_capabilities", + return_value=[ + "poll_local_transformation_results", + "long_sample_titles_10240", + ], + ) + ) + _fixture.enter_context( + patch( + "servicex.servicex_client.ServiceXClient.get_code_generators", + return_value=codegen_list, + ) + ) + _fixture.enter_context( + patch( + "servicex.servicex_adapter.ServiceXAdapter._get_authorization", + return_value={"Authorization": "Bearer aaa"}, + ) + ) + + yield _fixture + + def basic_spec(samples=None): return { "Sample": samples @@ -27,7 +58,11 @@ def test_long_sample_name(): ], } new_config = ServiceXSpec.model_validate(config) - assert len(new_config.Sample[0].Name) == 512 + assert len(new_config.Sample[0].Name) == 800 + with pytest.raises(ValueError): + new_config.Sample[0].validate_title(512) + new_config.Sample[0].validate_title(None) + assert len(new_config.Sample[0].Name) == 128 def test_load_config(): @@ -289,7 +324,7 @@ def test_invalid_dataset_identifier(): ) -def test_submit_mapping(transformed_result, codegen_list, with_event_loop): +def test_submit_mapping(transformed_result, network_patches, with_event_loop): from servicex import deliver spec = { @@ -305,22 +340,16 @@ def test_submit_mapping(transformed_result, codegen_list, with_event_loop): } ], } - with ( - patch( - "servicex.dataset_group.DatasetGroup.as_files", - return_value=[transformed_result], - ), - patch( - "servicex.servicex_client.ServiceXClient.get_code_generators", - return_value=codegen_list, - ), + with patch( + "servicex.dataset_group.DatasetGroup.as_files", + return_value=[transformed_result], ): results = deliver(spec, config_path="tests/example_config.yaml") assert list(results["sampleA"]) == ["1.parquet"] def test_submit_mapping_signed_urls( - transformed_result_signed_url, codegen_list, with_event_loop + transformed_result_signed_url, network_patches, with_event_loop ): from servicex import deliver @@ -335,15 +364,9 @@ def test_submit_mapping_signed_urls( } ], } - with ( - patch( - "servicex.dataset_group.DatasetGroup.as_signed_urls", - return_value=[transformed_result_signed_url], - ), - patch( - "servicex.servicex_client.ServiceXClient.get_code_generators", - return_value=codegen_list, - ), + with patch( + "servicex.dataset_group.DatasetGroup.as_signed_urls", + return_value=[transformed_result_signed_url], ): results = deliver(spec, config_path="tests/example_config.yaml") assert list(results["sampleA"]) == [ @@ -352,7 +375,7 @@ def test_submit_mapping_signed_urls( ] -def test_submit_mapping_failure(transformed_result, codegen_list, with_event_loop): +def test_submit_mapping_failure(transformed_result, network_patches, with_event_loop): from servicex import deliver spec = { @@ -365,15 +388,9 @@ def test_submit_mapping_failure(transformed_result, codegen_list, with_event_loo } ] } - with ( - patch( - "servicex.dataset_group.DatasetGroup.as_files", - return_value=[ServiceXException("dummy")], - ), - patch( - "servicex.servicex_client.ServiceXClient.get_code_generators", - return_value=codegen_list, - ), + with patch( + "servicex.dataset_group.DatasetGroup.as_files", + return_value=[ServiceXException("dummy")], ): results = deliver(spec, config_path="tests/example_config.yaml") assert len(results) == 1 @@ -383,7 +400,7 @@ def test_submit_mapping_failure(transformed_result, codegen_list, with_event_loo pass -def test_submit_mapping_failure_signed_urls(codegen_list, with_event_loop): +def test_submit_mapping_failure_signed_urls(network_patches, with_event_loop): from servicex import deliver spec = { @@ -397,15 +414,9 @@ def test_submit_mapping_failure_signed_urls(codegen_list, with_event_loop): } ], } - with ( - patch( - "servicex.dataset_group.DatasetGroup.as_signed_urls", - return_value=[ServiceXException("dummy")], - ), - patch( - "servicex.servicex_client.ServiceXClient.get_code_generators", - return_value=codegen_list, - ), + with patch( + "servicex.dataset_group.DatasetGroup.as_signed_urls", + return_value=[ServiceXException("dummy")], ): results = deliver( spec, config_path="tests/example_config.yaml", return_exceptions=False @@ -643,7 +654,7 @@ def run_query(input_filenames=None): _load_ServiceXSpec(path2) -def test_funcadl_query(transformed_result, codegen_list, with_event_loop): +def test_funcadl_query(transformed_result, network_patches, with_event_loop): from servicex import deliver from servicex.query import FuncADL_Uproot # type: ignore @@ -660,20 +671,16 @@ def test_funcadl_query(transformed_result, codegen_list, with_event_loop): ] } ) - with ( - patch( - "servicex.dataset_group.DatasetGroup.as_files", - return_value=[transformed_result], - ), - patch( - "servicex.servicex_client.ServiceXClient.get_code_generators", - return_value=codegen_list, - ), + with patch( + "servicex.dataset_group.DatasetGroup.as_files", + return_value=[transformed_result], ): deliver(spec, config_path="tests/example_config.yaml") -def test_query_with_codegen_override(transformed_result, codegen_list, with_event_loop): +def test_query_with_codegen_override( + transformed_result, network_patches, with_event_loop +): from servicex import deliver from servicex.query import FuncADL_Uproot # type: ignore @@ -692,15 +699,9 @@ def test_query_with_codegen_override(transformed_result, codegen_list, with_even ], } ) - with ( - patch( - "servicex.dataset_group.DatasetGroup.as_files", - return_value=[transformed_result], - ), - patch( - "servicex.servicex_client.ServiceXClient.get_code_generators", - return_value=codegen_list, - ), + with patch( + "servicex.dataset_group.DatasetGroup.as_files", + return_value=[transformed_result], ): with pytest.raises(NameError) as excinfo: deliver(spec, config_path="tests/example_config.yaml") @@ -722,15 +723,9 @@ def test_query_with_codegen_override(transformed_result, codegen_list, with_even ] } ) - with ( - patch( - "servicex.dataset_group.DatasetGroup.as_files", - return_value=[transformed_result], - ), - patch( - "servicex.servicex_client.ServiceXClient.get_code_generators", - return_value=codegen_list, - ), + with patch( + "servicex.dataset_group.DatasetGroup.as_files", + return_value=[transformed_result], ): with pytest.raises(NameError) as excinfo: deliver(spec, config_path="tests/example_config.yaml") @@ -757,7 +752,7 @@ def test_databinder_load_dict(): ) -def test_python_query(transformed_result, codegen_list, with_event_loop): +def test_python_query(transformed_result, network_patches, with_event_loop): from servicex import deliver from servicex.query import PythonFunction # type: ignore @@ -778,20 +773,14 @@ def run_query(input_filenames=None): ] } ) - with ( - patch( - "servicex.dataset_group.DatasetGroup.as_files", - return_value=[transformed_result], - ), - patch( - "servicex.servicex_client.ServiceXClient.get_code_generators", - return_value=codegen_list, - ), + with patch( + "servicex.dataset_group.DatasetGroup.as_files", + return_value=[transformed_result], ): deliver(spec, config_path="tests/example_config.yaml") -def test_uproot_raw_query(transformed_result, codegen_list, with_event_loop): +def test_uproot_raw_query(transformed_result, network_patches, with_event_loop): from servicex import deliver from servicex.query import UprootRaw # type: ignore @@ -806,20 +795,14 @@ def test_uproot_raw_query(transformed_result, codegen_list, with_event_loop): ] } ) - with ( - patch( - "servicex.dataset_group.DatasetGroup.as_files", - return_value=[transformed_result], - ), - patch( - "servicex.servicex_client.ServiceXClient.get_code_generators", - return_value=codegen_list, - ), + with patch( + "servicex.dataset_group.DatasetGroup.as_files", + return_value=[transformed_result], ): deliver(spec, config_path="tests/example_config.yaml") -def test_uproot_raw_query_parquet(transformed_result, codegen_list, with_event_loop): +def test_uproot_raw_query_parquet(transformed_result, network_patches, with_event_loop): from servicex import deliver from servicex.query import UprootRaw # type: ignore @@ -836,20 +819,14 @@ def test_uproot_raw_query_parquet(transformed_result, codegen_list, with_event_l } ) print(spec) - with ( - patch( - "servicex.dataset_group.DatasetGroup.as_files", - return_value=[transformed_result], - ), - patch( - "servicex.servicex_client.ServiceXClient.get_code_generators", - return_value=codegen_list, - ), + with patch( + "servicex.dataset_group.DatasetGroup.as_files", + return_value=[transformed_result], ): deliver(spec, config_path="tests/example_config.yaml") -def test_uproot_raw_query_rntuple(transformed_result, codegen_list, with_event_loop): +def test_uproot_raw_query_rntuple(transformed_result, network_patches, with_event_loop): from servicex import deliver from servicex.query import UprootRaw # type: ignore @@ -865,20 +842,14 @@ def test_uproot_raw_query_rntuple(transformed_result, codegen_list, with_event_l ], } ) - with ( - patch( - "servicex.dataset_group.DatasetGroup.as_files", - return_value=[transformed_result], - ), - patch( - "servicex.servicex_client.ServiceXClient.get_code_generators", - return_value=codegen_list, - ), + with patch( + "servicex.dataset_group.DatasetGroup.as_files", + return_value=[transformed_result], ): deliver(spec, config_path="tests/example_config.yaml") -def test_generic_query(codegen_list): +def test_generic_query(network_patches): from servicex.servicex_client import ServiceXClient spec = ServiceXSpec.model_validate( @@ -895,47 +866,43 @@ def test_generic_query(codegen_list): ], } ) - with patch( - "servicex.servicex_client.ServiceXClient.get_code_generators", - return_value=codegen_list, - ): - sx = ServiceXClient(config_path="tests/example_config.yaml") + sx = ServiceXClient(config_path="tests/example_config.yaml") + query = sx.generic_query( + dataset_identifier=spec.Sample[0].RucioDID, + codegen=spec.General.Codegen, + query=spec.Sample[0].Query, + ) + assert query.generate_selection_string() == "[{'treename': 'nominal'}]" + query = sx.generic_query( + dataset_identifier=spec.Sample[0].RucioDID, + result_format=spec.General.OutputFormat.to_ResultFormat(), + codegen=spec.General.Codegen, + query=spec.Sample[0].Query, + ) + assert query.result_format == "root-file" + query.query_string_generator = None + with pytest.raises(RuntimeError): + query.generate_selection_string() + with pytest.raises(ValueError): query = sx.generic_query( dataset_identifier=spec.Sample[0].RucioDID, codegen=spec.General.Codegen, - query=spec.Sample[0].Query, + query=5, ) - assert query.generate_selection_string() == "[{'treename': 'nominal'}]" + with pytest.raises(NameError): query = sx.generic_query( dataset_identifier=spec.Sample[0].RucioDID, - result_format=spec.General.OutputFormat.to_ResultFormat(), - codegen=spec.General.Codegen, + codegen="nonsense", query=spec.Sample[0].Query, ) - assert query.result_format == "root-file" - query.query_string_generator = None - with pytest.raises(RuntimeError): - query.generate_selection_string() - with pytest.raises(ValueError): - query = sx.generic_query( - dataset_identifier=spec.Sample[0].RucioDID, - codegen=spec.General.Codegen, - query=5, - ) - with pytest.raises(NameError): - query = sx.generic_query( - dataset_identifier=spec.Sample[0].RucioDID, - codegen="nonsense", - query=spec.Sample[0].Query, - ) - with pytest.raises(RuntimeError): - # no codegen specified by generic class - query = sx.generic_query( - dataset_identifier=spec.Sample[0].RucioDID, query=spec.Sample[0].Query - ) + with pytest.raises(RuntimeError): + # no codegen specified by generic class + query = sx.generic_query( + dataset_identifier=spec.Sample[0].RucioDID, query=spec.Sample[0].Query + ) -def test_deliver_progress_options(transformed_result, codegen_list, with_event_loop): +def test_deliver_progress_options(transformed_result, network_patches, with_event_loop): from servicex import deliver, ProgressBarFormat from servicex.query import UprootRaw # type: ignore @@ -955,19 +922,9 @@ async def fake_submit(signed_urls_only, expandable_progress): expandable_progress.add_task("zip", start=False, total=None) return transformed_result - with ( - patch( - "servicex.servicex_client.ServiceXClient.get_code_generators", - return_value=codegen_list, - ), - patch( - "servicex.servicex_adapter.ServiceXAdapter._get_authorization", - return_value={"Authorization": "Bearer aaa"}, - ), - patch( - "servicex.query_core.Query.submit_and_download", - side_effect=fake_submit, - ), + with patch( + "servicex.query_core.Query.submit_and_download", + side_effect=fake_submit, ): import servicex.query_core diff --git a/tests/test_servicex_adapter.py b/tests/test_servicex_adapter.py index 54eeb1c1..60d5fccd 100644 --- a/tests/test_servicex_adapter.py +++ b/tests/test_servicex_adapter.py @@ -843,3 +843,18 @@ async def test_get_transformation_results_failed_file(mock_get, servicex): ) res = await servicex.get_transformation_results("id123", None) assert len(res) == 0 + + +@pytest.mark.asyncio +async def test_sample_title_limit(servicex): + servicex.get_servicex_capabilities = AsyncMock(return_value=["irrelevant"]) + assert await servicex.get_servicex_sample_title_limit() is None + servicex.get_servicex_capabilities = AsyncMock( + return_value=["long_sample_titles_10240"] + ) + assert await servicex.get_servicex_sample_title_limit() == 10240 + servicex.get_servicex_capabilities = AsyncMock( + return_value=["long_sample_titles_invalid"] + ) + with pytest.raises(RuntimeError): + await servicex.get_servicex_sample_title_limit()