diff --git a/servicex/databinder_models.py b/servicex/databinder_models.py index dc4219d5..bb260d16 100644 --- a/servicex/databinder_models.py +++ b/servicex/databinder_models.py @@ -101,7 +101,7 @@ def dataset_identifier(self) -> DataSetIdentifier: Access the dataset identifier for the sample. """ if self.Dataset: - if self.NFiles: + if self.NFiles is not None: self.Dataset.num_files = self.NFiles return self.Dataset elif self.RucioDID: @@ -126,6 +126,15 @@ def validate_did_xor_file(cls, values): raise ValueError("Must specify one of Dataset, XRootDFiles, or RucioDID.") return values + @model_validator(mode="after") + def validate_nfiles_is_not_zero(self): + """ + Ensure that NFiles is not set to zero + """ + if self.dataset_identifier.num_files == 0: + raise ValueError("NFiles cannot be set to zero for a dataset.") + return self + @field_validator("Name", mode="before") @classmethod def truncate_long_sample_name(cls, v): diff --git a/servicex/dataset_identifier.py b/servicex/dataset_identifier.py index de765bc8..bb4b9f6f 100644 --- a/servicex/dataset_identifier.py +++ b/servicex/dataset_identifier.py @@ -78,6 +78,7 @@ def __init__(self, files: Union[List[str], str]): :param files: Either a list of URIs or a single URI string """ + self.num_files: Optional[int] = None # you should pass only the files you want self.files: List[str] if isinstance(files, str): self.files = [files] diff --git a/tests/test_databinder.py b/tests/test_databinder.py index 40cd9695..2a00bd36 100644 --- a/tests/test_databinder.py +++ b/tests/test_databinder.py @@ -155,6 +155,56 @@ def test_dataset_rucio_did_numfiles(): ) +def test_dataset_zerofiles(): + # with an actual dataset, requesting zero files should raise a validation error + with pytest.raises(ValidationError): + spec = ServiceXSpec.model_validate( + basic_spec( + samples=[ + { + "Name": "sampleA", + "Dataset": + dataset.Rucio("user.ivukotic:user.ivukotic.single_top_tW__nominal"), + "NFiles": 0, + "Query": "a", + } + ] + ) + ) + + with 
pytest.raises(ValidationError): + spec = ServiceXSpec.model_validate( + basic_spec( + samples=[ + { + "Name": "sampleA", + "Dataset": + dataset.Rucio("user.ivukotic:user.ivukotic.single_top_tW__nominal", + num_files=0), + "Query": "a", + } + ] + ) + ) + + # and num_files should stay unset (None) for a file list + spec = ServiceXSpec.model_validate( + basic_spec( + samples=[ + { + "Name": "sampleA", + "Dataset": dataset.FileList([ + "root://eospublic.cern.ch//file1.root", + "root://eospublic.cern.ch//file2.root", + ]), + "Query": "a", + } + ] + ) + ) + assert spec.Sample[0].dataset_identifier.num_files is None + + def test_cernopendata(): spec = ServiceXSpec.model_validate({ "Sample": [