1- # Copyright (c) 2022 , IRIS-HEP
1+ # Copyright (c) 2024 , IRIS-HEP
22# All rights reserved.
33#
44# Redistribution and use in source and binary forms, with or without
2525# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
2626# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
2727# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28- from typing import List , Union
28+ from typing import List , Union , Optional
2929
3030from servicex .models import TransformRequest
3131
@@ -35,14 +35,14 @@ class DataSetIdentifier:
3535 Base class for specifying the dataset to transform. This can either be a list of
3636 xRootD URIs or a rucio DID
3737 """
def __init__(self, scheme: str, dataset: str, num_files: Optional[int] = None):
    """
    Record the components of a dataset identifier on the instance.

    :param scheme: Identifier scheme, stored as ``self.scheme``.
    :param dataset: Dataset name, stored as ``self.dataset``.
    :param num_files: Optional file count, stored as ``self.num_files``.
                      ``None`` (the default) means no count was given.
    """
    self.scheme = scheme
    self.dataset = dataset
    self.num_files = num_files
4242
@property
def did(self):
    """
    Build the identifier string ``<scheme>://<dataset>``.

    A ``?files=<num_files>`` suffix is appended whenever ``num_files`` is
    not ``None``.

    :return: The assembled identifier string.
    """
    # Test against None rather than truthiness so that an explicit
    # num_files of 0 still produces a "?files=0" suffix.
    num_files_arg = f"?files={self.num_files}" if self.num_files is not None else ""
    return f"{self.scheme}://{self.dataset}{num_files_arg}"
4747
4848 def populate_transform_request (self , transform_request : TransformRequest ) -> None :
@@ -51,7 +51,7 @@ def populate_transform_request(self, transform_request: TransformRequest) -> Non
5151
5252
5353class RucioDatasetIdentifier (DataSetIdentifier ):
54- def __init__ (self , dataset : str , num_files : int = None ):
54+ def __init__ (self , dataset : str , num_files : Optional [ int ] = None ):
5555 r"""
5656 Rucio Dataset - this will be looked up using the Rucio data management
5757 service.
@@ -64,19 +64,56 @@ def __init__(self, dataset: str, num_files: int = None):
6464 """
6565 super ().__init__ ("rucio" , dataset , num_files = num_files )
6666
67+ yaml_tag = '!Rucio'
6768
68- class FileListDataset :
@classmethod
def from_yaml(cls, _, node):
    """
    Alternate constructor that builds an instance from a YAML node.

    NOTE(review): presumably called by the YAML loader for nodes carrying
    this class's ``yaml_tag`` - confirm against the loader registration.

    :param _: Unused by this implementation.
    :param node: Object whose ``value`` attribute is passed to ``cls`` as
                 its only argument.
    :return: The result of ``cls(node.value)``.
    """
    return cls(node.value)
72+
73+
class FileListDataset(DataSetIdentifier):
    def __init__(self, files: Union[List[str], str]):
        r"""
        Dataset specified as a list of XRootD URIs.

        :param files: Either a list of URIs or a single URI string
        """
        # NOTE(review): DataSetIdentifier.__init__ is not called here, so
        # scheme / dataset / num_files are never set on this object. The
        # ``did`` override below avoids reading them - confirm intended.
        self.files: List[str]
        if isinstance(files, str):
            # A lone URI is normalised to a one-element list.
            self.files = [files]
        else:
            self.files = files

    def populate_transform_request(self, transform_request: TransformRequest) -> None:
        """
        Write this dataset into ``transform_request``.

        Sets ``file_list`` to the stored URIs and sets ``did`` to ``None``.

        :param transform_request: Request object to update in place.
        """
        transform_request.file_list = self.files
        transform_request.did = None

    @property
    def did(self):
        """A file-list dataset has no DID; always ``None``."""
        return None

    yaml_tag = '!FileList'

    @classmethod
    def from_yaml(cls, constructor, node):
        """
        Alternate constructor that builds an instance from a YAML sequence node.

        :param constructor: Object providing ``construct_sequence(node)``.
        :param node: The YAML node handed to ``construct_sequence``.
        :return: ``cls`` built from the constructed sequence.
        """
        return cls(constructor.construct_sequence(node))
101+
class CERNOpenDataDatasetIdentifier(DataSetIdentifier):
    def __init__(self, dataset: int, num_files: Optional[int] = None):
        r"""
        CERN Open Data Dataset - this will be looked up using the CERN Open Data DID finder.

        :param dataset: The dataset ID - this is an integer.
        :param num_files: Maximum number of files to return. This is useful during development
                          to perform quick runs. ServiceX is careful to make sure it always
                          returns the same subset of files.

        """
        # The base class stores the dataset as text, so the integer ID is
        # formatted into a string before being passed on.
        super().__init__("cernopendata", f'{dataset}', num_files=num_files)

    yaml_tag = '!CERNOpenData'

    @classmethod
    def from_yaml(cls, _, node):
        """
        Alternate constructor that builds an instance from a YAML node.

        :param _: Unused by this implementation.
        :param node: Object whose ``value`` attribute is converted with
                     ``int`` and used as the dataset ID.
        :return: ``cls(int(node.value))``.
        :raises ValueError: If ``node.value`` cannot be converted by ``int``.
        """
        return cls(int(node.value))
0 commit comments