diff --git a/examples/xAODDataset.py b/examples/xAODDataset.py index 97204286..58e6ca92 100644 --- a/examples/xAODDataset.py +++ b/examples/xAODDataset.py @@ -47,9 +47,8 @@ lambda e: { "run": e.EventInfo("EventInfo").runNumber(), "event": e.EventInfo("EventInfo").eventNumber(), - "good_ele": e.Electrons("Electrons"). - Where(lambda e: (e.pt() / 1000 > 25.0) and (abs(e.eta()) < 2.5) - ), + "good_ele": e.Electrons("Electrons") + .Where(lambda e: (e.pt() / 1000 > 25.0) and (abs(e.eta()) < 2.5)), } ) diff --git a/servicex/dataset.py b/servicex/dataset.py index 8b7d3ef6..b6da6642 100644 --- a/servicex/dataset.py +++ b/servicex/dataset.py @@ -76,6 +76,7 @@ def __init__( servicex_polling_interval: int = 10, minio_polling_interval: int = 5, result_format: ResultFormat = ResultFormat.parquet, + ignore_cache: bool = False, ): r""" This is the main class for constructing transform requests and receiving the @@ -92,6 +93,7 @@ def __init__( :param minio_polling_interval: How many seconds between polling the minio bucket for new files? 
:param result_format: + :param ignore_cache: If true, ignore the cache and always submit a new transform """ super(Dataset, self).__init__() self.servicex = sx_adapter @@ -112,6 +114,7 @@ def __init__( self._return_qastle = True self.request_id = None + self.ignore_cache = ignore_cache # Number of seconds in between ServiceX status polls self.servicex_polling_interval = servicex_polling_interval @@ -198,8 +201,11 @@ def transform_complete(task: Task): sx_request = self.transform_request - # Let's see if this is in the cache already - cached_record = self.cache.get_transform_by_hash(sx_request.compute_hash()) + # Let's see if this is in the cache already, but respect the user's wishes + # to ignore the cache + cached_record = self.cache.get_transform_by_hash(sx_request.compute_hash()) \ + if not self.ignore_cache \ + else None # And that we grabbed the resulting files in the way that the user requested # (Downloaded, or obtained pre-signed URLs) diff --git a/servicex/dataset_group.py b/servicex/dataset_group.py index ecf8c620..2db49492 100644 --- a/servicex/dataset_group.py +++ b/servicex/dataset_group.py @@ -60,6 +60,7 @@ def set_result_format(self, result_format: ResultFormat): """ for dataset in self.datasets: dataset.set_result_format(result_format) + return self async def as_signed_urls_async( self, diff --git a/servicex/dataset_identifier.py b/servicex/dataset_identifier.py index 68b00cb0..b54f7347 100644 --- a/servicex/dataset_identifier.py +++ b/servicex/dataset_identifier.py @@ -72,7 +72,7 @@ def __init__(self, files: Union[List[str], str]): :param files: Either a list of URIs or a single URI string """ - if type(files) == str: + if type(files) is str: self.files = [files] else: self.files = files diff --git a/servicex/func_adl/func_adl_dataset.py b/servicex/func_adl/func_adl_dataset.py index 7ff0a7cc..b719e899 100644 --- a/servicex/func_adl/func_adl_dataset.py +++ b/servicex/func_adl/func_adl_dataset.py @@ -81,6 +81,7 @@ def __init__( query_cache: 
QueryCache = None, result_format: Optional[ResultFormat] = None, item_type: Type = Any, + ignore_cache: bool = False, ): Dataset.__init__( self, @@ -91,6 +92,7 @@ def __init__( config=config, query_cache=query_cache, result_format=result_format, + ignore_cache=ignore_cache, ) EventDataset.__init__(self, item_type=item_type) self.provided_qastle = None @@ -111,7 +113,7 @@ def __deepcopy__(self, memo): memo[id(self)] = obj for attr, value in vars(self).items(): - if type(value) == QueryCache: + if type(value) is QueryCache: setattr(obj, attr, value) else: setattr(obj, attr, copy.deepcopy(value, memo)) diff --git a/servicex/func_adl/func_adl_dataset_group.py b/servicex/func_adl/func_adl_dataset_group.py index 427e10f7..f5d5cd66 100644 --- a/servicex/func_adl/func_adl_dataset_group.py +++ b/servicex/func_adl/func_adl_dataset_group.py @@ -32,7 +32,7 @@ import ast from servicex import DatasetGroup -from func_adl_dataset import T +from servicex.func_adl.func_adl_dataset import T class FuncADLDatasetGroup(DatasetGroup): diff --git a/servicex/python_dataset.py b/servicex/python_dataset.py index fa5c92c2..2946f7cf 100644 --- a/servicex/python_dataset.py +++ b/servicex/python_dataset.py @@ -45,7 +45,8 @@ def __init__(self, dataset_identifier: DID, codegen: str = None, config: Configuration = None, query_cache: QueryCache = None, - result_format: typing.Optional[ResultFormat] = None + result_format: typing.Optional[ResultFormat] = None, + ignore_cache: bool = False ): super().__init__(dataset_identifier=dataset_identifier, title=title, @@ -53,7 +54,8 @@ def __init__(self, dataset_identifier: DID, sx_adapter=sx_adapter, config=config, query_cache=query_cache, - result_format=result_format) + result_format=result_format, + ignore_cache=ignore_cache) self.python_function = None diff --git a/servicex/servicex_client.py b/servicex/servicex_client.py index 57acffd5..8aa853b2 100644 --- a/servicex/servicex_client.py +++ b/servicex/servicex_client.py @@ -125,6 +125,7 @@ def 
func_adl_dataset( codegen: str = "uproot", result_format: Optional[ResultFormat] = None, item_type: Type[T] = Any, + ignore_cache: bool = False ) -> FuncADLDataset[T]: r""" Generate a dataset that can use the func_adl query language @@ -135,6 +136,8 @@ :param codegen: Name of the code generator to use with this transform :param result_format: Do you want Parquet or Root? This can be set later with the set_result_format method + :param item_type: The type of the items that will be returned from the query + :param ignore_cache: Ignore the query cache and always run the query :return: A func_adl dataset ready to accept query statements. """ if codegen not in self.code_generators: @@ -152,6 +155,7 @@ def func_adl_dataset( query_cache=self.query_cache, result_format=result_format, item_type=item_type, + ignore_cache=ignore_cache ) def python_dataset( @@ -160,6 +164,7 @@ def python_dataset( title: str = "ServiceX Client", codegen: str = "uproot", result_format: Optional[ResultFormat] = None, + ignore_cache: bool = False ) -> PythonDataset: r""" Generate a dataset that can accept a python function for the query @@ -170,6 +175,7 @@ :param codegen: Name of the code generator to use with this transform :param result_format: Do you want Parquet or Root? This can be set later with the set_result_format method + :param ignore_cache: Ignore the query cache and always run the query :return: A python dataset ready to accept a python function. """ @@ -188,4 +194,5 @@ python_dataset( config=self.config, query_cache=self.query_cache, result_format=result_format, + ignore_cache=ignore_cache )