Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions examples/xAODDataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,8 @@
lambda e: {
"run": e.EventInfo("EventInfo").runNumber(),
"event": e.EventInfo("EventInfo").eventNumber(),
"good_ele": e.Electrons("Electrons").
Where(lambda e: (e.pt() / 1000 > 25.0) and (abs(e.eta()) < 2.5)
),
"good_ele": e.Electrons("Electrons")
.Where(lambda e: (e.pt() / 1000 > 25.0) and (abs(e.eta()) < 2.5)),
}
)

Expand Down
10 changes: 8 additions & 2 deletions servicex/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ def __init__(
servicex_polling_interval: int = 10,
minio_polling_interval: int = 5,
result_format: ResultFormat = ResultFormat.parquet,
ignore_cache: bool = False,
):
r"""
This is the main class for constructing transform requests and receiving the
Expand All @@ -92,6 +93,7 @@ def __init__(
:param minio_polling_interval: How many seconds between polling the minio bucket
for new files?
    :param result_format: Format of the transformed results (e.g. parquet or root);
        defaults to ``ResultFormat.parquet``
:param ignore_cache: If true, ignore the cache and always submit a new transform
"""
super(Dataset, self).__init__()
self.servicex = sx_adapter
Expand All @@ -112,6 +114,7 @@ def __init__(
self._return_qastle = True

self.request_id = None
self.ignore_cache = ignore_cache

# Number of seconds in between ServiceX status polls
self.servicex_polling_interval = servicex_polling_interval
Expand Down Expand Up @@ -198,8 +201,11 @@ def transform_complete(task: Task):

sx_request = self.transform_request

# Let's see if this is in the cache already
cached_record = self.cache.get_transform_by_hash(sx_request.compute_hash())
# Let's see if this is in the cache already, but respect the user's wishes
# to ignore the cache
cached_record = self.cache.get_transform_by_hash(sx_request.compute_hash()) \
if not self.ignore_cache \
else None

# And that we grabbed the resulting files in the way that the user requested
# (Downloaded, or obtained pre-signed URLs)
Expand Down
1 change: 1 addition & 0 deletions servicex/dataset_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ def set_result_format(self, result_format: ResultFormat):
"""
for dataset in self.datasets:
dataset.set_result_format(result_format)
return self

async def as_signed_urls_async(
self,
Expand Down
2 changes: 1 addition & 1 deletion servicex/dataset_identifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def __init__(self, files: Union[List[str], str]):

:param files: Either a list of URIs or a single URI string
"""
if type(files) == str:
if type(files) is str:
self.files = [files]
else:
self.files = files
Expand Down
4 changes: 3 additions & 1 deletion servicex/func_adl/func_adl_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ def __init__(
query_cache: QueryCache = None,
result_format: Optional[ResultFormat] = None,
item_type: Type = Any,
ignore_cache: bool = False,
):
Dataset.__init__(
self,
Expand All @@ -91,6 +92,7 @@ def __init__(
config=config,
query_cache=query_cache,
result_format=result_format,
ignore_cache=ignore_cache,
)
EventDataset.__init__(self, item_type=item_type)
self.provided_qastle = None
Expand All @@ -111,7 +113,7 @@ def __deepcopy__(self, memo):
memo[id(self)] = obj

for attr, value in vars(self).items():
if type(value) == QueryCache:
if type(value) is QueryCache:
setattr(obj, attr, value)
else:
setattr(obj, attr, copy.deepcopy(value, memo))
Expand Down
2 changes: 1 addition & 1 deletion servicex/func_adl/func_adl_dataset_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
import ast
from servicex import DatasetGroup

from func_adl_dataset import T
from servicex.func_adl.func_adl_dataset import T


class FuncADLDatasetGroup(DatasetGroup):
Expand Down
6 changes: 4 additions & 2 deletions servicex/python_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,15 +45,17 @@ def __init__(self, dataset_identifier: DID,
codegen: str = None,
config: Configuration = None,
query_cache: QueryCache = None,
result_format: typing.Optional[ResultFormat] = None
result_format: typing.Optional[ResultFormat] = None,
ignore_cache: bool = False
):
super().__init__(dataset_identifier=dataset_identifier,
title=title,
codegen=codegen,
sx_adapter=sx_adapter,
config=config,
query_cache=query_cache,
result_format=result_format)
result_format=result_format,
ignore_cache=ignore_cache)

self.python_function = None

Expand Down
7 changes: 7 additions & 0 deletions servicex/servicex_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ def func_adl_dataset(
codegen: str = "uproot",
result_format: Optional[ResultFormat] = None,
item_type: Type[T] = Any,
ignore_cache: bool = False
) -> FuncADLDataset[T]:
r"""
Generate a dataset that can use func_adl query language
Expand All @@ -135,6 +136,8 @@ def func_adl_dataset(
:param codegen: Name of the code generator to use with this transform
    :param result_format: Do you want Parquet or Root? This can be set later with
the set_result_format method
:param item_type: The type of the items that will be returned from the query
:param ignore_cache: Ignore the query cache and always run the query
:return: A func_adl dataset ready to accept query statements.
"""
if codegen not in self.code_generators:
Expand All @@ -152,6 +155,7 @@ def func_adl_dataset(
query_cache=self.query_cache,
result_format=result_format,
item_type=item_type,
ignore_cache=ignore_cache
)

def python_dataset(
Expand All @@ -160,6 +164,7 @@ def python_dataset(
title: str = "ServiceX Client",
codegen: str = "uproot",
result_format: Optional[ResultFormat] = None,
ignore_cache: bool = False
) -> PythonDataset:
r"""
    Generate a dataset that can accept a python function for the query
Expand All @@ -170,6 +175,7 @@ def python_dataset(
:param codegen: Name of the code generator to use with this transform
    :param result_format: Do you want Parquet or Root? This can be set later with
the set_result_format method
:param ignore_cache: Ignore the query cache and always run the query
    :return: A python dataset ready to accept a python function for the query.

"""
Expand All @@ -188,4 +194,5 @@ def python_dataset(
config=self.config,
query_cache=self.query_cache,
result_format=result_format,
ignore_cache=ignore_cache
)