diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index d1e187ef..06a32f8f 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -19,4 +19,4 @@ RUN if [ "${INSTALL_NODE}" = "true" ]; then su vscode -c "umask 0002 && . /usr/l # && apt-get -y install --no-install-recommends # [Optional] Uncomment this line to install global node packages. -# RUN su vscode -c "source /usr/local/share/nvm/nvm.sh && npm install -g " 2>&1 \ No newline at end of file +# RUN su vscode -c "source /usr/local/share/nvm/nvm.sh && npm install -g " 2>&1 diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 8d000e9d..456cafe8 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -15,7 +15,7 @@ }, // Set *default* container specific settings.json values on container create. - "settings": { + "settings": { "python.pythonPath": "/usr/local/bin/python", "python.languageServer": "Pylance", "python.linting.enabled": true, diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index e8c60a5f..7e62c0f6 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -31,7 +31,7 @@ jobs: strategy: matrix: platform: [ubuntu-latest, windows-latest, macos-latest] - python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] + python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] runs-on: ${{ matrix.platform }} steps: diff --git a/.github/workflows/ci_production.yaml b/.github/workflows/ci_production.yaml index 2803ece5..e798c5a1 100644 --- a/.github/workflows/ci_production.yaml +++ b/.github/workflows/ci_production.yaml @@ -35,4 +35,3 @@ jobs: - name: Run example working-directory: examples run: python UprootRaw_Dict.py - diff --git a/.gitignore b/.gitignore index 3dc77dad..181a9d7e 100644 --- a/.gitignore +++ b/.gitignore @@ -127,4 +127,3 @@ dmypy.json .idea/ .servicex - diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index dc023a4f..7db1c25e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -14,3 +14,19 @@ repos: rev: "7.1.1" hooks: - id: flake8 + - repo: https://github.com/psf/black + rev: 22.10.0 + hooks: + - id: black + +ci: + autofix_commit_msg: | + [pre-commit.ci] auto fixes from pre-commit.com hooks + + for more information, see https://pre-commit.ci + autofix_prs: true + autoupdate_branch: '' + autoupdate_commit_msg: '[pre-commit.ci] pre-commit autoupdate' + autoupdate_schedule: weekly + skip: [] + submodules: false diff --git a/.readthedocs.yaml b/.readthedocs.yaml index c08ca858..04eb948a 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -30,4 +30,4 @@ python: - method: pip path: . 
extra_requirements: - - docs \ No newline at end of file + - docs diff --git a/.vscode/settings.json b/.vscode/settings.json index 2c83d14e..13d8ea1a 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -107,4 +107,4 @@ "python.testing.unittestEnabled": false, "python.testing.pytestEnabled": true, "editor.formatOnSave": false, -} \ No newline at end of file +} diff --git a/.vscode/tasks.json b/.vscode/tasks.json index 5ab94536..56ec63a4 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -52,4 +52,4 @@ }, }, ] -} \ No newline at end of file +} diff --git a/README.md b/README.md index e82191f1..42001e23 100644 --- a/README.md +++ b/README.md @@ -17,5 +17,3 @@ Python SDK and CLI Client for ServiceX * The **user** documentation is hosted at https://servicex-frontend.readthedocs.io * The **developer** documentation is available under the [`latest` view](https://servicex-frontend.readthedocs.io/en/latest/) - - diff --git a/docs/about.rst b/docs/about.rst index b0488ace..2529f1c7 100644 --- a/docs/about.rst +++ b/docs/about.rst @@ -5,7 +5,7 @@ This page lists presentations and posters on the ServiceX client (frontend) libr The `Client version` stands for the ServiceX client version for the given presentation. A demo (or demo-like) talks are marked as `Notebook`. -You can find more materials on the ServiceX backend or intergration into broader +You can find more materials on the ServiceX backend or integration into the broader python ecosystem at the `IRIS-HEP ServiceX page `_. @@ -15,7 +15,7 @@ python ecosystem at the `IRIS-HEP ServiceX page `_ - `ServiceX, the novel data delivery system, for physics analysis `_ - KyungEon Choi - \- - \- - * - May 11, 2023 + * - May 11, 2023 - `CHEP 2023 `_ - `Data Management Package for the novel data delivery system, ServiceX, and Applications to various physics analysis workflows `_ - KyungEon Choi @@ -50,4 +50,4 @@ python ecosystem at the `IRIS-HEP ServiceX page `_ - KyungEon Choi - \- - - o \ No newline at end of file + - o diff --git a/docs/conf.py b/docs/conf.py index d3c81008..9a7de4cd 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -11,30 +11,30 @@ # sys.path.insert(0, os.path.abspath("..")) -project = 'ServiceX' -copyright = '2024 Institute for Research and Innovation in Software for High Energy Physics (IRIS-HEP)' # NOQA 501 -author = 'Ben Galewsky, Gordon Watts, KyongEon Choi, Ketan Mahajan, Peter Onyisi' -release = '3.0.0' +project = "ServiceX" +copyright = "2024 Institute for Research and Innovation in Software for High Energy Physics (IRIS-HEP)" # NOQA 501 +author = "Ben Galewsky, Gordon Watts, KyongEon Choi, Ketan Mahajan, Peter Onyisi" +release = "3.0.0" # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration extensions = [ - 'sphinx.ext.napoleon', - 'sphinx.ext.intersphinx', - 'sphinx.ext.viewcode', - 'sphinx.ext.doctest', - 'code_include.extension', - 'myst_parser', - 'sphinxcontrib.autodoc_pydantic', - 'sphinx_tabs.tabs', - 'sphinx_copybutton', + "sphinx.ext.napoleon", + "sphinx.ext.intersphinx", + "sphinx.ext.viewcode", + "sphinx.ext.doctest", + "code_include.extension", + "myst_parser", + "sphinxcontrib.autodoc_pydantic", + "sphinx_tabs.tabs", + "sphinx_copybutton", ] -templates_path = ['_templates'] -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', 'examples'] +templates_path = ["_templates"] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "examples"] -autoclass_content = 'both' +autoclass_content =
"both" autodoc_pydantic_model_show_json = False autodoc_pydantic_field_list_validators = False @@ -45,12 +45,12 @@ autodoc_pydantic_model_undoc_members = False autodoc_pydantic_settings_show_validator_summary = False autodoc_pydantic_settings_show_validator_members = False -autodoc_pydantic_model_member_order = 'bysource' +autodoc_pydantic_model_member_order = "bysource" # -- Options for HTML output ------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output -html_theme = 'furo' -html_static_path = ['_static'] +html_theme = "furo" +html_static_path = ["_static"] html_show_sourcelink = False # sphinx-copybutton configuration diff --git a/docs/contribute.rst b/docs/contribute.rst index 2ffaa8da..93204a74 100644 --- a/docs/contribute.rst +++ b/docs/contribute.rst @@ -7,7 +7,7 @@ Overview -------- The ``servicex`` frontend code uses standard python packaging and open-source development methodologies. The code is hosted on GitHub, -and we use the GitHub issue tracker to manage bugs and feature requests. We also use GitHub pull requests for code review and merging. +and we use the GitHub issue tracker to manage bugs and feature requests. We also use GitHub pull requests for code review and merging. - `ServiceX_frontend `_ - The ServiceX Python library, which enables users to send requests to ServiceX. Currently, this is the only ServiceX frontend client. @@ -37,7 +37,7 @@ Development Workflow 1. Set up a local development environment: - Fork the ``ServiceX_frontend`` - Clone the (forked) repository to your local machine: - + - Set up a new environment via ``conda`` or ``virtualenv``. - Install dependencies, including test dependencies: @@ -52,15 +52,15 @@ Development Workflow git checkout master git pull upstream master - + - Create a branch for the feature you want to work on: .. code-block:: bash git checkout -b fix-issue-99 - + - Commit locally as you progress with ``git add`` and ``git commit``. - + 3. Test your changes: - Run the full test suite with ``python -m pytest``, or target specific test files with ``python -m pytest tests/path/to/file.py``. - Please write new unit tests to cover any changes you make. diff --git a/docs/databinder.rst b/docs/databinder.rst index c90f0c00..09272885 100644 --- a/docs/databinder.rst +++ b/docs/databinder.rst @@ -22,5 +22,3 @@ This page documents the classes used to describe a DataBinder request. :model-show-validator-summary: False :model-show-field-summary: False :undoc-members: - - diff --git a/docs/dataset_group.rst b/docs/dataset_group.rst index b1247e73..3e8216cb 100644 --- a/docs/dataset_group.rst +++ b/docs/dataset_group.rst @@ -3,4 +3,4 @@ DatasetGroup .. autoclass:: servicex.dataset_group.DatasetGroup :members: :undoc-members: - :show-inheritance: \ No newline at end of file + :show-inheritance: diff --git a/docs/datasets.rst b/docs/datasets.rst index ac2c42f1..0b2bd3f3 100644 --- a/docs/datasets.rst +++ b/docs/datasets.rst @@ -27,4 +27,4 @@ Files can also be located using wildcard patterns with XRootD. 
So, for example, *Note: available from ServiceX client version 3.0.1.* * Python: ``{ "Dataset": servicex.dataset.XRootD("root://eospublic.cern.ch//eos/opendata/mystuff/*") }`` - * YAML: ``Dataset: !XRootD root://eospublic.cern.ch//eos/opendata/mystuff/*`` \ No newline at end of file + * YAML: ``Dataset: !XRootD root://eospublic.cern.ch//eos/opendata/mystuff/*`` diff --git a/docs/errors.md b/docs/errors.md index baed8ea2..0de2dbfa 100644 --- a/docs/errors.md +++ b/docs/errors.md @@ -1,6 +1,6 @@ # Error Handling -Sometimes things go wrong in a ServiceX query, due to a badly-formed query, unavailable input files, server error, or something else. Issues can arise either before submission, during submission, after submission, or when retrieving results. +Sometimes things go wrong in a ServiceX query, due to a badly-formed query, unavailable input files, server error, or something else. Issues can arise either before submission, during submission, after submission, or when retrieving results. ## Errors before submission * If the request does not parse properly, a `ValidationError` will be raised and nothing will be submitted. @@ -27,4 +27,4 @@ If an error occurs after submission, `deliver()` will return a dict unless a sev * If a request cannot be submitted at all (for example, somehow a unparseable query is sent) then a `RuntimeError` will be raised for the corresponding sample. * If a transformation for a specific sample is canceled or ServiceX signals a fatal error on the backend, it will raise a `ServiceXException` for that sample. * If a transformation for a specific sample does not fully process all files, the results will depend on whether the `fail_if_incomplete` argument to `deliver()` was set to `True` (default) or `False`. If `True`, then a `ServiceXException` will be raised for that sample. If `False`, then a list of the available partial results will be returned. In either case the results will *not* be cached to the local cache database --- rerunning the code will resubmit the request. An error message will be printed with a link to a web page which summarizes errors on the server associated with the transformation. This can be caused by any runtime error: frequent causes are input files that are unavailable or errors in the query that can only be checked at run time (e.g. requesting branches that do not exist). -* If an error occurs during download (lack of disk space, permission errors, problems connecting to the remote storage, etc.) an appropriate exception will be raised for the corresponding sample. \ No newline at end of file +* If an error occurs during download (lack of disk space, permission errors, problems connecting to the remote storage, etc.) an appropriate exception will be raised for the corresponding sample. diff --git a/docs/examples.rst b/docs/examples.rst index 499d95d6..8097b70d 100644 --- a/docs/examples.rst +++ b/docs/examples.rst @@ -7,24 +7,24 @@ as a typed Python object. Examples For Each Query Type ----------------------------- -Examples for each query type (``Uproot-Raw``, ``Func_ADL Uproot``, ``Python Function``) +Examples for each query type (``Uproot-Raw``, ``Func_ADL Uproot``, ``Python Function``) and three different representations (``Python Dictionary``, ``Python Typed Object``, and ``YAML``). -Note that all examples extract the same branch of the same tree (``ROOT TTree``) from +Note that all examples extract the same branch of the same tree (``ROOT TTree``) from the same :ref:`dataset `. 
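Tying the docs/errors.md hunk above together: below is a minimal sketch (not part of this diff) of guarding a `deliver()` call under those rules. Only the exception names and the `fail_if_incomplete` default come from that page; the import path for `ServiceXException` and the print-and-return handling are assumptions.

```python
from pydantic import ValidationError

from servicex import deliver
from servicex.query_core import ServiceXException  # assumed import path


def guarded_deliver(spec):
    try:
        # fail_if_incomplete=True (the default): a partially processed sample
        # raises instead of returning partial results, and nothing is cached.
        return deliver(spec, fail_if_incomplete=True)
    except ValidationError as err:
        # raised before submission when the request does not parse
        print(f"Request rejected before submission: {err}")
    except ServiceXException as err:
        # transform canceled, fatal backend error, or incomplete sample
        print(f"Transform failed: {err}")
    except RuntimeError as err:
        # request could not be submitted at all
        print(f"Submission error: {err}")
```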
Uproot-Raw Query Example ~~~~~~~~~~~~~~~~~~~~~~~~~ -This example uses the raw uproot query type to extract the ``AnalysisElectronsAuxDyn.pt`` branch +This example uses the raw uproot query type to extract the ``AnalysisElectronsAuxDyn.pt`` branch from the ``CollectionTree`` tree in ATLAS PHYSLITE OpenData Dataset. .. tabs:: - + .. tab:: *Python Dict* .. literalinclude:: ../examples/UprootRaw_Dict.py :language: python - + .. tab:: *Python Typed Object* .. literalinclude:: ../examples/UprootRaw_Typed.py @@ -38,8 +38,8 @@ from the ``CollectionTree`` tree in ATLAS PHYSLITE OpenData Dataset. Func_ADL Uproot Query Example ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -This example uses an Func_ADL to extract the ``AnalysisElectronsAuxDyn.pt`` branch -from the ``CollectionTree`` tree in ATLAS PHYSLITE OpenData Dataset. +This example uses a Func_ADL query to extract the ``AnalysisElectronsAuxDyn.pt`` branch +from the ``CollectionTree`` tree in ATLAS PHYSLITE OpenData Dataset. .. tabs:: @@ -89,8 +89,8 @@ For this second example, make sure the extra package ``func_adl_servicex_xaodr22 Python Function Query Example ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -This example uses an uproot python function to extract the ``AnalysisElectronsAuxDyn.pt`` branch -from the ``CollectionTree`` tree in ATLAS PHYSLITE OpenData Dataset. +This example uses an uproot python function to extract the ``AnalysisElectronsAuxDyn.pt`` branch +from the ``CollectionTree`` tree in ATLAS PHYSLITE OpenData Dataset. Note that you can specify a python function even in a yaml file. .. tabs:: @@ -114,8 +114,8 @@ Note that you can specify a python function even in a yaml file. The ServiceX Deliver Function ------------------------------ The ``deliver`` function is used to submit a request to ServiceX. It takes a request in one of the -three formats and returns a python dictionary with the name of the sample as a key -and a list of URLs or local file paths as a value. +three formats and returns a python dictionary with the name of the sample as a key +and a list of URLs or local file paths as a value. How to Use YAML Specification @@ -132,9 +132,9 @@ You can use the following code: ) .. _label_dataset: - + The Dataset in Examples ----------------------- The dataset in the examples is publically accessible ATLAS Open Data -(`ATLAS DAOD PHYSLITE format Run 2 2016 proton-proton collision data +(`ATLAS DAOD PHYSLITE format Run 2 2016 proton-proton collision data `_). diff --git a/docs/getting_started.rst b/docs/getting_started.rst index 68c5934a..2d661d1a 100644 --- a/docs/getting_started.rst +++ b/docs/getting_started.rst @@ -66,7 +66,7 @@ Expected output: .. code-block:: python JetPt - entry + entry 0 36.319766 1 34.331914 2 16.590844 @@ -91,7 +91,7 @@ so we can access the files directly. .. code-block:: python - + import ast import qastle @@ -145,4 +145,4 @@ Expected output: Next steps ----------- -Check out the [requests guide](requests.md) to learn more about specifying transformation requests using func-ADL. \ No newline at end of file +Check out the [requests guide](requests.md) to learn more about specifying transformation requests using func-ADL. diff --git a/docs/introduction.rst b/docs/introduction.rst index b7225bc5..dfe8ebff 100644 --- a/docs/introduction.rst +++ b/docs/introduction.rst @@ -25,4 +25,4 @@ data needed for an analysis. ServiceX is designed to feed columns to a user running an analysis (e.g. via `Awkward `_ or `Coffea `_ tools) based on the results of a query designed by -the user. \ No newline at end of file +the user.
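The examples.rst hunks above describe `deliver` returning a python dictionary keyed by sample name, with a list of URLs or local file paths as each value. A minimal sketch of that round trip, reusing the `UprootRaw_Dict` sample from this PR's examples; reading each delivered file back with uproot assumes the default `root-ttree` output format:

```python
import uproot

from servicex import dataset, deliver, query

spec = {
    "Sample": [
        {
            "Name": "UprootRaw_Dict",
            "Dataset": dataset.FileList(
                [
                    "root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000001.pool.root.1",  # noqa: E501
                ]
            ),
            "Query": query.UprootRaw(
                [
                    {
                        "treename": "CollectionTree",
                        "filter_name": "AnalysisElectronsAuxDyn.pt",
                    }
                ]
            ),
        }
    ]
}

# deliver() returns {sample name: [URLs or local file paths]}
files = deliver(spec)
for path in files["UprootRaw_Dict"]:
    with uproot.open(path) as f:
        print(path, f.keys())
```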
diff --git a/docs/query_types.rst b/docs/query_types.rst index 8e9288fb..6fb76d80 100644 --- a/docs/query_types.rst +++ b/docs/query_types.rst @@ -117,7 +117,7 @@ code, or uproot python code. An example that fetches the :math:`p_T, \eta` and EM fraction of jets from an ATLAS PHYSLITE file is as follows: .. code-block:: python - + from func_adl_servicex_xaodr22 import FuncADLQueryPHYSLITE, cpp_float query = FuncADLQueryPHYSLITE() diff --git a/docs/servicex.databinder.rst b/docs/servicex.databinder.rst index 85ea7cf5..49589226 100644 --- a/docs/servicex.databinder.rst +++ b/docs/servicex.databinder.rst @@ -20,4 +20,3 @@ servicex.databinder_models :model-show-validator-summary: False :model-show-field-summary: False :undoc-members: - diff --git a/docs/transform_request.rst b/docs/transform_request.rst index 83b33fb7..15f0409d 100644 --- a/docs/transform_request.rst +++ b/docs/transform_request.rst @@ -4,7 +4,7 @@ Transformation Request Specify a Request ----------------- Transform requests are specified with one or more Sample specifications, and -optionally a General section and one or more definitions which are substituted +optionally a General section and one or more definitions which are substituted into the Sample specifications. These requests can be defined as: @@ -36,10 +36,10 @@ The General section of the request includes the following fields: * (Optional) ``Delivery``: Can be ``LocalCache`` (default) to download the results to the system on which you are running the client, or ``URLs`` to provide HTTPS URLs to the output on the ServiceX storage (you can also use the enums ``servicex.Delivery.LocalCache`` and ``servicex.Delivery.URLs``). The output files on the ServiceX storage will periodically get cleaned, so if you need to keep the results for long term use it is recommended that you download the output to local cache, but for transient use the URLs will be faster. In general, if you are running on your laptop away from the ServiceX site and are working with a small amount of -data, select ``LocalCache`` for ``Delivery``. If you are located at an analysis facility, please select ``URLs``. +data, select ``LocalCache`` for ``Delivery``. If you are located at an analysis facility, please select ``URLs``. The Definitions Sections ^^^^^^^^^^^^^^^^^^^^^^^^ The Definitions section (only available when setting up the request using YAML files) is a list of values that can be substituted into fields in the Sample -sections, defined using the YAML anchor/alias syntax. This is useful for defining common values that are used in multiple samples. This is an advanced concept. \ No newline at end of file +sections, defined using the YAML anchor/alias syntax. This is useful for defining common values that are used in multiple samples. This is an advanced concept. diff --git a/docs/transformer_matrix.md b/docs/transformer_matrix.md index 60eacb26..1242434c 100644 --- a/docs/transformer_matrix.md +++ b/docs/transformer_matrix.md @@ -40,8 +40,8 @@ Let's look at the structure of an Uproot-Raw query. 
```python query = [ { - 'treename': 'reco', - 'filter_name': ['/mu.*/', 'runNumber', 'lbn', 'jet_pt_*'], + 'treename': 'reco', + 'filter_name': ['/mu.*/', 'runNumber', 'lbn', 'jet_pt_*'], 'cut':'(count_nonzero(jet_pt_NOSYS>40e3, axis=1)>=4)' }, { @@ -59,7 +59,7 @@ Each dictionary either has a `treename` key (indicating that it is a query on a * `expressions`, `cut`, `filter_name`, `aliases`: have the same meaning as for [`TTree.arrays()`](https://uproot.readthedocs.io/en/latest/uproot.behaviors.TTree.TTree.html#arrays) in `uproot`, except that functions aren't permitted (but *glob*s and _regular expressions_, which are special kinds of strings, are). Other keys will be ignored. - + Most queries will probably use `filter_names`, which selects specific branches, and `cut`, which selects specific rows. The `expressions` argument permits new values to be computed from the branches in the tree, and `aliases` can be used to introduce shorthand to make these expressions cleaner. The Uproot-Raw language extends the default `uproot` expression language by adding many functions from Awkward Array (the example above uses `awkward.count_nonzero`). This permits very powerful expressions for cuts and expression evaluation. diff --git a/docs/troubleshoot.rst b/docs/troubleshoot.rst index e07cb4ae..106d76df 100644 --- a/docs/troubleshoot.rst +++ b/docs/troubleshoot.rst @@ -1,4 +1,4 @@ Troubleshooting --------------- -Common errors encountered in Servicex and their remedies \ No newline at end of file +Common errors encountered in Servicex and their remedies diff --git a/docs/yaml.md b/docs/yaml.md index c6a32200..c2475358 100644 --- a/docs/yaml.md +++ b/docs/yaml.md @@ -43,4 +43,4 @@ which includes a `definitions.yaml` file that looks like this: By factoring the files like this and using anchors and aliases, the top-level file can be kept readable. ## A note on string handling -YAML tries to provide ways to "naturally" embed multiline strings in the configuration files. This can sometimes lead to somewhat unexpected results. We recommend the "block scalar" style, introducing a multiline string by starting with a pipe (|) and using a constant indentation for each line of the string that follows. You might find [this site](https://yaml-multiline.info/) useful to demonstrate the various potential modes. \ No newline at end of file +YAML tries to provide ways to "naturally" embed multiline strings in the configuration files. This can sometimes lead to somewhat unexpected results. We recommend the "block scalar" style, introducing a multiline string by starting with a pipe (|) and using a constant indentation for each line of the string that follows. You might find [this site](https://yaml-multiline.info/) useful to demonstrate the various potential modes. 
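A sketch (not part of this diff) combining the Uproot-Raw keys documented in docs/transformer_matrix.md above: `treename`, `expressions`, `cut`, and `aliases`. The branch names come from that page's example; that the awkward-extended dialect resolves aliases inside `cut`, as plain uproot does, is an assumption.

```python
from servicex import query

q = query.UprootRaw(
    [
        {
            "treename": "reco",
            # shorthand usable by the expressions and cut below
            "aliases": {"njet40": "count_nonzero(jet_pt_NOSYS > 40e3, axis=1)"},
            # columns to compute from branches in the tree
            "expressions": ["runNumber", "lbn", "njet40"],
            # row selection, equivalent to the cut in the page's example
            "cut": "njet40 >= 4",
        }
    ]
)
```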
diff --git a/examples/FuncADL_Uproot_Dict.py b/examples/FuncADL_Uproot_Dict.py index 54b73f95..bee2f998 100644 --- a/examples/FuncADL_Uproot_Dict.py +++ b/examples/FuncADL_Uproot_Dict.py @@ -2,19 +2,21 @@ spec = { - 'Sample': [{ - 'Name': "FuncADL_Uproot_Dict", - 'Dataset': dataset.FileList( - [ - "root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000001.pool.root.1", # noqa: E501 - "root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000002.pool.root.1", # noqa: E501 - "root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000003.pool.root.1", # noqa: E501 - ] - ), - 'Query': query.FuncADL_Uproot() - .FromTree('CollectionTree') - .Select(lambda e: {'el_pt': e['AnalysisElectronsAuxDyn.pt']}) # type: ignore - }] + "Sample": [ + { + "Name": "FuncADL_Uproot_Dict", + "Dataset": dataset.FileList( + [ + "root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000001.pool.root.1", # noqa: E501 + "root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000002.pool.root.1", # noqa: E501 + "root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000003.pool.root.1", # noqa: E501 + ] + ), + "Query": query.FuncADL_Uproot() + .FromTree("CollectionTree") + .Select(lambda e: {"el_pt": e["AnalysisElectronsAuxDyn.pt"]}), # type: ignore + } + ] } print(f"Files: {deliver(spec)}") diff --git a/examples/FuncADL_Uproot_Typed.py b/examples/FuncADL_Uproot_Typed.py index b2c431c4..536c9e21 100644 --- a/examples/FuncADL_Uproot_Typed.py +++ b/examples/FuncADL_Uproot_Typed.py @@ -13,8 +13,10 @@ ] ), Query=query.FuncADL_Uproot() - .FromTree('CollectionTree') - .Select(lambda e: {'el_pt': e['AnalysisElectronsAuxDyn.pt']}) # type:ignore + .FromTree("CollectionTree") + .Select( + lambda e: {"el_pt": e["AnalysisElectronsAuxDyn.pt"]} + ), # type:ignore ) ] ) diff --git a/examples/PythonFunction_Dict.py b/examples/PythonFunction_Dict.py index eb3c6a55..ae0d3e39 100644 --- a/examples/PythonFunction_Dict.py +++ b/examples/PythonFunction_Dict.py @@ -3,23 +3,26 @@ def run_query(input_filenames=None): import uproot # type: ignore + with uproot.open({input_filenames: "CollectionTree"}) as o: br = o.arrays("AnalysisElectronsAuxDyn.pt") return br spec = { - 'Sample': [{ - 'Name': "PythonFunction_Dict", - 'Dataset': dataset.FileList( - [ - "root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000001.pool.root.1", # noqa: E501 - "root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000002.pool.root.1", # noqa: E501 - "root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000003.pool.root.1", # noqa: E501 - ] - ), - 'Query': query.PythonFunction().with_uproot_function(run_query) - }] + "Sample": [ + { + "Name": "PythonFunction_Dict", + "Dataset": dataset.FileList( + [ + "root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000001.pool.root.1", # noqa: E501 + "root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000002.pool.root.1", # noqa: E501 + "root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000003.pool.root.1", # noqa: E501 + ] + ), + "Query": query.PythonFunction().with_uproot_function(run_query), + } + ] } print(f"Files: {deliver(spec)}") diff --git a/examples/PythonFunction_Typed.py b/examples/PythonFunction_Typed.py 
index 051d3393..007386cb 100644 --- a/examples/PythonFunction_Typed.py +++ b/examples/PythonFunction_Typed.py @@ -3,6 +3,7 @@ def run_query(input_filenames=None): import uproot # type: ignore + with uproot.open({input_filenames: "CollectionTree"}) as o: br = o.arrays("AnalysisElectronsAuxDyn.pt") return br @@ -19,7 +20,7 @@ def run_query(input_filenames=None): "root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000003.pool.root.1", # noqa: E501 ] ), - Query=query.PythonFunction().with_uproot_function(run_query) + Query=query.PythonFunction().with_uproot_function(run_query), ) ] ) diff --git a/examples/UprootRaw_Dict.py b/examples/UprootRaw_Dict.py index cc9e7461..be79645b 100644 --- a/examples/UprootRaw_Dict.py +++ b/examples/UprootRaw_Dict.py @@ -2,24 +2,26 @@ spec = { - 'Sample': [{ - 'Name': "UprootRaw_Dict", - 'Dataset': dataset.FileList( - [ - "root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000001.pool.root.1", # noqa: E501 - "root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000002.pool.root.1", # noqa: E501 - "root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000003.pool.root.1", # noqa: E501 - ] - ), - 'Query': query.UprootRaw( - [ - { - "treename": "CollectionTree", - "filter_name": "AnalysisElectronsAuxDyn.pt", - } - ] - ) - }] + "Sample": [ + { + "Name": "UprootRaw_Dict", + "Dataset": dataset.FileList( + [ + "root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000001.pool.root.1", # noqa: E501 + "root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000002.pool.root.1", # noqa: E501 + "root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000003.pool.root.1", # noqa: E501 + ] + ), + "Query": query.UprootRaw( + [ + { + "treename": "CollectionTree", + "filter_name": "AnalysisElectronsAuxDyn.pt", + } + ] + ), + } + ] } print(f"Files: {deliver(spec)}") diff --git a/examples/UprootRaw_Typed.py b/examples/UprootRaw_Typed.py index de4964bf..5897cefd 100644 --- a/examples/UprootRaw_Typed.py +++ b/examples/UprootRaw_Typed.py @@ -19,7 +19,7 @@ "filter_name": "AnalysisElectronsAuxDyn.pt", } ] - ) + ), ) ] ) diff --git a/examples/config_FuncADL_Uproot.yaml b/examples/config_FuncADL_Uproot.yaml index bb8b2c42..77272114 100644 --- a/examples/config_FuncADL_Uproot.yaml +++ b/examples/config_FuncADL_Uproot.yaml @@ -2,7 +2,7 @@ Sample: - Name: Uproot_FuncADL_YAML - Dataset: !FileList + Dataset: !FileList [ "root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000001.pool.root.1", "root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000002.pool.root.1", diff --git a/examples/config_PythonFunction.yaml b/examples/config_PythonFunction.yaml index b6802383..627251a2 100644 --- a/examples/config_PythonFunction.yaml +++ b/examples/config_PythonFunction.yaml @@ -2,7 +2,7 @@ Sample: - Name: Uproot_PythonFunction_YAML - Dataset: !FileList + Dataset: !FileList [ "root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000001.pool.root.1", "root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000002.pool.root.1", @@ -14,4 +14,4 @@ Sample: with uproot.open({input_filenames:"CollectionTree"}) as o: br = o.arrays("AnalysisElectronsAuxDyn.pt") - return br \ No newline at end of file + return br diff --git 
a/examples/config_UprootRaw.yaml b/examples/config_UprootRaw.yaml index c73cf3c6..766408ff 100644 --- a/examples/config_UprootRaw.yaml +++ b/examples/config_UprootRaw.yaml @@ -2,11 +2,11 @@ Sample: - Name: UprootRaw_YAML - Dataset: !FileList + Dataset: !FileList [ "root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000001.pool.root.1", "root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000002.pool.root.1", "root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000003.pool.root.1", ] Query: !UprootRaw | - [{"treename":"CollectionTree", "filter_name": "AnalysisElectronsAuxDyn.pt"}] \ No newline at end of file + [{"treename":"CollectionTree", "filter_name": "AnalysisElectronsAuxDyn.pt"}] diff --git a/examples/func_adl_xAOD_simple.py b/examples/func_adl_xAOD_simple.py index 353f19f7..08c9045e 100644 --- a/examples/func_adl_xAOD_simple.py +++ b/examples/func_adl_xAOD_simple.py @@ -3,24 +3,26 @@ def func_adl_xaod_simple(): query = q.FuncADL_ATLASr22() # type: ignore - jets_per_event = query.Select(lambda e: e.Jets('AnalysisJets')) + jets_per_event = query.Select(lambda e: e.Jets("AnalysisJets")) jet_info_per_event = jets_per_event.Select( lambda jets: { - 'pt': jets.Select(lambda j: j.pt()), - 'eta': jets.Select(lambda j: j.eta()) + "pt": jets.Select(lambda j: j.pt()), + "eta": jets.Select(lambda j: j.eta()), } ) spec = { - 'Sample': [{ - 'Name': "func_adl_xAOD_simple", - 'Dataset': dataset.FileList( - [ - "root://eospublic.cern.ch//eos/opendata/atlas/rucio/mc20_13TeV/DAOD_PHYSLITE.37622528._000013.pool.root.1", # noqa: E501 - ] - ), - 'Query': jet_info_per_event - }] + "Sample": [ + { + "Name": "func_adl_xAOD_simple", + "Dataset": dataset.FileList( + [ + "root://eospublic.cern.ch//eos/opendata/atlas/rucio/mc20_13TeV/DAOD_PHYSLITE.37622528._000013.pool.root.1", # noqa: E501 + ] + ), + "Query": jet_info_per_event, + } + ] } files = deliver(spec, servicex_name="servicex-uc-af") assert files is not None, "No files returned from deliver! 
Internal error" @@ -29,4 +31,4 @@ def func_adl_xaod_simple(): if __name__ == "__main__": files = func_adl_xaod_simple() - assert len(files['func_adl_xAOD_simple']) == 1 + assert len(files["func_adl_xAOD_simple"]) == 1 diff --git a/examples/func_adl_xAOD_typed.py b/examples/func_adl_xAOD_typed.py index c8588c9e..7b675846 100644 --- a/examples/func_adl_xAOD_typed.py +++ b/examples/func_adl_xAOD_typed.py @@ -4,26 +4,28 @@ def func_adl_xaod_typed(): query = FuncADLQueryPHYSLITE() # type: ignore - jets_per_event = query.Select(lambda e: e.Jets('AnalysisJets')) + jets_per_event = query.Select(lambda e: e.Jets("AnalysisJets")) jet_info_per_event = jets_per_event.Select( lambda jets: { - 'pt': jets.Select(lambda j: j.pt()), - 'eta': jets.Select(lambda j: j.eta()), - 'emf': jets.Select(lambda j: j.getAttribute[cpp_float]('EMFrac')) # type: ignore + "pt": jets.Select(lambda j: j.pt()), + "eta": jets.Select(lambda j: j.eta()), + "emf": jets.Select(lambda j: j.getAttribute[cpp_float]("EMFrac")), # type: ignore } ) spec = { - 'Sample': [{ - 'Name': "func_adl_xAOD_simple", - 'Dataset': dataset.FileList( - [ - "root://eospublic.cern.ch//eos/opendata/atlas/rucio/mc20_13TeV/DAOD_PHYSLITE.37622528._000013.pool.root.1", # noqa: E501 - ] - ), - 'Query': jet_info_per_event, - 'Codegen': 'atlasr22', - }] + "Sample": [ + { + "Name": "func_adl_xAOD_simple", + "Dataset": dataset.FileList( + [ + "root://eospublic.cern.ch//eos/opendata/atlas/rucio/mc20_13TeV/DAOD_PHYSLITE.37622528._000013.pool.root.1", # noqa: E501 + ] + ), + "Query": jet_info_per_event, + "Codegen": "atlasr22", + } + ] } files = deliver(spec, servicex_name="servicex-uc-af") assert files is not None, "No files returned from deliver! Internal error" @@ -32,4 +34,4 @@ def func_adl_xaod_typed(): if __name__ == "__main__": files = func_adl_xaod_typed() - assert len(files['func_adl_xAOD_simple']) == 1 + assert len(files["func_adl_xAOD_simple"]) == 1 diff --git a/servicex/__init__.py b/servicex/__init__.py index b54f9d03..4d8e523e 100644 --- a/servicex/__init__.py +++ b/servicex/__init__.py @@ -43,5 +43,5 @@ "ServiceXSpec", "deliver", "dataset", - "query" + "query", ] diff --git a/servicex/app/__init__.py b/servicex/app/__init__.py index bf610086..da4f95a8 100644 --- a/servicex/app/__init__.py +++ b/servicex/app/__init__.py @@ -26,8 +26,10 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ def is_terminal_output(): import sys + return sys.stdout.isatty() @@ -39,8 +41,10 @@ def pipeable_table(title: str): from rich.table import Table import rich.box - table = Table(title=title if is_terminal_output() else None, - show_header=is_terminal_output(), - box=rich.box.HEAVY_HEAD if is_terminal_output() else None) + table = Table( + title=title if is_terminal_output() else None, + show_header=is_terminal_output(), + box=rich.box.HEAVY_HEAD if is_terminal_output() else None, + ) return table diff --git a/servicex/app/cache.py b/servicex/app/cache.py index 085bd489..834051c2 100644 --- a/servicex/app/cache.py +++ b/servicex/app/cache.py @@ -67,13 +67,15 @@ def list(): r.request_id, r.submit_time.astimezone().strftime("%a, %Y-%m-%d %H:%M"), str(r.files), - r.result_format + r.result_format, ) rich.print(table) @cache_app.command() -def clear(force: bool = typer.Option(False, "-y", help="Force, don't ask for permission")): +def clear( + force: bool = typer.Option(False, "-y", help="Force, don't ask for permission") +): """ Clear the local query cache """ diff --git a/servicex/app/cli_options.py b/servicex/app/cli_options.py index ae09cf84..437b7de6 100644 --- a/servicex/app/cli_options.py +++ b/servicex/app/cli_options.py @@ -28,7 +28,9 @@ import typer -backend_cli_option = typer.Option(None, "-b", "--backend", - help="Name of backend server from .servicex file") -config_file_option = typer.Option(None, "-c", "--config", - help="ServiceX client configuration file") +backend_cli_option = typer.Option( + None, "-b", "--backend", help="Name of backend server from .servicex file" +) +config_file_option = typer.Option( + None, "-c", "--config", help="ServiceX client configuration file" +) diff --git a/servicex/app/codegen.py b/servicex/app/codegen.py index e4c41d20..70ac2af3 100644 --- a/servicex/app/codegen.py +++ b/servicex/app/codegen.py @@ -38,8 +38,9 @@ @codegen_app.command(no_args_is_help=False) def flush( - backend: Optional[str] = backend_cli_option, - config_path: Optional[str] = config_file_option): + backend: Optional[str] = backend_cli_option, + config_path: Optional[str] = config_file_option, +): """ Flush the available code generators from the cache """ @@ -51,8 +52,9 @@ def flush( @codegen_app.command(no_args_is_help=False) def list( - backend: Optional[str] = backend_cli_option, - config_path: Optional[str] = config_file_option): + backend: Optional[str] = backend_cli_option, + config_path: Optional[str] = config_file_option, +): """ List the available code generators """ diff --git a/servicex/app/datasets.py b/servicex/app/datasets.py index 5604558c..ca33c62c 100644 --- a/servicex/app/datasets.py +++ b/servicex/app/datasets.py @@ -43,18 +43,18 @@ @datasets_app.command(no_args_is_help=False) def list( - backend: Optional[str] = backend_cli_option, - config_path: Optional[str] = config_file_option, - did_finder: Optional[str] = typer.Option( - None, - help="Filter datasets by DID finder. Some useful values are 'rucio' or 'user'", - show_default=False, - ), - show_deleted: Optional[bool] = typer.Option( - False, - help="Show deleted datasets", - show_default=True, - ), + backend: Optional[str] = backend_cli_option, + config_path: Optional[str] = config_file_option, + did_finder: Optional[str] = typer.Option( + None, + help="Filter datasets by DID finder. Some useful values are 'rucio' or 'user'", + show_default=False, + ), + show_deleted: Optional[bool] = typer.Option( + False, + help="Show deleted datasets", + show_default=True, + ), ): """ List the datasets. 
Use fancy formatting if printing to a terminal. @@ -71,7 +71,9 @@ def list( if show_deleted: table.add_column("Deleted") - datasets = asyncio.run(sx.get_datasets(did_finder=did_finder, show_deleted=show_deleted)) + datasets = asyncio.run( + sx.get_datasets(did_finder=did_finder, show_deleted=show_deleted) + ) for d in datasets: # Format the CachedDataset object into a table row # The last_updated field is what we should be displaying, but that is @@ -80,7 +82,7 @@ def list( # https://github.com/ssl-hep/ServiceX/issues/906 is resolved d_name = d.name if d.did_finder != "user" else "File list" is_stale = "Yes" if d.is_stale else "" - last_used = d.last_used.strftime('%Y-%m-%dT%H:%M:%S') + last_used = d.last_used.strftime("%Y-%m-%dT%H:%M:%S") table.add_row( str(d.id), d_name, @@ -88,16 +90,16 @@ def list( "{:,}MB".format(round(d.size / 1e6)), d.lookup_status, last_used, - is_stale + is_stale, ) rich.print(table) @datasets_app.command(no_args_is_help=True) def get( - backend: Optional[str] = backend_cli_option, - config_path: Optional[str] = config_file_option, - dataset_id: int = typer.Argument(..., help="The ID of the dataset to get") + backend: Optional[str] = backend_cli_option, + config_path: Optional[str] = config_file_option, + dataset_id: int = typer.Argument(..., help="The ID of the dataset to get"), ): """ Get the details of a dataset. Output as a pretty, nested table if printing to a terminal. @@ -116,31 +118,32 @@ def get( for file in dataset.files: sub_table = Table(title="") sub_table.add_column(f"File ID: {file.id}") - for path in file.paths.split(','): + for path in file.paths.split(","): sub_table.add_row(path) - table.add_row( - sub_table - ) + table.add_row(sub_table) # Set alternating row styles table.row_styles = ["", ""] rich.print(table) else: - data = {"dataset": { - "id": dataset.id, - "name": dataset.name, - "files": [ - {"id": file.id, "paths": file.paths.split(',')} for file in dataset.files - ]} + data = { + "dataset": { + "id": dataset.id, + "name": dataset.name, + "files": [ + {"id": file.id, "paths": file.paths.split(",")} + for file in dataset.files + ], + } } rich.print_json(data=data) @datasets_app.command(no_args_is_help=True) def delete( - backend: Optional[str] = backend_cli_option, - config_path: Optional[str] = config_file_option, - dataset_id: int = typer.Argument(..., help="The ID of the dataset to delete") + backend: Optional[str] = backend_cli_option, + config_path: Optional[str] = config_file_option, + dataset_id: int = typer.Argument(..., help="The ID of the dataset to delete"), ): sx = ServiceXClient(backend=backend, config_path=config_path) result = asyncio.run(sx.delete_dataset(dataset_id)) diff --git a/servicex/app/main.py b/servicex/app/main.py index 3bbedc07..26863154 100644 --- a/servicex/app/main.py +++ b/servicex/app/main.py @@ -67,20 +67,24 @@ def main_info( @app.command() def deliver( - backend: Optional[str] = backend_cli_option, - config_path: Optional[str] = config_file_option, - spec_file: str = typer.Argument(..., help="Spec file to submit to serviceX"), - ignore_cache: Optional[bool] = typer.Option( - None, "--ignore-cache", help="Ignore local cache and always submit to ServiceX") + backend: Optional[str] = backend_cli_option, + config_path: Optional[str] = config_file_option, + spec_file: str = typer.Argument(..., help="Spec file to submit to serviceX"), + ignore_cache: Optional[bool] = typer.Option( + None, "--ignore-cache", help="Ignore local cache and always submit to ServiceX" + ), ): """ Deliver a file to the 
ServiceX cache. """ print(f"Delivering {spec_file} to ServiceX cache") - results = servicex_client.deliver(spec_file, servicex_name=backend, - config_path=config_path, - ignore_local_cache=ignore_cache) + results = servicex_client.deliver( + spec_file, + servicex_name=backend, + config_path=config_path, + ignore_local_cache=ignore_cache, + ) rich.print(results) diff --git a/servicex/app/transforms.py b/servicex/app/transforms.py index 4007ad7c..b4fed837 100644 --- a/servicex/app/transforms.py +++ b/servicex/app/transforms.py @@ -88,9 +88,7 @@ def transform_filter(status: Status) -> bool: transforms = sx.get_transforms() for t in transforms: if transform_filter(t.status): - table.add_row( - t.request_id, t.title, t.status, str(t.files_completed) - ) + table.add_row(t.request_id, t.title, t.status, str(t.files_completed)) rich.print(table) @@ -104,6 +102,7 @@ def files( """ List the files that were produced by a transform. """ + async def list_files(sx: ServiceXClient, transform_id: str) -> List[ResultFile]: transform = await sx.get_transform_status_async(transform_id) minio = MinioAdapter.for_transform(transform) @@ -126,11 +125,14 @@ def download( config_path: Optional[str] = config_file_option, transform_id: str = typer.Argument(help="Transform ID"), local_dir: str = typer.Option(".", "-d", help="Local dir to download to"), - concurrency: int = typer.Option(20, "--concurrency", help="Number of concurrent downloads"), + concurrency: int = typer.Option( + 20, "--concurrency", help="Number of concurrent downloads" + ), ): """ Download the files that were produced by a transform. """ + async def download_files(sx: ServiceXClient, transform_id: str, local_dir): s3_semaphore = asyncio.Semaphore(concurrency) @@ -164,9 +166,9 @@ async def download_with_progress(filename) -> Path: @transforms_app.command(no_args_is_help=True) def delete( - backend: Optional[str] = backend_cli_option, - config_path: Optional[str] = config_file_option, - transform_id_list: List[str] = typer.Argument(help="Transform ID"), + backend: Optional[str] = backend_cli_option, + config_path: Optional[str] = config_file_option, + transform_id_list: List[str] = typer.Argument(help="Transform ID"), ): """ Delete a completed transform along with the result files. @@ -181,9 +183,9 @@ def delete( @transforms_app.command(no_args_is_help=True) def cancel( - backend: Optional[str] = backend_cli_option, - config_path: Optional[str] = config_file_option, - transform_id_list: List[str] = typer.Argument(help="Transform ID"), + backend: Optional[str] = backend_cli_option, + config_path: Optional[str] = config_file_option, + transform_id_list: List[str] = typer.Argument(help="Transform ID"), ): """ Cancel a running transform request. 
@@ -200,7 +202,7 @@ class TimeFrame(str, Enum): """ day = ("day",) week = ("week",) - month = ("month") + month = ("month",) class LogLevel(str, Enum): @@ -235,13 +237,17 @@ def select_time(time_frame=TimeFrame.day): return time_string -def create_kibana_link_parameters(log_url, transform_id=None, log_level=None, time_frame=None): +def create_kibana_link_parameters( + log_url, transform_id=None, log_level=None, time_frame=None +): """ Create the _a and _g parameters for the kibana dashboard link """ if log_level: - a_parameter = f"&_a=(filters:!({add_query('requestId', transform_id)},"\ - f"{add_query('level', log_level.value.lower())}))" + a_parameter = ( + f"&_a=(filters:!({add_query('requestId', transform_id)}," + f"{add_query('level', log_level.value.lower())}))" + ) else: a_parameter = f"&_a=(filters:!({add_query('requestId', transform_id)}))" g_parameter = f"&_g=({select_time(time_frame.value.lower())})" @@ -253,12 +259,12 @@ def create_kibana_link_parameters(log_url, transform_id=None, log_level=None, ti def logs( backend: Optional[str] = backend_cli_option, transform_id: str = typer.Argument(help="Transform ID"), - log_level: Optional[LogLevel] = typer.Option("ERROR", "-l", "--log-level", - help="Level of Logs", - case_sensitive=False), - time_frame: Optional[TimeFrame] = typer.Option("month", "-f", "--time-frame", - help="Time Frame", - case_sensitive=False) + log_level: Optional[LogLevel] = typer.Option( + "ERROR", "-l", "--log-level", help="Level of Logs", case_sensitive=False + ), + time_frame: Optional[TimeFrame] = typer.Option( + "month", "-f", "--time-frame", help="Time Frame", case_sensitive=False + ), ): """ Open the URL to the Kibana dashboard of the logs of a tranformer @@ -266,10 +272,12 @@ def logs( sx = ServiceXClient(backend=backend) transforms = sx.get_transform_status(transform_id) if transforms and transforms.request_id == transform_id: - kibana_link = create_kibana_link_parameters(transforms.log_url, - transform_id=transform_id, - log_level=log_level, - time_frame=time_frame) + kibana_link = create_kibana_link_parameters( + transforms.log_url, + transform_id=transform_id, + log_level=log_level, + time_frame=time_frame, + ) print(kibana_link) webbrowser.open(kibana_link) else: diff --git a/servicex/databinder_models.py b/servicex/databinder_models.py index 339d195c..ac30b5ab 100644 --- a/servicex/databinder_models.py +++ b/servicex/databinder_models.py @@ -30,12 +30,16 @@ from typing import Union, Optional, List from pydantic import ( Field, - model_validator, field_validator, + model_validator, + field_validator, ) import logging -from servicex.dataset_identifier import (DataSetIdentifier, RucioDatasetIdentifier, - FileListDataset) +from servicex.dataset_identifier import ( + DataSetIdentifier, + RucioDatasetIdentifier, + FileListDataset, +) from servicex.query_core import QueryStringGenerator from servicex.models import ResultFormat, DocStringBaseModel @@ -46,7 +50,8 @@ class Sample(DocStringBaseModel): """ Represents a single transform request within a larger submission. 
""" - model_config = {'use_attribute_docstrings': True} + + model_config = {"use_attribute_docstrings": True} Name: str """ @@ -120,7 +125,9 @@ def validate_did_xor_file(cls, values): :param values: :return: """ - count = sum(["RucioDID" in values, "XRootDFiles" in values, "Dataset" in values]) + count = sum( + ["RucioDID" in values, "XRootDFiles" in values, "Dataset" in values] + ) if count > 1: raise ValueError("Only specify one of Dataset, XRootDFiles, or RucioDID.") if count == 0: @@ -155,9 +162,10 @@ def hash(self): [ self.dataset_identifier.hash, self.NFiles, - self.Query if (not self.Query or isinstance(self.Query, str)) + self.Query + if (not self.Query or isinstance(self.Query, str)) else self.Query.generate_selection_string(), - self.Codegen + self.Codegen, ] ).encode("utf-8") ) @@ -168,12 +176,14 @@ class General(DocStringBaseModel): """ Represents a group of samples to be transformed together. """ - model_config = {'use_attribute_docstrings': True} + + model_config = {"use_attribute_docstrings": True} class OutputFormatEnum(str, Enum): """ Specifies the output format for the transform request. """ + parquet = "parquet" """ Save the output as @@ -187,9 +197,9 @@ class OutputFormatEnum(str, Enum): """ def to_ResultFormat(self) -> ResultFormat: - """ This method is used to convert the OutputFormatEnum enum to the ResultFormat enum, - which is what is actually used for the TransformRequest. This allows us to use - different string values in the two enum classes to maintain backend compatibility + """This method is used to convert the OutputFormatEnum enum to the ResultFormat enum, + which is what is actually used for the TransformRequest. This allows us to use + different string values in the two enum classes to maintain backend compatibility """ if self == self.parquet: return ResultFormat.parquet @@ -230,7 +240,7 @@ class DeliveryEnum(str, Enum): Directory to output a yaml file describing the output files. """ - OutFilesetName: str = 'servicex_fileset' + OutFilesetName: str = "servicex_fileset" """ Name of the yaml file that will be created in the output directory. """ @@ -250,7 +260,8 @@ class ServiceXSpec(DocStringBaseModel): """ ServiceX Submission Specification - pass this into the ServiceX `deliver` function """ - model_config = {'use_attribute_docstrings': True} + + model_config = {"use_attribute_docstrings": True} General: _General = General() """ diff --git a/servicex/dataset/__init__.py b/servicex/dataset/__init__.py index ff14c5aa..7b5b4ebc 100644 --- a/servicex/dataset/__init__.py +++ b/servicex/dataset/__init__.py @@ -26,11 +26,13 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-from ..dataset_identifier import (RucioDatasetIdentifier as Rucio, # noqa: F401 - FileListDataset as FileList, - CERNOpenDataDatasetIdentifier as CERNOpenData, - DataSetIdentifier as GenericDataSet, - XRootDDatasetIdentifier as XRootD) +from ..dataset_identifier import ( # noqa: F401 + RucioDatasetIdentifier as Rucio, + FileListDataset as FileList, + CERNOpenDataDatasetIdentifier as CERNOpenData, + DataSetIdentifier as GenericDataSet, + XRootDDatasetIdentifier as XRootD, +) -__any__ = ['Rucio', 'FileList', 'CERNOpenData', 'GenericDataSet', 'XRootD'] +__any__ = ["Rucio", "FileList", "CERNOpenData", "GenericDataSet", "XRootD"] diff --git a/servicex/dataset_group.py b/servicex/dataset_group.py index a5e1e25a..06407d99 100644 --- a/servicex/dataset_group.py +++ b/servicex/dataset_group.py @@ -74,23 +74,27 @@ async def as_signed_urls_async( d.as_signed_urls_async(provided_progress=progress) for d in self.datasets ] - return await asyncio.gather(*self.tasks, return_exceptions=return_exceptions) + return await asyncio.gather( + *self.tasks, return_exceptions=return_exceptions + ) as_signed_urls = make_sync(as_signed_urls_async) - async def as_files_async(self, - display_progress: bool = True, - provided_progress: Optional[Progress] = None, - return_exceptions: bool = False, - ) -> List[Union[TransformedResults, BaseException]]: + async def as_files_async( + self, + display_progress: bool = True, + provided_progress: Optional[Progress] = None, + return_exceptions: bool = False, + ) -> List[Union[TransformedResults, BaseException]]: # preflight auth if self.datasets: await self.datasets[0].servicex._get_authorization() with ExpandableProgress(display_progress, provided_progress) as progress: self.tasks = [ - d.as_files_async(provided_progress=progress) - for d in self.datasets + d.as_files_async(provided_progress=progress) for d in self.datasets ] - return await asyncio.gather(*self.tasks, return_exceptions=return_exceptions) + return await asyncio.gather( + *self.tasks, return_exceptions=return_exceptions + ) as_files = make_sync(as_files_async) diff --git a/servicex/dataset_identifier.py b/servicex/dataset_identifier.py index ee50f3c1..098ec780 100644 --- a/servicex/dataset_identifier.py +++ b/servicex/dataset_identifier.py @@ -36,6 +36,7 @@ class DataSetIdentifier: Base class for specifying the dataset to transform. This can either be a list of xRootD URIs or a rucio DID """ + def __init__(self, scheme: str, dataset: str, num_files: Optional[int] = None): self.scheme = scheme self.dataset = dataset @@ -52,13 +53,7 @@ def populate_transform_request(self, transform_request: TransformRequest) -> Non @property def hash(self): - sha = hashlib.sha256( - str( - [ - self.dataset - ] - ).encode("utf-8") - ) + sha = hashlib.sha256(str([self.dataset]).encode("utf-8")) return sha.hexdigest() @@ -74,14 +69,16 @@ def __init__(self, dataset: str, num_files: Optional[int] = None): returns the same subset of files. """ - if ':' not in dataset: + if ":" not in dataset: # Missing a colon means that no namespace is specified and the request # will fail on the backend - raise ValueError(f'Specified dataset {dataset} is missing a Rucio namespace. ' - 'Please specify the dataset ID in the form "namespace:dataset".') + raise ValueError( + f"Specified dataset {dataset} is missing a Rucio namespace. " + 'Please specify the dataset ID in the form "namespace:dataset".' 
+ ) super().__init__("rucio", dataset, num_files=num_files) - yaml_tag = '!Rucio' + yaml_tag = "!Rucio" @classmethod def from_yaml(cls, _, node): @@ -110,7 +107,7 @@ def populate_transform_request(self, transform_request: TransformRequest) -> Non def did(self): return None - yaml_tag = '!FileList' + yaml_tag = "!FileList" @classmethod def from_yaml(cls, constructor, node): @@ -119,13 +116,7 @@ def from_yaml(cls, constructor, node): @property def hash(self): self.files.sort() - sha = hashlib.sha256( - str( - [ - self.files - ] - ).encode("utf-8") - ) + sha = hashlib.sha256(str([self.files]).encode("utf-8")) return sha.hexdigest() @@ -140,9 +131,9 @@ def __init__(self, dataset: int, num_files: Optional[int] = None): returns the same subset of files. """ - super().__init__("cernopendata", f'{dataset}', num_files=num_files) + super().__init__("cernopendata", f"{dataset}", num_files=num_files) - yaml_tag = '!CERNOpenData' + yaml_tag = "!CERNOpenData" @classmethod def from_yaml(cls, _, node): @@ -162,7 +153,7 @@ def __init__(self, pattern: str, num_files: Optional[int] = None): """ super().__init__("xrootd", pattern, num_files=num_files) - yaml_tag = '!XRootD' + yaml_tag = "!XRootD" @classmethod def from_yaml(cls, _, node): diff --git a/servicex/expandable_progress.py b/servicex/expandable_progress.py index f3f0e638..b8c8b02b 100644 --- a/servicex/expandable_progress.py +++ b/servicex/expandable_progress.py @@ -29,31 +29,40 @@ from typing import Optional -from rich.progress import Progress, TextColumn, BarColumn, MofNCompleteColumn, \ - TimeRemainingColumn, TaskID - - -DEFAULT_STYLE = [TextColumn("[progress.description]{task.description}"), - BarColumn(complete_style="rgb(114,156,31)", - finished_style="rgb(0,255,0)"), - MofNCompleteColumn(), - TimeRemainingColumn(compact=True, elapsed_when_finished=True) - ] - -BROKEN_STYLE = [TextColumn("[progress.description]{task.description}"), - BarColumn(complete_style="rgb(255,0,0)"), - MofNCompleteColumn(), - TimeRemainingColumn(compact=True, elapsed_when_finished=True) - ] +from rich.progress import ( + Progress, + TextColumn, + BarColumn, + MofNCompleteColumn, + TimeRemainingColumn, + TaskID, +) + + +DEFAULT_STYLE = [ + TextColumn("[progress.description]{task.description}"), + BarColumn(complete_style="rgb(114,156,31)", finished_style="rgb(0,255,0)"), + MofNCompleteColumn(), + TimeRemainingColumn(compact=True, elapsed_when_finished=True), +] + +BROKEN_STYLE = [ + TextColumn("[progress.description]{task.description}"), + BarColumn(complete_style="rgb(255,0,0)"), + MofNCompleteColumn(), + TimeRemainingColumn(compact=True, elapsed_when_finished=True), +] class ProgressCounts: - def __init__(self, - description: str, - task_id: TaskID, - start: Optional[int] = None, - total: Optional[int] = None, - completed: Optional[int] = None): + def __init__( + self, + description: str, + task_id: TaskID, + start: Optional[int] = None, + total: Optional[int] = None, + completed: Optional[int] = None, + ): self.description = description self.taskId = task_id @@ -63,10 +72,12 @@ def __init__(self, class ExpandableProgress: - def __init__(self, - display_progress: bool = True, - provided_progress: Optional[Progress | ExpandableProgress] = None, - overall_progress: bool = False): + def __init__( + self, + display_progress: bool = True, + provided_progress: Optional[Progress | ExpandableProgress] = None, + overall_progress: bool = False, + ): """ We want to be able to use rich progress bars in the async code, but there are some situtations where the user doesn't want 
them. Also we might be running @@ -91,8 +102,11 @@ def __init__(self, self.progress = TranformStatusProgress(*DEFAULT_STYLE) if provided_progress: - self.progress = provided_progress if isinstance(provided_progress, Progress) \ + self.progress = ( + provided_progress + if isinstance(provided_progress, Progress) else provided_progress.progress + ) else: self.progress = None @@ -123,14 +137,16 @@ def add_task(self, param, start, total): not self.overall_progress_download_task and not self.overall_progress_transform_task ): - self.overall_progress_transform_task = self.progress.add_task("Transform", - start=False, - total=None) - self.overall_progress_download_task = self.progress.add_task("Download/URLs", - start=False, - total=None) - - task_id = self.progress.add_task(param, start=start, total=total, visible=False) + self.overall_progress_transform_task = self.progress.add_task( + "Transform", start=False, total=None + ) + self.overall_progress_download_task = self.progress.add_task( + "Download/URLs", start=False, total=None + ) + + task_id = self.progress.add_task( + param, start=start, total=total, visible=False + ) new_task = ProgressCounts(param, task_id, start=start, total=total) self.progress_counts[task_id] = new_task return task_id @@ -164,16 +180,22 @@ def update(self, task_id, task_type, total=None, completed=None, **fields): overall_total += self.progress_counts[task].total if task_type == "Transform": - return self.progress.update(self.overall_progress_transform_task, - completed=overall_completed, - total=overall_total) + return self.progress.update( + self.overall_progress_transform_task, + completed=overall_completed, + total=overall_total, + ) else: - return self.progress.update(self.overall_progress_download_task, - completed=overall_completed, - total=overall_total) + return self.progress.update( + self.overall_progress_download_task, + completed=overall_completed, + total=overall_total, + ) if self.display_progress and not self.overall_progress: - return self.progress.update(task_id, completed=completed, total=total, **fields) + return self.progress.update( + task_id, completed=completed, total=total, **fields + ) def start_task(self, task_id, task_type): if self.display_progress and self.overall_progress: diff --git a/servicex/func_adl/func_adl_dataset.py b/servicex/func_adl/func_adl_dataset.py index 199a1396..6e951d32 100644 --- a/servicex/func_adl/func_adl_dataset.py +++ b/servicex/func_adl/func_adl_dataset.py @@ -111,7 +111,9 @@ def generate_qastle(self, a: ast.AST) -> str: source = a if top_function in self._execute_locally: # Request the default type here - default_format = self._ds.first_supported_datatype(["parquet", "root-ttree"]) + default_format = self._ds.first_supported_datatype( + ["parquet", "root-ttree"] + ) assert default_format is not None, "Unsupported ServiceX returned format" method_to_call = self._format_map[default_format] @@ -206,8 +208,8 @@ def as_qastle(self): class FuncADLQuery_Uproot(FuncADLQuery): - yaml_tag = '!FuncADL_Uproot' - default_codegen = 'uproot' + yaml_tag = "!FuncADL_Uproot" + default_codegen = "uproot" def __init__( self, @@ -222,8 +224,10 @@ def FromTree(self, tree_name): def generate_selection_string(self): if not self.tree_is_set: - raise ValueError('Uproot FuncADL query requires ' - 'that you set a tree name with FromTree()') + raise ValueError( + "Uproot FuncADL query requires " + "that you set a tree name with FromTree()" + ) return super().generate_selection_string() def set_provided_qastle(self, qastle_query: str): @@ 
-244,15 +248,15 @@ def from_yaml(cls, _, node): tree_match = re.match(from_tree_re, node.value) if tree_match: - query_string = f"EventDataset('bogus.root', '{tree_match.group(1)}')." \ - + tree_match.group(2) + query_string = ( + f"EventDataset('bogus.root', '{tree_match.group(1)}')." + + tree_match.group(2) + ) else: query_string = "EventDataset('bogus.root', 'events')." + node.value qastle_query = qastle.python_ast_to_text_ast( - qastle.insert_linq_nodes( - ast.parse(query_string) - ) + qastle.insert_linq_nodes(ast.parse(query_string)) ) query = cls() query.set_provided_qastle(qastle_query) @@ -260,20 +264,20 @@ def from_yaml(cls, _, node): class FuncADLQuery_ATLASr21(FuncADLQuery): - yaml_tag = '!FuncADL_ATLASr21' - default_codegen = 'atlasr21' + yaml_tag = "!FuncADL_ATLASr21" + default_codegen = "atlasr21" class FuncADLQuery_ATLASr22(FuncADLQuery): - yaml_tag = '!FuncADL_ATLASr22' - default_codegen = 'atlasr22' + yaml_tag = "!FuncADL_ATLASr22" + default_codegen = "atlasr22" class FuncADLQuery_ATLASxAOD(FuncADLQuery): - yaml_tag = '!FuncADL_ATLASxAOD' - default_codegen = 'atlasxaod' + yaml_tag = "!FuncADL_ATLASxAOD" + default_codegen = "atlasxaod" class FuncADLQuery_CMS(FuncADLQuery): - yaml_tag = '!FuncADL_CMS' - default_codegen = 'cms' + yaml_tag = "!FuncADL_CMS" + default_codegen = "cms" diff --git a/servicex/minio_adapter.py b/servicex/minio_adapter.py index 2103ded9..13f56084 100644 --- a/servicex/minio_adapter.py +++ b/servicex/minio_adapter.py @@ -70,8 +70,9 @@ def for_transform(cls, transform: TransformStatus): bucket=transform.request_id, ) - @retry(stop=stop_after_attempt(3), wait=wait_random_exponential(max=60), - reraise=True) + @retry( + stop=stop_after_attempt(3), wait=wait_random_exponential(max=60), reraise=True + ) async def list_bucket(self) -> List[ResultFile]: objects = await self.minio.list_objects(self.bucket) return [ @@ -80,11 +81,13 @@ async def list_bucket(self) -> List[ResultFile]: size=obj.size, extension=obj.object_name.split(".")[-1], ) - for obj in objects if not obj.is_dir + for obj in objects + if not obj.is_dir ] - @retry(stop=stop_after_attempt(3), wait=wait_random_exponential(max=60), - reraise=True) + @retry( + stop=stop_after_attempt(3), wait=wait_random_exponential(max=60), reraise=True + ) async def download_file( self, object_name: str, local_dir: str, shorten_filename: bool = False ) -> Path: @@ -103,8 +106,9 @@ async def download_file( ) return path.resolve() - @retry(stop=stop_after_attempt(3), wait=wait_random_exponential(max=60), - reraise=True) + @retry( + stop=stop_after_attempt(3), wait=wait_random_exponential(max=60), reraise=True + ) async def get_signed_url(self, object_name: str) -> str: return await self.minio.get_presigned_url( bucket_name=self.bucket, object_name=object_name, method="GET" diff --git a/servicex/models.py b/servicex/models.py index 5de50cb2..b078635b 100644 --- a/servicex/models.py +++ b/servicex/models.py @@ -38,13 +38,15 @@ def _get_typename(typeish) -> str: def _generate_model_docstring(model: type) -> str: - NL = '\n' - return '\n'.join([(model.__doc__ if model.__doc__ else model.__name__).strip(), - '', 'Args:'] - + [f' {field}: ({_get_typename(info.annotation)}) ' - f'{info.description.replace(NL, NL + " " * 8) if info.description else ""}' - for field, info in model.model_fields.items()] - ) + NL = "\n" + return "\n".join( + [(model.__doc__ if model.__doc__ else model.__name__).strip(), "", "Args:"] + + [ + f" {field}: ({_get_typename(info.annotation)}) " + f'{info.description.replace(NL, NL + " " * 8) if 
info.description else ""}' + for field, info in model.model_fields.items() + ] + ) class DocStringBaseModel(BaseModel): @@ -134,7 +136,7 @@ class TransformStatus(DocStringBaseModel): r""" Status object returned by servicex """ - model_config = {'use_attribute_docstrings': True} + model_config = {"use_attribute_docstrings": True} request_id: str did: str @@ -150,13 +152,25 @@ class TransformStatus(DocStringBaseModel): files: int files_completed: int = Field(validation_alias="files-completed") files_failed: int = Field(validation_alias="files-failed") - files_remaining: Optional[int] = Field(validation_alias="files-remaining", default=0) + files_remaining: Optional[int] = Field( + validation_alias="files-remaining", default=0 + ) submit_time: datetime = Field(validation_alias="submit-time", default=None) - finish_time: Optional[datetime] = Field(validation_alias="finish-time", default=None) - minio_endpoint: Optional[str] = Field(validation_alias="minio-endpoint", default=None) - minio_secured: Optional[bool] = Field(validation_alias="minio-secured", default=None) - minio_access_key: Optional[str] = Field(validation_alias="minio-access-key", default=None) - minio_secret_key: Optional[str] = Field(validation_alias="minio-secret-key", default=None) + finish_time: Optional[datetime] = Field( + validation_alias="finish-time", default=None + ) + minio_endpoint: Optional[str] = Field( + validation_alias="minio-endpoint", default=None + ) + minio_secured: Optional[bool] = Field( + validation_alias="minio-secured", default=None + ) + minio_access_key: Optional[str] = Field( + validation_alias="minio-access-key", default=None + ) + minio_secret_key: Optional[str] = Field( + validation_alias="minio-secret-key", default=None + ) log_url: Optional[str] = Field(validation_alias="log-url", default=None) @field_validator("finish_time", mode="before") @@ -171,7 +185,7 @@ class ResultFile(DocStringBaseModel): r""" Record reporting the properties of a transformed file result """ - model_config = {'use_attribute_docstrings': True} + model_config = {"use_attribute_docstrings": True} filename: str size: int @@ -183,7 +197,7 @@ class TransformedResults(DocStringBaseModel): Returned for a submission. Gives you everything you need to know about a completed transform. 
""" - model_config = {'use_attribute_docstrings': True} + model_config = {"use_attribute_docstrings": True} hash: str """Unique hash for transformation (used to look up results in cache)""" @@ -213,6 +227,7 @@ class DatasetFile(BaseModel): """ Model for a file in a cached dataset """ + id: int adler32: Optional[str] file_size: int @@ -224,6 +239,7 @@ class CachedDataset(BaseModel): """ Model for a cached dataset held by ServiceX server """ + id: int name: str did_finder: str diff --git a/servicex/python_dataset.py b/servicex/python_dataset.py index 3d1f4573..191ec72a 100644 --- a/servicex/python_dataset.py +++ b/servicex/python_dataset.py @@ -31,6 +31,7 @@ from textwrap import dedent from servicex.query_core import QueryStringGenerator import sys + if sys.version_info < (3, 11): from typing_extensions import Self else: @@ -38,8 +39,8 @@ class PythonFunction(QueryStringGenerator): - yaml_tag = '!PythonFunction' - default_codegen = 'python' + yaml_tag = "!PythonFunction" + default_codegen = "python" def __init__(self, python_function: Optional[Union[str, Callable]] = None): self.python_function: Optional[Union[str, Callable]] = python_function @@ -50,14 +51,18 @@ def with_uproot_function(self, f: Union[str, Callable]) -> Self: def generate_selection_string(self) -> str: if not self.python_function: - raise ValueError("You must provide a python function using with_uproot_function") + raise ValueError( + "You must provide a python function using with_uproot_function" + ) if isinstance(self.python_function, str): - return b64encode(dedent(self.python_function).encode("utf-8")).decode("utf-8") + return b64encode(dedent(self.python_function).encode("utf-8")).decode( + "utf-8" + ) else: - return b64encode(dedent(inspect.getsource(self.python_function)) - .encode("utf-8"))\ - .decode("utf-8") + return b64encode( + dedent(inspect.getsource(self.python_function)).encode("utf-8") + ).decode("utf-8") @classmethod def from_yaml(cls, _, node): diff --git a/servicex/query/__init__.py b/servicex/query/__init__.py index 6468ce2d..a79c6af4 100644 --- a/servicex/query/__init__.py +++ b/servicex/query/__init__.py @@ -27,11 +27,12 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
import sys + if sys.version_info < (3, 10): from importlib_metadata import entry_points else: from importlib.metadata import entry_points -plugins = entry_points(group='servicex.query') +plugins = entry_points(group="servicex.query") for _ in plugins: globals()[_.name] = _.load() diff --git a/servicex/query_cache.py b/servicex/query_cache.py index bf002a2e..0a3550bd 100644 --- a/servicex/query_cache.py +++ b/servicex/query_cache.py @@ -50,10 +50,14 @@ def __init__(self, config: Configuration): def close(self): self.db.close() - def transformed_results(self, transform: TransformRequest, - completed_status: TransformStatus, data_dir: str, - file_list: List[str], - signed_urls) -> TransformedResults: + def transformed_results( + self, + transform: TransformRequest, + completed_status: TransformStatus, + data_dir: str, + file_list: List[str], + signed_urls, + ) -> TransformedResults: return TransformedResults( hash=transform.compute_hash(), title=transform.title, @@ -65,18 +69,22 @@ def transformed_results(self, transform: TransformRequest, signed_url_list=signed_urls, files=completed_status.files, result_format=transform.result_format, - log_url=completed_status.log_url + log_url=completed_status.log_url, ) def cache_transform(self, record: TransformedResults): transforms = Query() with self.lock: - self.db.upsert(json.loads(record.model_dump_json()), transforms.hash == record.hash) + self.db.upsert( + json.loads(record.model_dump_json()), transforms.hash == record.hash + ) def update_record(self, record: TransformedResults): transforms = Query() with self.lock: - self.db.update(json.loads(record.model_dump_json()), transforms.hash == record.hash) + self.db.update( + json.loads(record.model_dump_json()), transforms.hash == record.hash + ) def contains_hash(self, hash: str) -> bool: """ @@ -84,8 +92,9 @@ def contains_hash(self, hash: str) -> bool: """ transforms = Query() with self.lock: - records = self.db.search((transforms.hash == hash) - & ~(transforms.status == 'SUBMITTED')) + records = self.db.search( + (transforms.hash == hash) & ~(transforms.status == "SUBMITTED") + ) return len(records) > 0 def is_transform_request_submitted(self, hash_value: str) -> bool: @@ -101,7 +110,7 @@ def is_transform_request_submitted(self, hash_value: str) -> bool: if not records: return False - if "status" in records[0] and records[0]["status"] == 'SUBMITTED': + if "status" in records[0] and records[0]["status"] == "SUBMITTED": return True return False @@ -114,7 +123,7 @@ def get_transform_request_id(self, hash_value: str) -> Optional[str]: with self.lock: records = self.db.search(transform.hash == hash_value) - if not records or 'request_id' not in records[0]: + if not records or "request_id" not in records[0]: raise CacheException("Request Id not found") return records[0]["request_id"] @@ -124,7 +133,9 @@ def update_transform_status(self, hash_value: str, status: str) -> None: """ transform = Query() with self.lock: - self.db.upsert({"hash": hash_value, "status": status}, transform.hash == hash_value) + self.db.upsert( + {"hash": hash_value, "status": status}, transform.hash == hash_value + ) def update_transform_request_id(self, hash_value: str, request_id: str) -> None: """ @@ -132,8 +143,10 @@ def update_transform_request_id(self, hash_value: str, request_id: str) -> None: """ transform = Query() with self.lock: - self.db.upsert({"hash": hash_value, "request_id": request_id}, - transform.hash == hash_value) + self.db.upsert( + {"hash": hash_value, "request_id": request_id}, + transform.hash == 
hash_value, + ) def get_transform_by_hash(self, hash: str) -> Optional[TransformedResults]: """ @@ -141,8 +154,9 @@ def get_transform_by_hash(self, hash: str) -> Optional[TransformedResults]: """ transforms = Query() with self.lock: - records = records = self.db.search((transforms.hash == hash) - & ~(transforms.status == 'SUBMITTED')) + records = self.db.search( + (transforms.hash == hash) & ~(transforms.status == "SUBMITTED") + ) if not records: return None @@ -152,7 +166,9 @@ def get_transform_by_hash(self, hash: str) -> Optional[TransformedResults]: else: return TransformedResults(**records[0]) - def get_transform_by_request_id(self, request_id: str) -> Optional[TransformedResults]: + def get_transform_by_request_id( + self, request_id: str + ) -> Optional[TransformedResults]: """ Returns completed transformed results using a request id """ @@ -179,13 +195,15 @@ def cached_queries(self) -> List[TransformedResults]: transforms = Query() with self.lock: - result = [TransformedResults(**doc) for doc in - self.db.search(transforms.request_id.exists())] + result = [ + TransformedResults(**doc) + for doc in self.db.search(transforms.request_id.exists()) + ] return result def delete_record_by_request_id(self, request_id: str): with self.lock: - self.db.remove(where('request_id') == request_id) + self.db.remove(where("request_id") == request_id) def delete_record_by_hash(self, hash: str): transforms = Query() @@ -208,9 +226,11 @@ def get_codegen_by_backend(self, backend: str) -> Optional[dict]: def update_codegen_by_backend(self, backend: str, codegen_list: list): transforms = Query() with self.lock: - self.db.upsert({'backend': backend, 'codegens': codegen_list}, - transforms.backend == backend) + self.db.upsert( + {"backend": backend, "codegens": codegen_list}, + transforms.backend == backend, + ) def delete_codegen_by_backend(self, backend: str): with self.lock: - self.db.remove(where('backend') == backend) + self.db.remove(where("backend") == backend) diff --git a/servicex/query_core.py b/servicex/query_core.py index d58841c3..32a56d3d 100644 --- a/servicex/query_core.py +++ b/servicex/query_core.py @@ -62,7 +62,7 @@ class ServiceXException(Exception): - """ Something happened while trying to carry out a ServiceX request """ + """Something happened while trying to carry out a ServiceX request""" class Query: @@ -127,7 +127,7 @@ def __init__( def generate_selection_string(self) -> str: if self.query_string_generator is None: - raise RuntimeError('query string generator not set') + raise RuntimeError("query string generator not set") return self.query_string_generator.generate_selection_string() @property @@ -182,8 +182,12 @@ async def submit_and_download( :return: Transform results object which contains the list of files downloaded or the list of pre-signed urls """ - from servicex.app.transforms import \ - create_kibana_link_parameters, TimeFrame, LogLevel + from servicex.app.transforms import ( + create_kibana_link_parameters, + TimeFrame, + LogLevel, + ) + download_files_task = None loop = asyncio.get_running_loop() @@ -197,56 +201,67 @@ def transform_complete(task: Task): expandable_progress.refresh() if task.exception(): logger.error( - f"ServiceX Exception for request ID {self.request_id} ({self.title})\"", - exc_info=task.exception() + f"ServiceX Exception for request ID {self.request_id} ({self.title})", + exc_info=task.exception(), ) self.cache.delete_record_by_request_id(self.request_id) if download_files_task: - import sys - if sys.version_info < (3, 9): -
download_files_task.cancel() - else: - download_files_task.cancel("Transform failed") + download_files_task.cancel("Transform failed") raise task.exception() if self.current_status.status in DONE_STATUS: if self.current_status.files_failed: self.cache.delete_record_by_request_id(self.request_id) - titlestr = (f'"{self.current_status.title}" ' - if self.current_status.title is not None else '') + titlestr = ( + f'"{self.current_status.title}" ' + if self.current_status.title is not None + else "" + ) errorstr = ( f"Transform {titlestr}completed with failures: " f"{self.current_status.files_failed}/" f"{self.current_status.files} files failed. Will not cache." ) - failedfiles = (self.servicex.url + '/transformation-request/' - + f'/{self.request_id}/results?status=failure') - errorstr2 = ("A list of failed files is at [bold red on white]" - f"[link={failedfiles}]this link[/link][/bold red on white]") + failedfiles = ( + self.servicex.url + + "/transformation-request/" + + f"/{self.request_id}/results?status=failure" + ) + errorstr2 = ( + "A list of failed files is at [bold red on white]" + f"[link={failedfiles}]this link[/link][/bold red on white]" + ) logger.error(errorstr2) logger.error( f"Transform Request id: {self.current_status.request_id}" ) if self.current_status.log_url is not None: - kibana_link = \ - create_kibana_link_parameters(self.current_status.log_url, - self.current_status.request_id, - LogLevel.error, - TimeFrame.month) - logger.error(f"More information of '{self.title}' [bold red on white][link={kibana_link}]HERE[/link][/bold red on white]") # NOQA: E501 + kibana_link = create_kibana_link_parameters( + self.current_status.log_url, + self.current_status.request_id, + LogLevel.error, + TimeFrame.month, + ) + logger.error( + f"More information of '{self.title}' [bold red on white][link={kibana_link}]HERE[/link][/bold red on white]" # NOQA: E501 + ) if self.fail_if_incomplete: raise ServiceXException(errorstr) else: logger.info("Transforms completed successfully") else: # pragma: no cover - logger.info(f"Transforms finished with code {self.current_status.status}") + logger.info( + f"Transforms finished with code {self.current_status.status}" + ) sx_request = self.transform_request sx_request_hash = sx_request.compute_hash() # Invalidate the cache if the hash already present but if the user ignores cache - if self.ignore_cache and (self.cache.contains_hash(sx_request_hash) - or self.cache.is_transform_request_submitted(sx_request_hash)): + if self.ignore_cache and ( + self.cache.contains_hash(sx_request_hash) + or self.cache.is_transform_request_submitted(sx_request_hash) + ): self.cache.delete_record_by_hash(sx_request_hash) # Let's see if this is in the cache already, but respect the user's wishes @@ -260,9 +275,8 @@ def transform_complete(task: Task): # And that we grabbed the resulting files in the way that the user requested # (Downloaded, or obtained pre-signed URLs) if cached_record: - if ( - (signed_urls_only and cached_record.signed_url_list) - or (not signed_urls_only and cached_record.file_list) + if (signed_urls_only and cached_record.signed_url_list) or ( + not signed_urls_only and cached_record.file_list ): logger.info("Returning results from cache") return cached_record @@ -359,9 +373,7 @@ def transform_complete(task: Task): return transform_report except CancelledError: - logger.warning( - "Aborted file downloads due to transform failure" - ) + logger.warning("Aborted file downloads due to transform failure") _ = await monitor_task # raise exception, if it is 
there @@ -378,8 +390,11 @@ async def transform_status_listener( of status. Once we know the number of files in the dataset, update the progress bars. """ - from servicex.app.transforms import LogLevel, \ - create_kibana_link_parameters, TimeFrame + from servicex.app.transforms import ( + LogLevel, + create_kibana_link_parameters, + TimeFrame, + ) # Actual number of files in the dataset. We only know this once the DID # finder has completed its work. In the meantime transformers will already @@ -415,27 +430,29 @@ async def transform_status_listener( # update the download progress bar to get the total number of files progress.update( - download_task, - download_bar_title, - total=self.current_status.files + download_task, download_bar_title, total=self.current_status.files ) if self.current_status.status in DONE_STATUS: self.files_completed = self.current_status.files_completed self.files_failed = self.current_status.files_failed - titlestr = (f'"{self.current_status.title}" ' - if self.current_status.title is not None else '') + titlestr = ( + f'"{self.current_status.title}" ' + if self.current_status.title is not None + else "" + ) if self.current_status.status == Status.complete: if self.files_failed: - bar = 'failure' + bar = "failure" else: - bar = 'complete' + bar = "complete" progress.update( progress_task, progress_bar_title, self.current_status.files, completed=self.current_status.files_completed, - bar=bar) + bar=bar, + ) return elif self.current_status.status == Status.canceled: logger.warning( @@ -445,23 +462,29 @@ async def transform_status_listener( ) err_str = f"Request {titlestr}was canceled" if self.current_status.log_url is not None: - kibana_link = \ - create_kibana_link_parameters(self.current_status.log_url, - self.current_status.request_id, - LogLevel.error, - TimeFrame.month) - logger.error(f"{err_str}\nMore logfiles of '{self.title}' [bold red on white][link={kibana_link}]HERE[/link][/bold red on white]") # NOQA: E501" + kibana_link = create_kibana_link_parameters( + self.current_status.log_url, + self.current_status.request_id, + LogLevel.error, + TimeFrame.month, + ) + logger.error( + f"{err_str}\nMore logfiles of '{self.title}' [bold red on white][link={kibana_link}]HERE[/link][/bold red on white]" # NOQA: E501 + ) raise ServiceXException(err_str) else: err_str = f"Fatal issue in ServiceX server for request {titlestr}" if self.current_status.log_url is not None: - kibana_link = \ - create_kibana_link_parameters(self.current_status.log_url, - self.current_status.request_id, - LogLevel.error, - TimeFrame.month) - logger.error(f"{err_str}\nMore logfiles of '{self.title}' [bold red on white][link={kibana_link}]HERE[/link][/bold red on white]") # NOQA: E501" + kibana_link = create_kibana_link_parameters( + self.current_status.log_url, + self.current_status.request_id, + LogLevel.error, + TimeFrame.month, + ) + logger.error( + f"{err_str}\nMore logfiles of '{self.title}' [bold red on white][link={kibana_link}]HERE[/link][/bold red on white]" # NOQA: E501 + ) raise ServiceXException(err_str) await asyncio.sleep(self.servicex_polling_interval) @@ -472,9 +495,7 @@ async def retrieve_current_transform_status(self): # Is this the first time we've polled status? We now know the request ID. # Update the display and set our download directory. 
if not self.current_status: - logger.info( - f"ServiceX Transform {s.title}: {s.request_id}" - ) + logger.info(f"ServiceX Transform {s.title}: {s.request_id}") self.download_path = self.cache.cache_path_for_transform(s) self.current_status = s @@ -562,8 +583,10 @@ async def get_signed_url( # signing urls for a previous transform then we know it is complete as well if cached_record or ( self.current_status - and (self.current_status.status in DONE_STATUS - and self.current_status.files_completed == len(files_seen)) + and ( + self.current_status.status in DONE_STATUS + and self.current_status.files_completed == len(files_seen) + ) ): break @@ -618,17 +641,19 @@ async def as_signed_urls_async( class QueryStringGenerator(ABC): - '''This abstract class just defines an interface to give the selection string''' + """This abstract class just defines an interface to give the selection string""" + @abc.abstractmethod def generate_selection_string(self) -> str: - """ override with the selection string to send to ServiceX """ + """override with the selection string to send to ServiceX""" """ override with the codegen string you would like associated with this query class """ default_codegen: Optional[str] = None class GenericQueryStringGenerator(QueryStringGenerator): - '''Return the string from the initializer''' + """Return the string from the initializer""" + def __init__(self, query: str, codegen: str): self.query = query self.default_codegen = codegen diff --git a/servicex/servicex_adapter.py b/servicex/servicex_adapter.py index 29b801bd..baa7b5b0 100644 --- a/servicex/servicex_adapter.py +++ b/servicex/servicex_adapter.py @@ -34,7 +34,12 @@ from aiohttp_retry import RetryClient, ExponentialRetry, ClientResponse from aiohttp import ContentTypeError from google.auth import jwt -from tenacity import AsyncRetrying, stop_after_attempt, wait_fixed, retry_if_not_exception_type +from tenacity import ( + AsyncRetrying, + stop_after_attempt, + wait_fixed, + retry_if_not_exception_type, +) from servicex.models import TransformRequest, TransformStatus, CachedDataset @@ -46,7 +51,7 @@ class AuthorizationError(BaseException): async def _extract_message(r: ClientResponse): try: o = await r.json() - error_message = o.get('message', str(r)) + error_message = o.get("message", str(r)) except ContentTypeError: error_message = await r.text() return error_message @@ -65,7 +70,7 @@ async def _get_token(self): async with client.post(url, headers=headers, json=None) as r: if r.status == 200: o = await r.json() - self.token = o['access_token'] + self.token = o["access_token"] else: raise AuthorizationError( f"ServiceX access token request rejected [{r.status} {r.reason}]" @@ -73,7 +78,7 @@ async def _get_token(self): @staticmethod def _get_bearer_token_file(): - bearer_token_file = os.environ.get('BEARER_TOKEN_FILE') + bearer_token_file = os.environ.get("BEARER_TOKEN_FILE") bearer_token = None if bearer_token_file: with open(bearer_token_file, "r") as f: @@ -82,8 +87,11 @@ def _get_bearer_token_file(): async def _get_authorization(self, force_reauth: bool = False) -> Dict[str, str]: now = time.time() - if (self.token and jwt.decode(self.token, verify=False)["exp"] - now > 60 - and not force_reauth): + if ( + self.token + and jwt.decode(self.token, verify=False)["exp"] - now > 60 + and not force_reauth + ): # if less than one minute validity, renew return {"Authorization": f"Bearer {self.token}"} else: @@ -94,8 +102,11 @@ async def _get_authorization(self, force_reauth: bool = False) -> Dict[str, str] if not bearer_token 
and not self.refresh_token: return {} - if not self.token or force_reauth or\ - float(jwt.decode(self.token, verify=False)["exp"]) - now < 60: + if ( + not self.token + or force_reauth + or float(jwt.decode(self.token, verify=False)["exp"]) - now < 60 + ): await self._get_token() return {"Authorization": f"Bearer {self.token}"} @@ -103,16 +114,21 @@ async def get_transforms(self) -> List[TransformStatus]: headers = await self._get_authorization() retry_options = ExponentialRetry(attempts=3, start_timeout=10) async with RetryClient(retry_options=retry_options) as client: - async with client.get(url=f"{self.url}/servicex/transformation", - headers=headers) as r: + async with client.get( + url=f"{self.url}/servicex/transformation", headers=headers + ) as r: if r.status == 401: - raise AuthorizationError(f"Not authorized to access serviceX at {self.url}") + raise AuthorizationError( + f"Not authorized to access serviceX at {self.url}" + ) elif r.status > 400: error_message = await _extract_message(r) - raise RuntimeError("ServiceX WebAPI Error during transformation " - f"submission: {r.status} - {error_message}") + raise RuntimeError( + "ServiceX WebAPI Error during transformation " + f"submission: {r.status} - {error_message}" + ) o = await r.json() - statuses = [TransformStatus(**status) for status in o['requests']] + statuses = [TransformStatus(**status) for status in o["requests"]] return statuses def get_code_generators(self): @@ -121,46 +137,47 @@ def get_code_generators(self): if r.status_code == 403: raise AuthorizationError( - f"Not authorized to access serviceX at {self.url}") + f"Not authorized to access serviceX at {self.url}" + ) return r.json() - async def get_datasets(self, did_finder=None, show_deleted=False) -> List[CachedDataset]: + async def get_datasets( + self, did_finder=None, show_deleted=False + ) -> List[CachedDataset]: headers = await self._get_authorization() params = {"did-finder": did_finder} if did_finder else {} if show_deleted: - params['show-deleted'] = True + params["show-deleted"] = True async with ClientSession() as session: async with session.get( - headers=headers, - url=f"{self.url}/servicex/datasets", - params=params) as r: + headers=headers, url=f"{self.url}/servicex/datasets", params=params + ) as r: if r.status == 403: raise AuthorizationError( - f"Not authorized to access serviceX at {self.url}") + f"Not authorized to access serviceX at {self.url}" + ) elif r.status != 200: msg = await _extract_message(r) raise RuntimeError(f"Failed to get datasets: {r.status} - {msg}") result = await r.json() - datasets = [CachedDataset(**d) for d in result['datasets']] + datasets = [CachedDataset(**d) for d in result["datasets"]] return datasets async def get_dataset(self, dataset_id=None) -> CachedDataset: headers = await self._get_authorization() - path_template = '/servicex/datasets/{dataset_id}' + path_template = "/servicex/datasets/{dataset_id}" url = self.url + path_template.format(dataset_id=dataset_id) async with ClientSession() as session: - async with session.get( - headers=headers, - url=url - ) as r: + async with session.get(headers=headers, url=url) as r: if r.status == 403: raise AuthorizationError( - f"Not authorized to access serviceX at {self.url}") + f"Not authorized to access serviceX at {self.url}" + ) elif r.status == 404: raise ValueError(f"Dataset {dataset_id} not found") elif r.status != 200: @@ -173,84 +190,89 @@ async def get_dataset(self, dataset_id=None) -> CachedDataset: async def delete_dataset(self, dataset_id=None) -> bool: headers = 
await self._get_authorization() - path_template = '/servicex/datasets/{dataset_id}' + path_template = "/servicex/datasets/{dataset_id}" url = self.url + path_template.format(dataset_id=dataset_id) async with ClientSession() as session: - async with session.delete( - headers=headers, - url=url) as r: + async with session.delete(headers=headers, url=url) as r: if r.status == 403: raise AuthorizationError( - f"Not authorized to access serviceX at {self.url}") + f"Not authorized to access serviceX at {self.url}" + ) elif r.status == 404: raise ValueError(f"Dataset {dataset_id} not found") elif r.status != 200: msg = await _extract_message(r) raise RuntimeError(f"Failed to delete dataset {dataset_id} - {msg}") result = await r.json() - return result['stale'] + return result["stale"] async def delete_transform(self, transform_id=None): headers = await self._get_authorization() - path_template = f'/servicex/transformation/{transform_id}' + path_template = f"/servicex/transformation/{transform_id}" url = self.url + path_template.format(transform_id=transform_id) async with ClientSession() as session: - async with session.delete( - headers=headers, - url=url) as r: + async with session.delete(headers=headers, url=url) as r: if r.status == 403: raise AuthorizationError( - f"Not authorized to access serviceX at {self.url}") + f"Not authorized to access serviceX at {self.url}" + ) elif r.status == 404: raise ValueError(f"Transform {transform_id} not found") elif r.status != 200: msg = await _extract_message(r) - raise RuntimeError(f"Failed to delete transform {transform_id} - {msg}") + raise RuntimeError( + f"Failed to delete transform {transform_id} - {msg}" + ) async def cancel_transform(self, transform_id=None): headers = await self._get_authorization() - path_template = f'/servicex/transformation/{transform_id}/cancel' + path_template = f"/servicex/transformation/{transform_id}/cancel" url = self.url + path_template.format(transform_id=transform_id) async with ClientSession() as session: - async with session.get( - headers=headers, - url=url) as r: + async with session.get(headers=headers, url=url) as r: if r.status == 403: raise AuthorizationError( - f"Not authorized to access serviceX at {self.url}") + f"Not authorized to access serviceX at {self.url}" + ) elif r.status == 404: raise ValueError(f"Transform {transform_id} not found") elif r.status != 200: msg = await _extract_message(r) - raise RuntimeError(f"Failed to cancel transform {transform_id} - {msg}") + raise RuntimeError( + f"Failed to cancel transform {transform_id} - {msg}" + ) async def submit_transform(self, transform_request: TransformRequest) -> str: headers = await self._get_authorization() retry_options = ExponentialRetry(attempts=3, start_timeout=30) async with RetryClient(retry_options=retry_options) as client: - async with client.post(url=f"{self.url}/servicex/transformation", - headers=headers, - json=transform_request.model_dump(by_alias=True, - exclude_none=True)) as r: + async with client.post( + url=f"{self.url}/servicex/transformation", + headers=headers, + json=transform_request.model_dump(by_alias=True, exclude_none=True), + ) as r: if r.status == 401: raise AuthorizationError( - f"Not authorized to access serviceX at {self.url}") + f"Not authorized to access serviceX at {self.url}" + ) elif r.status == 400: message = await _extract_message(r) raise ValueError(f"Invalid transform request: {message}") elif r.status > 400: error_message = await _extract_message(r) - raise RuntimeError("ServiceX WebAPI Error during 
transformation " - f"submission: {r.status} - {error_message}") + raise RuntimeError( + "ServiceX WebAPI Error during transformation " + f"submission: {r.status} - {error_message}" + ) else: o = await r.json() - return o['request_id'] + return o["request_id"] async def get_transform_status(self, request_id: str) -> TransformStatus: headers = await self._get_authorization() @@ -258,28 +280,34 @@ async def get_transform_status(self, request_id: str) -> TransformStatus: async with RetryClient(retry_options=retry_options) as client: try: async for attempt in AsyncRetrying( - retry=retry_if_not_exception_type(ValueError), - stop=stop_after_attempt(3), - wait=wait_fixed(3), - reraise=True): + retry=retry_if_not_exception_type(ValueError), + stop=stop_after_attempt(3), + wait=wait_fixed(3), + reraise=True, + ): with attempt: - async with client.get(url=f"{self.url}/servicex/" - f"transformation/{request_id}", - headers=headers) as r: + async with client.get( + url=f"{self.url}/servicex/" f"transformation/{request_id}", + headers=headers, + ) as r: if r.status == 401: # perhaps we just ran out of auth validity the last time? # refetch auth then raise an error for retry headers = await self._get_authorization(True) raise AuthorizationError( - f"Not authorized to access serviceX at {self.url}") + f"Not authorized to access serviceX at {self.url}" + ) if r.status == 404: raise ValueError(f"Transform ID {request_id} not found") elif r.status > 400: error_message = await _extract_message(r) - raise RuntimeError("ServiceX WebAPI Error during transformation: " - f"{r.status} - {error_message}") + raise RuntimeError( + "ServiceX WebAPI Error during transformation: " + f"{r.status} - {error_message}" + ) o = await r.json() return TransformStatus(**o) except RuntimeError as e: - raise RuntimeError("ServiceX WebAPI Error " - f"while getting transform status: {e}") + raise RuntimeError( + "ServiceX WebAPI Error " f"while getting transform status: {e}" + ) diff --git a/servicex/servicex_client.py b/servicex/servicex_client.py index 1deaecf7..177c62a0 100644 --- a/servicex/servicex_client.py +++ b/servicex/servicex_client.py @@ -52,12 +52,14 @@ class ReturnValueException(Exception): - """ An exception occurred at some point while obtaining this result from ServiceX """ + """An exception occurred at some point while obtaining this result from ServiceX""" + def __init__(self, exc): import copy - message = ('Exception occurred while making ServiceX request.\n' - + (''.join(traceback.format_exception(type(exc), exc, exc.__traceback__))) - ) + + message = "Exception occurred while making ServiceX request.\n" + ( + "".join(traceback.format_exception(type(exc), exc, exc.__traceback__)) + ) super().__init__(message) self._exc = copy.copy(exc) @@ -65,6 +67,7 @@ def __init__(self, exc): class GuardList(Sequence): def __init__(self, data: Union[Sequence, Exception]): import copy + super().__init__() if isinstance(data, Exception): self._data = ReturnValueException(data) @@ -95,7 +98,7 @@ def __repr__(self): return repr(self._data) else: data = cast(ReturnValueException, self._data) - return f'Invalid GuardList: {repr(data._exc)}' + return f"Invalid GuardList: {repr(data._exc)}" def _load_ServiceXSpec( @@ -116,6 +119,7 @@ def _load_ServiceXSpec( import sys from ccorp.ruamel.yaml.include import YAML + yaml = YAML() if sys.version_info < (3, 10): @@ -159,7 +163,7 @@ def get_codegen(_sample: Sample, _general: General): result_format=config.General.OutputFormat.to_ResultFormat(), ignore_cache=sample.IgnoreLocalCache, 
query=sample.Query, - fail_if_incomplete=fail_if_incomplete + fail_if_incomplete=fail_if_incomplete, ) logger.debug(f"Query string: {query.generate_selection_string()}") query.ignore_cache = sample.IgnoreLocalCache @@ -168,19 +172,26 @@ def get_codegen(_sample: Sample, _general: General): return datasets -def _output_handler(config: ServiceXSpec, requests: List[Query], - results: List[Union[TransformedResults, Exception]]): +def _output_handler( + config: ServiceXSpec, + requests: List[Query], + results: List[Union[TransformedResults, Exception]], +): matched_results = zip(requests, results) if config.General.Delivery == General.DeliveryEnum.URLs: - out_dict = {obj[0].title: GuardList(obj[1].signed_url_list - if not isinstance(obj[1], Exception) - else obj[1]) - for obj in matched_results} + out_dict = { + obj[0].title: GuardList( + obj[1].signed_url_list if not isinstance(obj[1], Exception) else obj[1] + ) + for obj in matched_results + } elif config.General.Delivery == General.DeliveryEnum.LocalCache: - out_dict = {obj[0].title: GuardList(obj[1].file_list - if not isinstance(obj[1], Exception) - else obj[1]) - for obj in matched_results} + out_dict = { + obj[0].title: GuardList( + obj[1].file_list if not isinstance(obj[1], Exception) else obj[1] + ) + for obj in matched_results + } if config.General.OutputDirectory: import yaml as yl @@ -201,8 +212,7 @@ def deliver( servicex_name: Optional[str] = None, return_exceptions: bool = True, fail_if_incomplete: bool = True, - ignore_local_cache: bool = False - + ignore_local_cache: bool = False, ): config = _load_ServiceXSpec(config) @@ -364,7 +374,9 @@ def generic_query( if isinstance(query, str): if codegen is None: - raise RuntimeError("A pure string query requires a codegen argument as well") + raise RuntimeError( + "A pure string query requires a codegen argument as well" + ) query = GenericQueryStringGenerator(query, codegen) if not isinstance(query, QueryStringGenerator): raise ValueError("query argument must be string or QueryStringGenerator") @@ -391,7 +403,7 @@ def generic_query( result_format=result_format, ignore_cache=ignore_cache, query_string_generator=query, - fail_if_incomplete=fail_if_incomplete + fail_if_incomplete=fail_if_incomplete, ) return qobj diff --git a/servicex/uproot_raw/uproot_raw.py b/servicex/uproot_raw/uproot_raw.py index 1bc0c0c6..654dad7e 100644 --- a/servicex/uproot_raw/uproot_raw.py +++ b/servicex/uproot_raw/uproot_raw.py @@ -39,7 +39,8 @@ class TreeSubQuery(DocStringBaseModel): ones of the same name for uproot.arrays(): https://uproot.readthedocs.io/en/stable/uproot.behaviors.TBranch.HasBranches.html#uproot-behaviors-tbranch-hasbranches-arrays """ - model_config = {'use_attribute_docstrings': True} + + model_config = {"use_attribute_docstrings": True} treename: Union[Mapping[str, str], List[str], str] """Name of input ntuple in file""" @@ -57,7 +58,8 @@ class TreeSubQuery(DocStringBaseModel): class CopyHistogramSubQuery(DocStringBaseModel): """Request the copying of a ROOT object from the input file to the output.""" - model_config = {'use_attribute_docstrings': True} + + model_config = {"use_attribute_docstrings": True} copy_histograms: Union[List[str], str] """Objects to copy""" @@ -68,15 +70,18 @@ class CopyHistogramSubQuery(DocStringBaseModel): @pydantic.dataclasses.dataclass class UprootRawQuery(QueryStringGenerator): - yaml_tag = '!UprootRaw' + yaml_tag = "!UprootRaw" query: Union[List[SubQuery], SubQuery] - default_codegen: str = 'uproot-raw' + default_codegen: str = "uproot-raw" def 
generate_selection_string(self): import json + final_query: List[SubQuery] - if isinstance(self.query, get_args(SubQuery)): # from Python 3.10 we don't need "get_args" + if isinstance( + self.query, get_args(SubQuery) + ): # from Python 3.10 we don't need "get_args" final_query = [self.query] else: final_query = self.query @@ -86,6 +91,7 @@ def generate_selection_string(self): def from_yaml(cls, _, node): code = node.value import json + queries = json.loads(code) q = cls(queries) return q diff --git a/tests/app/test_app.py b/tests/app/test_app.py index ed8d103a..76ced5c5 100644 --- a/tests/app/test_app.py +++ b/tests/app/test_app.py @@ -30,20 +30,21 @@ def test_app_version(script_runner): import servicex._version - result = script_runner.run(['servicex', '--version']) + + result = script_runner.run(["servicex", "--version"]) assert result.returncode == 0 - assert result.stdout == f'ServiceX {servicex._version.__version__}\n' + assert result.stdout == f"ServiceX {servicex._version.__version__}\n" def test_deliver(script_runner): - with patch('servicex.app.main.servicex_client') as mock_servicex_client: - mock_servicex_client.deliver = Mock(return_value={ - "UprootRaw_YAML": [ - "/tmp/foo.root", - "/tmp/bar.root" - ]}) - result = script_runner.run(['servicex', 'deliver', "foo.yaml"]) + with patch("servicex.app.main.servicex_client") as mock_servicex_client: + mock_servicex_client.deliver = Mock( + return_value={"UprootRaw_YAML": ["/tmp/foo.root", "/tmp/bar.root"]} + ) + result = script_runner.run(["servicex", "deliver", "foo.yaml"]) assert result.returncode == 0 - result_rows = result.stdout.split('\n') - assert result_rows[0] == 'Delivering foo.yaml to ServiceX cache' - assert result_rows[1] == "{'UprootRaw_YAML': ['/tmp/foo.root', '/tmp/bar.root']}" + result_rows = result.stdout.split("\n") + assert result_rows[0] == "Delivering foo.yaml to ServiceX cache" + assert ( + result_rows[1] == "{'UprootRaw_YAML': ['/tmp/foo.root', '/tmp/bar.root']}" + ) diff --git a/tests/app/test_codegen.py b/tests/app/test_codegen.py index f8fa0e8f..8b6a885d 100644 --- a/tests/app/test_codegen.py +++ b/tests/app/test_codegen.py @@ -25,24 +25,36 @@ def test_codegen_list(script_runner): - with patch('servicex.servicex_adapter.ServiceXAdapter.get_code_generators', return_value={ - "uproot": "http://uproot-codegen", - "xaod": "http://xaod-codegen" - }): - result = script_runner.run(['servicex', 'codegen', 'list', '-c', - 'tests/example_config.yaml']) + with patch( + "servicex.servicex_adapter.ServiceXAdapter.get_code_generators", + return_value={"uproot": "http://uproot-codegen", "xaod": "http://xaod-codegen"}, + ): + result = script_runner.run( + ["servicex", "codegen", "list", "-c", "tests/example_config.yaml"] + ) assert result.returncode == 0 - assert result.stdout == '''{ + assert ( + result.stdout + == """{ "uproot": "http://uproot-codegen", "xaod": "http://xaod-codegen" } -''' +""" + ) def test_codegen_flush(script_runner): - with patch('servicex.query_cache.QueryCache.delete_codegen_by_backend') as p: - result = script_runner.run(['servicex', 'codegen', 'flush', - '-c', 'tests/example_config.yaml', - '-b', 'localhost']) + with patch("servicex.query_cache.QueryCache.delete_codegen_by_backend") as p: + result = script_runner.run( + [ + "servicex", + "codegen", + "flush", + "-c", + "tests/example_config.yaml", + "-b", + "localhost", + ] + ) assert result.returncode == 0 - p.assert_called_once_with('localhost') + p.assert_called_once_with("localhost") diff --git a/tests/app/test_datasets.py 
b/tests/app/test_datasets.py index 59b61814..3b7230e9 100644 --- a/tests/app/test_datasets.py +++ b/tests/app/test_datasets.py @@ -37,15 +37,15 @@ def dataset(): adler32="some_adler32_hash", file_size=1024, file_events=100, - paths="/path/to/file1" + paths="/path/to/file1", ), DatasetFile( id=2, adler32="another_adler32_hash", file_size=2048, file_events=200, - paths="/path/to/file2" - ) + paths="/path/to/file2", + ), ] cached_dataset = CachedDataset( @@ -59,72 +59,86 @@ def dataset(): last_updated=datetime.now(), lookup_status="completed", is_stale=False, - files=dataset_files + files=dataset_files, ) return cached_dataset @pytest.mark.asyncio def test_datasets_list(script_runner, dataset): - with patch('servicex.app.datasets.ServiceXClient') as mock_servicex: + with patch("servicex.app.datasets.ServiceXClient") as mock_servicex: mock_get_datasets = AsyncMock(return_value=[dataset]) mock_servicex.return_value.get_datasets = mock_get_datasets - result = script_runner.run(['servicex', 'datasets', 'list', - '-c', 'tests/example_config.yaml']) + result = script_runner.run( + ["servicex", "datasets", "list", "-c", "tests/example_config.yaml"] + ) assert result.returncode == 0 result_row = result.stdout.split(" ") assert len(result_row) == 7, f"Expected 7 elements, got {len(result_row)}" # Assert specific index values - assert result_row[0].strip() == '42' - assert result_row[1] == 'test_dataset' - assert result_row[2] == '2' - assert result_row[3] == '0MB' - assert result_row[4] == 'completed' + assert result_row[0].strip() == "42" + assert result_row[1] == "test_dataset" + assert result_row[2] == "2" + assert result_row[3] == "0MB" + assert result_row[4] == "completed" mock_get_datasets.assert_called_once_with(did_finder=None, show_deleted=False) mock_get_datasets.reset_mock() - result = script_runner.run(['servicex', 'datasets', 'list', - '-c', 'tests/example_config.yaml', - '--did-finder', 'some_finder', - '--show-deleted']) + result = script_runner.run( + [ + "servicex", + "datasets", + "list", + "-c", + "tests/example_config.yaml", + "--did-finder", + "some_finder", + "--show-deleted", + ] + ) assert result.returncode == 0 - mock_get_datasets.assert_called_once_with(did_finder='some_finder', show_deleted=True) + mock_get_datasets.assert_called_once_with( + did_finder="some_finder", show_deleted=True + ) def test_dataset_get(script_runner, dataset): - with patch('servicex.app.datasets.ServiceXClient') as mock_servicex: + with patch("servicex.app.datasets.ServiceXClient") as mock_servicex: mock_get_dataset = AsyncMock(return_value=dataset) mock_servicex.return_value.get_dataset = mock_get_dataset - result = script_runner.run(['servicex', 'datasets', 'get', '42', - '-c', 'tests/example_config.yaml']) + result = script_runner.run( + ["servicex", "datasets", "get", "42", "-c", "tests/example_config.yaml"] + ) assert result.returncode == 0 mock_get_dataset.assert_called_once_with(42) # The output is a json document result_doc = json.loads(result.stdout) - assert result_doc['dataset']['id'] == 42 - assert len(result_doc['dataset']['files']) == 2 + assert result_doc["dataset"]["id"] == 42 + assert len(result_doc["dataset"]["files"]) == 2 def test_dataset_delete(script_runner): - with patch('servicex.app.datasets.ServiceXClient') as mock_servicex: + with patch("servicex.app.datasets.ServiceXClient") as mock_servicex: mock_delete_dataset = AsyncMock(return_value=True) mock_servicex.return_value.delete_dataset = mock_delete_dataset - result = script_runner.run(['servicex', 'datasets', 'delete', - 
'-c', 'tests/example_config.yaml', '42']) + result = script_runner.run( + ["servicex", "datasets", "delete", "-c", "tests/example_config.yaml", "42"] + ) assert result.returncode == 0 assert result.stdout == "Dataset 42 deleted\n" mock_delete_dataset.assert_called_once_with(42) mock_delete_dataset_not_found = AsyncMock(return_value=False) mock_servicex.return_value.delete_dataset = mock_delete_dataset_not_found - result = script_runner.run(['servicex', 'datasets', 'delete', - '-c', 'tests/example_config.yaml', '42']) + result = script_runner.run( + ["servicex", "datasets", "delete", "-c", "tests/example_config.yaml", "42"] + ) assert result.returncode == 1 mock_delete_dataset.assert_called_once_with(42) assert result.stdout == "Dataset 42 not found\n" diff --git a/tests/app/test_transforms.py b/tests/app/test_transforms.py index a1230cb6..eb4e3910 100644 --- a/tests/app/test_transforms.py +++ b/tests/app/test_transforms.py @@ -26,7 +26,13 @@ import pytest -from servicex.models import TransformStatus, ResultDestination, ResultFormat, Status, ResultFile +from servicex.models import ( + TransformStatus, + ResultDestination, + ResultFormat, + Status, + ResultFile, +) @pytest.fixture @@ -61,7 +67,7 @@ def transform_status_record() -> TransformStatus: "minio_secured": True, "minio_access_key": "test-access-key", "minio_secret_key": "test-secret-key", - "log_url": "https://logs.example.com/test-job" + "log_url": "https://logs.example.com/test-job", } return TransformStatus(**base_data) @@ -70,110 +76,135 @@ def transform_status_record() -> TransformStatus: @pytest.fixture def result_files(): return [ - ResultFile( - filename="test_file", - size=1024, - extension="parquet" - ), - ResultFile( - filename="test_file2", - size=2048, - extension="parquet" - )] + ResultFile(filename="test_file", size=1024, extension="parquet"), + ResultFile(filename="test_file2", size=2048, extension="parquet"), + ] def test_transforms_list(script_runner, transform_status_record): - with patch('servicex.app.transforms.ServiceXClient') as mock_servicex: + with patch("servicex.app.transforms.ServiceXClient") as mock_servicex: transform_status_record.status = Status.running mock_list_transforms = Mock(return_value=[transform_status_record]) mock_servicex.return_value.get_transforms = mock_list_transforms - result = script_runner.run(['servicex', 'transforms', 'list', - '-c', 'tests/example_config.yaml']) + result = script_runner.run( + ["servicex", "transforms", "list", "-c", "tests/example_config.yaml"] + ) assert result.returncode == 0 result_row = result.stdout.split(" ") assert len(result_row) == 4 - assert result_row[0].strip() == 'test-request-123' - assert result_row[1] == 'Test Transform Job' - assert result_row[2] == 'Running' - assert result_row[3].strip() == '8' + assert result_row[0].strip() == "test-request-123" + assert result_row[1] == "Test Transform Job" + assert result_row[2] == "Running" + assert result_row[3].strip() == "8" mock_list_transforms.assert_called_once() mock_list_transforms.reset_mock() - result = script_runner.run(['servicex', 'transforms', 'list', - '-c', 'tests/example_config.yaml', - '--complete']) + result = script_runner.run( + [ + "servicex", + "transforms", + "list", + "-c", + "tests/example_config.yaml", + "--complete", + ] + ) assert result.returncode == 0 assert len(result.stdout.strip()) == 0 -@pytest.mark.parametrize("transform_state, report_complete, report_running, expected", [ - # Scenario 1: No flags set (report all) - (Status.complete, False, False, True), - 
(Status.running, False, False, True),
-
-    # Scenario 2: Complete records only
-    (Status.complete, True, False, True),
-    (Status.running, True, False, False),
-
-    # Scenario 3: Running records only
-    (Status.complete, False, True, False),
-    (Status.running, False, True, True),
-
-    # Scenario 4: Both flags set
-    (Status.complete, True, True, True),
-    (Status.running, True, True, True),
-])
-def test_transforms_list_filters(script_runner, transform_status_record,
-                                 transform_state, report_complete, report_running, expected):
-    with patch('servicex.app.transforms.ServiceXClient') as mock_servicex:
+@pytest.mark.parametrize(
+    "transform_state, report_complete, report_running, expected",
+    [
+        # Scenario 1: No flags set (report all)
+        (Status.complete, False, False, True),
+        (Status.running, False, False, True),
+        # Scenario 2: Complete records only
+        (Status.complete, True, False, True),
+        (Status.running, True, False, False),
+        # Scenario 3: Running records only
+        (Status.complete, False, True, False),
+        (Status.running, False, True, True),
+        # Scenario 4: Both flags set
+        (Status.complete, True, True, True),
+        (Status.running, True, True, True),
+    ],
+)
+def test_transforms_list_filters(
+    script_runner,
+    transform_status_record,
+    transform_state,
+    report_complete,
+    report_running,
+    expected,
+):
+    with patch("servicex.app.transforms.ServiceXClient") as mock_servicex:
         transform_status_record.status = transform_state
         mock_list_transforms = Mock(return_value=[transform_status_record])
         mock_servicex.return_value.get_transforms = mock_list_transforms
-        command_line = ['servicex', 'transforms', 'list',
-                        '-c', 'tests/example_config.yaml']
+        command_line = [
+            "servicex",
+            "transforms",
+            "list",
+            "-c",
+            "tests/example_config.yaml",
+        ]
         if report_complete:
-            command_line.append('--complete')
+            command_line.append("--complete")
         if report_running:
-            command_line.append('--running')
+            command_line.append("--running")
         result = script_runner.run(command_line)
         assert result.returncode == 0
-        assert len(result.stdout.strip()) if expected else len(result.stdout.strip()) == 0
+        assert (
+            len(result.stdout.strip()) if expected else len(result.stdout.strip()) == 0
+        )


 def test_list_files(script_runner, transform_status_record, result_files):
-    with patch('servicex.app.transforms.ServiceXClient') as mock_servicex:
-        with patch('servicex.app.transforms.MinioAdapter') as mock_minio:
+    with patch("servicex.app.transforms.ServiceXClient") as mock_servicex:
+        with patch("servicex.app.transforms.MinioAdapter") as mock_minio:
             mock_transform_status = AsyncMock(return_value=transform_status_record)
-            mock_servicex.return_value.get_transform_status_async = mock_transform_status
+            mock_servicex.return_value.get_transform_status_async = (
+                mock_transform_status
+            )
             mock_minio_adapter = Mock()
             mock_minio_adapter.list_bucket = AsyncMock(return_value=result_files)
             mock_minio.for_transform = Mock(return_value=mock_minio_adapter)
-            result = script_runner.run(['servicex', 'transforms', 'files',
-                                        '-c', 'tests/example_config.yaml',
-                                        'test-request-123'])
+            result = script_runner.run(
+                [
+                    "servicex",
+                    "transforms",
+                    "files",
+                    "-c",
+                    "tests/example_config.yaml",
+                    "test-request-123",
+                ]
+            )

             assert result.returncode == 0
             result_rows = result.stdout.strip().split("\n")
             assert len(result_rows) == 2
             result_row = result_rows[1].split(" ")
-            assert result_row[0].strip() == 'test_file2'
-            assert result_row[1] == '0.00'
-            assert result_row[2] == 'parquet'
+            assert result_row[0].strip() == "test_file2"
+            assert result_row[1] == "0.00"
+            assert result_row[2] == "parquet"

-            mock_transform_status.assert_called_once_with('test-request-123')
+            mock_transform_status.assert_called_once_with("test-request-123")
             mock_minio.for_transform.assert_called_once_with(transform_status_record)
             mock_minio_adapter.list_bucket.assert_called_once()


 def test_download_files(script_runner, transform_status_record, result_files):
-    with patch('servicex.app.transforms.ServiceXClient') as mock_servicex:
-        with patch('servicex.app.transforms.MinioAdapter') as mock_minio:
+    with patch("servicex.app.transforms.ServiceXClient") as mock_servicex:
+        with patch("servicex.app.transforms.MinioAdapter") as mock_minio:
             mock_transform_status = AsyncMock(return_value=transform_status_record)
-            mock_servicex.return_value.get_transform_status_async = mock_transform_status
+            mock_servicex.return_value.get_transform_status_async = (
+                mock_transform_status
+            )
             mock_minio_adapter = Mock()
             mock_minio_adapter.list_bucket = AsyncMock(return_value=result_files)
@@ -181,45 +212,66 @@ def test_download_files(script_runner, transform_status_record, result_files):
                 return_value=Path("/tmp/test_file.parquet")
             )
             mock_minio.for_transform = Mock(return_value=mock_minio_adapter)
-            result = script_runner.run(['servicex', 'transforms', 'download',
-                                        '-c', 'tests/example_config.yaml',
-                                        'test-request-123'])
+            result = script_runner.run(
+                [
+                    "servicex",
+                    "transforms",
+                    "download",
+                    "-c",
+                    "tests/example_config.yaml",
+                    "test-request-123",
+                ]
+            )

             assert result.returncode == 0
             result_rows = result.stdout.strip().split("\n")
             assert len(result_rows) == 3
             assert result_rows[1] == "/tmp/test_file.parquet"
-            mock_transform_status.assert_called_once_with('test-request-123')
+            mock_transform_status.assert_called_once_with("test-request-123")
             mock_minio.for_transform.assert_called_once_with(transform_status_record)
             mock_minio_adapter.list_bucket.assert_called_once()
             assert mock_minio_adapter.download_file.call_count == 2
-            assert mock_minio_adapter.download_file.mock_calls[0].args[0] == 'test_file'
+            assert mock_minio_adapter.download_file.mock_calls[0].args[0] == "test_file"


 def test_delete_transform(script_runner, transform_status_record):
-    with patch('servicex.app.transforms.ServiceXClient') as mock_servicex:
+    with patch("servicex.app.transforms.ServiceXClient") as mock_servicex:
         mock_delete_transform = AsyncMock(return_value=True)
         mock_servicex.return_value.delete_transform = mock_delete_transform
         mock_delete_local = Mock(return_value=True)
         mock_servicex.return_value.delete_local_transform = mock_delete_local
-        result = script_runner.run(['servicex', 'transforms', 'delete',
-                                    '-c', 'tests/example_config.yaml',
-                                    'test-request-123'])
+        result = script_runner.run(
+            [
+                "servicex",
+                "transforms",
+                "delete",
+                "-c",
+                "tests/example_config.yaml",
+                "test-request-123",
+            ]
+        )
         assert result.returncode == 0
         assert result.stdout == "Transform test-request-123 deleted\n"
-        mock_delete_transform.assert_called_once_with('test-request-123')
+        mock_delete_transform.assert_called_once_with("test-request-123")


 def test_cancel_transform(script_runner, transform_status_record):
-    with patch('servicex.app.transforms.ServiceXClient') as mock_servicex:
+    with patch("servicex.app.transforms.ServiceXClient") as mock_servicex:
         mock_cancel_transform = AsyncMock(return_value=True)
         mock_servicex.return_value.cancel_transform = mock_cancel_transform
-        result = script_runner.run(['servicex', 'transforms', 'cancel',
-                                    '-c', 'tests/example_config.yaml',
-                                    'test-request-123'])
+        result = script_runner.run(
+            [
+                "servicex",
+                "transforms",
+                "cancel",
+                "-c",
+                "tests/example_config.yaml",
+                "test-request-123",
+            ]
+        )
         assert result.returncode == 0
         assert result.stdout == "Transform test-request-123 cancelled\n"
-        mock_cancel_transform.assert_called_once_with('test-request-123')
+        mock_cancel_transform.assert_called_once_with("test-request-123")
diff --git a/tests/conftest.py b/tests/conftest.py
index c7ae6d61..772abd12 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -72,7 +72,7 @@ def python_dataset(dummy_parquet_file):
         result_format=ResultFormat.parquet,
         sx_adapter=None,  # type: ignore
         config=None,  # type: ignore
-        query_cache=None  # type: ignore
+        query_cache=None,  # type: ignore
     )  # type: ignore

     def foo():
@@ -188,7 +188,10 @@ def transformed_result_signed_url() -> TransformedResults:
         submit_time=datetime.now(),
         data_dir="/foo/bar",
         file_list=[],
-        signed_url_list=['https://dummy.junk.io/1.parquet', 'https://dummy.junk.io/2.parquet'],
+        signed_url_list=[
+            "https://dummy.junk.io/1.parquet",
+            "https://dummy.junk.io/2.parquet",
+        ],
         files=2,
         result_format=ResultFormat.root_ttree,
     )
@@ -196,10 +199,9 @@ def transformed_result_signed_url() -> TransformedResults:

 @fixture
 def dummy_parquet_file():
-    data = {'column1': [1, 2, 3, 4],
-            'column2': ['A', 'B', 'C', 'D']}
+    data = {"column1": [1, 2, 3, 4], "column2": ["A", "B", "C", "D"]}
     df = pd.DataFrame(data)
-    parquet_file_path = '1.parquet'
+    parquet_file_path = "1.parquet"
     df.to_parquet(parquet_file_path, index=False)

     yield parquet_file_path
@@ -210,11 +212,13 @@ def dummy_parquet_file():

 @fixture
 def codegen_list():
-    return {'atlasr21': 'http://servicex-code-gen-atlasr21:8000',
-            'atlasr22': 'http://servicex-code-gen-atlasr22:8000',
-            'atlasxaod': 'http://servicex-code-gen-atlasxaod:8000',
-            'cms': 'http://servicex-code-gen-cms:8000',
-            'cmssw-5-3-32': 'http://servicex-code-gen-cmssw-5-3-32:8000',
-            'python': 'http://servicex-code-gen-python:8000',
-            'uproot': 'http://servicex-code-gen-uproot:8000',
-            'uproot-raw': 'http://servicex-code-gen-uproot-raw:8000'}
+    return {
+        "atlasr21": "http://servicex-code-gen-atlasr21:8000",
+        "atlasr22": "http://servicex-code-gen-atlasr22:8000",
+        "atlasxaod": "http://servicex-code-gen-atlasxaod:8000",
+        "cms": "http://servicex-code-gen-cms:8000",
+        "cmssw-5-3-32": "http://servicex-code-gen-cmssw-5-3-32:8000",
+        "python": "http://servicex-code-gen-python:8000",
+        "uproot": "http://servicex-code-gen-uproot:8000",
+        "uproot-raw": "http://servicex-code-gen-uproot-raw:8000",
+    }
diff --git a/tests/test_config.py b/tests/test_config.py
index e24c1672..f2141eb3 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -32,18 +32,18 @@
 from servicex.configuration import Configuration


-@patch('servicex.configuration.tempfile.gettempdir', return_value="./mytemp")
+@patch("servicex.configuration.tempfile.gettempdir", return_value="./mytemp")
 def test_config_read(tempdir):
     # Windows style user name
-    os.environ['UserName'] = "p_higgs"
+    os.environ["UserName"] = "p_higgs"
     c = Configuration.read(config_path="tests/example_config.yaml")
     assert c.cache_path == "mytemp/servicex_p_higgs"

     # Reset environment
-    del os.environ['UserName']
+    del os.environ["UserName"]

     # Linux style user name
-    os.environ['USER'] = "p_higgs2"
+    os.environ["USER"] = "p_higgs2"
     c = Configuration.read(config_path="tests/example_config.yaml")
     assert c.cache_path == "mytemp/servicex_p_higgs2"

@@ -52,17 +52,17 @@ def test_config_read(tempdir):
         Configuration.read(config_path="invalid.yaml")


-@patch('servicex.configuration.tempfile.gettempdir', return_value="./mytemp")
+@patch("servicex.configuration.tempfile.gettempdir", return_value="./mytemp")
 def test_default_cache_path(tempdir):
     # Windows style user name
-    os.environ['UserName'] = "p_higgs"
+    os.environ["UserName"] = "p_higgs"
     c = Configuration.read(config_path="tests/example_config_no_cache_path.yaml")
     assert c.cache_path == "mytemp/servicex_p_higgs"
-    del os.environ['UserName']
+    del os.environ["UserName"]

     # Linux style user name
-    os.environ['USER'] = "p_higgs"
+    os.environ["USER"] = "p_higgs"
     c = Configuration.read(config_path="tests/example_config_no_cache_path.yaml")
     assert c.cache_path == "mytemp/servicex_p_higgs"
-    del os.environ['USER']
+    del os.environ["USER"]
diff --git a/tests/test_databinder.py b/tests/test_databinder.py
index c2541253..ffd19f1e 100644
--- a/tests/test_databinder.py
+++ b/tests/test_databinder.py
@@ -95,16 +95,16 @@ def test_list_of_root_files():

 def test_output_format():
     spec = basic_spec()
-    spec['General'] = {'OutputFormat': 'root-ttree'}
+    spec["General"] = {"OutputFormat": "root-ttree"}
     ServiceXSpec.model_validate(spec)
-    spec['General'] = {'OutputFormat': 'parquet'}
+    spec["General"] = {"OutputFormat": "parquet"}
     ServiceXSpec.model_validate(spec)
-    spec['General'] = {'OutputFormat': OutputFormat.root_ttree}
+    spec["General"] = {"OutputFormat": OutputFormat.root_ttree}
     ServiceXSpec.model_validate(spec)
-    spec['General'] = {'OutputFormat': OutputFormat.parquet}
+    spec["General"] = {"OutputFormat": OutputFormat.parquet}
     ServiceXSpec.model_validate(spec)
     with pytest.raises(ValidationError):
-        spec['General'] = {'OutputFormat': 'root-tree'}
+        spec["General"] = {"OutputFormat": "root-tree"}
         ServiceXSpec.model_validate(spec)

@@ -155,7 +155,9 @@ def test_dataset_rucio_did_numfiles():
         samples=[
             {
                 "Name": "sampleA",
-                "Dataset": dataset.Rucio("user.ivukotic:user.ivukotic.single_top_tW__nominal"),
+                "Dataset": dataset.Rucio(
+                    "user.ivukotic:user.ivukotic.single_top_tW__nominal"
+                ),
                 "NFiles": 12,
                 "Query": "a",
             }
@@ -178,8 +180,9 @@ def test_dataset_zerofiles():
         samples=[
             {
                 "Name": "sampleA",
-                "Dataset":
-                    dataset.Rucio("user.ivukotic:user.ivukotic.single_top_tW__nominal"),
+                "Dataset": dataset.Rucio(
+                    "user.ivukotic:user.ivukotic.single_top_tW__nominal"
+                ),
                 "NFiles": 0,
                 "Query": "a",
             }
@@ -193,9 +196,10 @@ def test_dataset_zerofiles():
         samples=[
             {
                 "Name": "sampleA",
-                "Dataset":
-                    dataset.Rucio("user.ivukotic:user.ivukotic.single_top_tW__nominal",
-                                  num_files=0),
+                "Dataset": dataset.Rucio(
+                    "user.ivukotic:user.ivukotic.single_top_tW__nominal",
+                    num_files=0,
+                ),
                 "Query": "a",
             }
         ]
@@ -208,10 +212,12 @@ def test_dataset_zerofiles():
         samples=[
             {
                 "Name": "sampleA",
-                "Dataset": dataset.FileList([
-                    "root://eospublic.cern.ch//file1.root",
-                    "root://eospublic.cern.ch//file2.root",
-                ]),
+                "Dataset": dataset.FileList(
+                    [
+                        "root://eospublic.cern.ch//file1.root",
+                        "root://eospublic.cern.ch//file2.root",
+                    ]
+                ),
                 "Query": "a",
             }
         ]
@@ -221,28 +227,32 @@ def test_dataset_zerofiles():


 def test_cernopendata():
-    spec = ServiceXSpec.model_validate({
-        "Sample": [
-            {
-                "Name": "sampleA",
-                "Dataset": dataset.CERNOpenData(1507),
-                "Function": "a"
-            }
-        ]
-    })
+    spec = ServiceXSpec.model_validate(
+        {
+            "Sample": [
+                {
+                    "Name": "sampleA",
+                    "Dataset": dataset.CERNOpenData(1507),
+                    "Function": "a",
+                }
+            ]
+        }
+    )
     assert spec.Sample[0].dataset_identifier.did == "cernopendata://1507"


 def test_xrootd():
-    spec = ServiceXSpec.model_validate({
-        "Sample": [
-            {
-                "Name": "sampleA",
-                "Dataset": dataset.XRootD('root://blablabla/*/?.root'),
-                "Function": "a"
-            }
-        ]
-    })
+    spec = ServiceXSpec.model_validate(
+        {
+            "Sample": [
+                {
+                    "Name": "sampleA",
+                    "Dataset": dataset.XRootD("root://blablabla/*/?.root"),
+                    "Function": "a",
+                }
+            ]
+        }
+    )
     assert spec.Sample[0].dataset_identifier.did == "xrootd://root://blablabla/*/?.root"

@@ -278,6 +288,7 @@ def test_invalid_dataset_identifier():

 def test_submit_mapping(transformed_result, codegen_list):
     from servicex import deliver
+
     spec = {
         "General": {
             "Codegen": "uproot-raw",
@@ -287,68 +298,80 @@
                 "Name": "sampleA",
                 "RucioDID": "user.ivukotic:user.ivukotic.single_top_tW__nominal",
                 "Query": "[{'treename': 'nominal'}]",
-                "Codegen": "uproot-raw"
+                "Codegen": "uproot-raw",
             }
-        ]
+        ],
     }
-    with patch('servicex.dataset_group.DatasetGroup.as_files',
-               return_value=[transformed_result]), \
-            patch('servicex.servicex_client.ServiceXClient.get_code_generators',
-                  return_value=codegen_list):
-        results = deliver(spec, config_path='tests/example_config.yaml')
-        assert list(results['sampleA']) == ['1.parquet']
+    with patch(
+        "servicex.dataset_group.DatasetGroup.as_files",
+        return_value=[transformed_result],
+    ), patch(
+        "servicex.servicex_client.ServiceXClient.get_code_generators",
+        return_value=codegen_list,
+    ):
+        results = deliver(spec, config_path="tests/example_config.yaml")
+        assert list(results["sampleA"]) == ["1.parquet"]


 def test_submit_mapping_signed_urls(transformed_result_signed_url, codegen_list):
     from servicex import deliver
+
     spec = {
-        "General": {
-            "Delivery": "URLs"
-        },
+        "General": {"Delivery": "URLs"},
         "Sample": [
             {
                 "Name": "sampleA",
                 "RucioDID": "user.ivukotic:user.ivukotic.single_top_tW__nominal",
                 "Query": "[{'treename': 'nominal'}]",
-                "Codegen": "uproot-raw"
+                "Codegen": "uproot-raw",
             }
-        ]
+        ],
     }
-    with patch('servicex.dataset_group.DatasetGroup.as_signed_urls',
-               return_value=[transformed_result_signed_url]), \
-            patch('servicex.servicex_client.ServiceXClient.get_code_generators',
-                  return_value=codegen_list):
-        results = deliver(spec, config_path='tests/example_config.yaml')
-        assert list(results['sampleA']) == ['https://dummy.junk.io/1.parquet',
-                                            'https://dummy.junk.io/2.parquet']
+    with patch(
+        "servicex.dataset_group.DatasetGroup.as_signed_urls",
+        return_value=[transformed_result_signed_url],
+    ), patch(
+        "servicex.servicex_client.ServiceXClient.get_code_generators",
+        return_value=codegen_list,
+    ):
+        results = deliver(spec, config_path="tests/example_config.yaml")
+        assert list(results["sampleA"]) == [
+            "https://dummy.junk.io/1.parquet",
+            "https://dummy.junk.io/2.parquet",
+        ]


 def test_submit_mapping_failure(transformed_result, codegen_list):
     from servicex import deliver
+
     spec = {
         "Sample": [
             {
                 "Name": "sampleA",
                 "RucioDID": "user.ivukotic:user.ivukotic.single_top_tW__nominal",
                 "Query": "[{'treename': 'nominal'}]",
-                "Codegen": "uproot-raw"
+                "Codegen": "uproot-raw",
             }
         ]
     }
-    with patch('servicex.dataset_group.DatasetGroup.as_files',
-               return_value=[ServiceXException("dummy")]), \
-            patch('servicex.servicex_client.ServiceXClient.get_code_generators',
-                  return_value=codegen_list):
-        results = deliver(spec, config_path='tests/example_config.yaml')
+    with patch(
+        "servicex.dataset_group.DatasetGroup.as_files",
+        return_value=[ServiceXException("dummy")],
+    ), patch(
+        "servicex.servicex_client.ServiceXClient.get_code_generators",
+        return_value=codegen_list,
+    ):
+        results = deliver(spec, config_path="tests/example_config.yaml")
         assert len(results) == 1
         with pytest.raises(ReturnValueException):
             # should expect an exception to be thrown on access
-            for _ in results['sampleA']:
+            for _ in results["sampleA"]:
                 pass


 def test_submit_mapping_failure_signed_urls(codegen_list):
     from servicex import deliver
+
     spec = {
         "General": {"Delivery": "URLs"},
         "Sample": [
@@ -356,28 +379,35 @@
                 "Name": "sampleA",
                 "RucioDID": "user.ivukotic:user.ivukotic.single_top_tW__nominal",
                 "Query": "[{'treename': 'nominal'}]",
-                "Codegen": "uproot-raw"
+                "Codegen": "uproot-raw",
             }
-        ]
+        ],
     }
-    with patch('servicex.dataset_group.DatasetGroup.as_signed_urls',
-               return_value=[ServiceXException("dummy")]), \
-            patch('servicex.servicex_client.ServiceXClient.get_code_generators',
-                  return_value=codegen_list):
-        results = deliver(spec, config_path='tests/example_config.yaml', return_exceptions=False)
+    with patch(
+        "servicex.dataset_group.DatasetGroup.as_signed_urls",
+        return_value=[ServiceXException("dummy")],
+    ), patch(
+        "servicex.servicex_client.ServiceXClient.get_code_generators",
+        return_value=codegen_list,
+    ):
+        results = deliver(
+            spec, config_path="tests/example_config.yaml", return_exceptions=False
+        )
         assert len(results) == 1
         with pytest.raises(ReturnValueException):
             # should expect an exception to be thrown on access
-            for _ in results['sampleA']:
+            for _ in results["sampleA"]:
                 pass


 def test_yaml(tmp_path):
     from servicex.servicex_client import _load_ServiceXSpec
     from servicex.dataset import FileList, Rucio, CERNOpenData
+
     # Nominal paths
     with open(path := (tmp_path / "python.yaml"), "w") as f:
-        f.write("""
+        f.write(
+            """
 General:
   OutputFormat: root-ttree
   Delivery: LocalCache
@@ -409,30 +439,38 @@ def run_query(input_filenames=None):
     Query: !UprootRaw '[{"treename": "nominal"}]'
   - Name: ttH7
     Dataset: !XRootD root://eosatlas.cern.ch//eos/atlas/path/*/file.root
-""")
+"""
+        )
         f.flush()
         result = _load_ServiceXSpec(path)
-        assert type(result.Sample[0].Query).__name__ == 'PythonFunction'
-        assert type(result.Sample[1].Query).__name__ == 'FuncADLQuery_Uproot'
-        assert type(result.Sample[2].Query).__name__ == 'UprootRawQuery'
+        assert type(result.Sample[0].Query).__name__ == "PythonFunction"
+        assert type(result.Sample[1].Query).__name__ == "FuncADLQuery_Uproot"
+        assert type(result.Sample[2].Query).__name__ == "UprootRawQuery"
         assert isinstance(result.Sample[3].dataset_identifier, Rucio)
-        assert (result.Sample[3].dataset_identifier.did
-                == 'rucio://user.kchoi:user.kchoi.fcnc_tHq_ML.ttH.v112')
+        assert (
+            result.Sample[3].dataset_identifier.did
+            == "rucio://user.kchoi:user.kchoi.fcnc_tHq_ML.ttH.v112"
+        )
         assert isinstance(result.Sample[4].dataset_identifier, FileList)
-        assert (result.Sample[4].dataset_identifier.files
-                == ["/path/to/file1.root", "/path/to/file2.root"])
+        assert result.Sample[4].dataset_identifier.files == [
+            "/path/to/file1.root",
+            "/path/to/file2.root",
+        ]
         assert isinstance(result.Sample[5].dataset_identifier, CERNOpenData)
-        assert result.Sample[5].dataset_identifier.did == 'cernopendata://1507'
-        assert (result.Sample[6].dataset_identifier.did
-                == 'xrootd://root://eosatlas.cern.ch//eos/atlas/path/*/file.root')
+        assert result.Sample[5].dataset_identifier.did == "cernopendata://1507"
+        assert (
+            result.Sample[6].dataset_identifier.did
+            == "xrootd://root://eosatlas.cern.ch//eos/atlas/path/*/file.root"
+        )

     # Path from string
     result2 = _load_ServiceXSpec(str(path))
-    assert type(result2.Sample[0].Query).__name__ == 'PythonFunction'
+    assert type(result2.Sample[0].Query).__name__ == "PythonFunction"

     # Python syntax error
     with open(path := (tmp_path / "python.yaml"), "w") as f:
-        f.write("""
+        f.write(
+            """
 General:
   OutputFormat: root-ttree
   Delivery: LocalCache
@@ -443,14 +481,16 @@ def run_query(input_filenames=None):
   Query: !PythonFunction |
     def run_query(input_filenames=None):
         i ==== 18 # syntax error
-""")
+"""
+        )
         f.flush()
         with pytest.raises(SyntaxError):
             _load_ServiceXSpec(path)

     # Duplicate samples with different names but same dataset and query
     with open(path := (tmp_path / "python.yaml"), "w") as f:
-        f.write("""
+        f.write(
+            """
 General:
   OutputFormat: root-ttree
   Delivery: LocalCache
@@ -466,7 +506,8 @@ def run_query(input_filenames=None):
   - Name: ttH5
     Dataset: !FileList ["/path/to/file1.root", "/path/to/file2.root"]
     Query: !UprootRaw '[{"treename": "nominal"}]'
-    """)
+    """
+        )
         f.flush()
         with pytest.raises(RuntimeError):
             _load_ServiceXSpec(path)
@@ -474,7 +515,8 @@ def run_query(input_filenames=None):
     # Duplicate samples with different names but same datasets (multiple) and query
     # change the order of the datasets
     with open(path := (tmp_path / "python.yaml"), "w") as f:
-        f.write("""
+        f.write(
+            """
 General:
   OutputFormat: root-ttree
   Delivery: LocalCache
@@ -490,7 +532,8 @@ def run_query(input_filenames=None):
   - Name: ttH5
     Dataset: !FileList ["/path/to/file1.root", "/path/to/file2.root"]
     Query: !UprootRaw '[{"treename": "nominal"}]'
-    """)
+    """
+        )
         f.flush()
         with pytest.raises(RuntimeError):
             _load_ServiceXSpec(path)
@@ -498,7 +541,8 @@ def run_query(input_filenames=None):
     # Samples with different names but same datasets(multiple) and query
     # different NFiles
     with open(path := (tmp_path / "python.yaml"), "w") as f:
-        f.write("""
+        f.write(
+            """
 General:
   OutputFormat: root-ttree
   Delivery: LocalCache
@@ -512,16 +556,18 @@ def run_query(input_filenames=None):
     NFiles: 1
     Dataset: !FileList ["/path/to/file1.root", "/path/to/file2.root"]
     Query: !UprootRaw '[{"treename": "nominal"}]'
-    """)
+    """
+        )
         f.flush()
         result = _load_ServiceXSpec(path)
-        assert type(result.Sample[0].Query).__name__ == 'UprootRawQuery'
-        assert type(result.Sample[1].Query).__name__ == 'UprootRawQuery'
+        assert type(result.Sample[0].Query).__name__ == "UprootRawQuery"
+        assert type(result.Sample[1].Query).__name__ == "UprootRawQuery"

     # Samples with different names but same datasets(multiple) and
     # different queries
     with open(path := (tmp_path / "python.yaml"), "w") as f:
-        f.write("""
+        f.write(
+            """
 General:
   OutputFormat: root-ttree
   Delivery: LocalCache
@@ -533,24 +579,30 @@ def run_query(input_filenames=None):
   - Name: ttH6
     Dataset: !FileList ["/path/to/file1.root", "/path/to/file2.root"]
     Query: !UprootRaw '[{"treename": "CollectionTree"}]'
-    """)
+    """
+        )
         f.flush()
         result = _load_ServiceXSpec(path)
-        assert type(result.Sample[0].Query).__name__ == 'UprootRawQuery'
-        assert type(result.Sample[1].Query).__name__ == 'UprootRawQuery'
+        assert type(result.Sample[0].Query).__name__ == "UprootRawQuery"
+        assert type(result.Sample[1].Query).__name__ == "UprootRawQuery"


 def test_yaml_include(tmp_path):
     from servicex.servicex_client import _load_ServiceXSpec
+
     # Create two files, one has definitions for the other and is included by it
-    with open(tmp_path / "definitions.yaml", "w") as f1, \
-            open(path2 := (tmp_path / "parent.yaml"), "w") as f2:
-        f1.write("""
+    with open(tmp_path / "definitions.yaml", "w") as f1, open(
+        path2 := (tmp_path / "parent.yaml"), "w"
+    ) as f2:
+        f1.write(
+            """
 - &DEF_query !PythonFunction |
     def run_query(input_filenames=None):
         return []
-""")
-        f2.write("""
+"""
+        )
+        f2.write(
+            """
 Definitions:
     !include definitions.yaml

@@ -562,7 +614,8 @@ def run_query(input_filenames=None):
   - Name: ttH
     RucioDID: user.kchoi:user.kchoi.fcnc_tHq_ML.ttH.v11
     Query: *DEF_query
-""")
+"""
+        )
         f1.flush()
         f2.flush()
     _load_ServiceXSpec(path2)
@@ -571,84 +624,106 @@ def run_query(input_filenames=None):
 def test_funcadl_query(transformed_result, codegen_list):
     from servicex import deliver
     from servicex.query import FuncADL_Uproot  # type: ignore
-    spec = ServiceXSpec.model_validate({
-        "Sample": [
-            {
-                "Name": "sampleA",
-                "RucioDID": "user.ivukotic:user.ivukotic.single_top_tW__nominal",
-                "Query": FuncADL_Uproot().FromTree("nominal")
-                .Select(lambda e: {"lep_pt": e["lep_pt"]})
-            }
-        ]
-    })
-    with patch('servicex.dataset_group.DatasetGroup.as_files',
-               return_value=[transformed_result]), \
-            patch('servicex.servicex_client.ServiceXClient.get_code_generators',
-                  return_value=codegen_list):
-        deliver(spec, config_path='tests/example_config.yaml')
+
+    spec = ServiceXSpec.model_validate(
+        {
+            "Sample": [
+                {
+                    "Name": "sampleA",
+                    "RucioDID": "user.ivukotic:user.ivukotic.single_top_tW__nominal",
+                    "Query": FuncADL_Uproot()
+                    .FromTree("nominal")
+                    .Select(lambda e: {"lep_pt": e["lep_pt"]}),
+                }
+            ]
+        }
+    )
+    with patch(
+        "servicex.dataset_group.DatasetGroup.as_files",
+        return_value=[transformed_result],
+    ), patch(
+        "servicex.servicex_client.ServiceXClient.get_code_generators",
+        return_value=codegen_list,
+    ):
+        deliver(spec, config_path="tests/example_config.yaml")


 def test_query_with_codegen_override(transformed_result, codegen_list):
     from servicex import deliver
     from servicex.query import FuncADL_Uproot  # type: ignore
+
     # first, with General override
-    spec = ServiceXSpec.model_validate({
-        "General": {
-            "Codegen": "does-not-exist"
-        },
-        "Sample": [
-            {
-                "Name": "sampleA",
-                "RucioDID": "user.ivukotic:user.ivukotic.single_top_tW__nominal",
-                "Query": FuncADL_Uproot().FromTree("nominal")
-                .Select(lambda e: {"lep_pt": e["lep_pt"]})
-            }
-        ]
-    })
-    with patch('servicex.dataset_group.DatasetGroup.as_files',
-               return_value=[transformed_result]), \
-            patch('servicex.servicex_client.ServiceXClient.get_code_generators',
-                  return_value=codegen_list):
+    spec = ServiceXSpec.model_validate(
+        {
+            "General": {"Codegen": "does-not-exist"},
+            "Sample": [
+                {
+                    "Name": "sampleA",
+                    "RucioDID": "user.ivukotic:user.ivukotic.single_top_tW__nominal",
+                    "Query": FuncADL_Uproot()
+                    .FromTree("nominal")
+                    .Select(lambda e: {"lep_pt": e["lep_pt"]}),
+                }
+            ],
+        }
+    )
+    with patch(
+        "servicex.dataset_group.DatasetGroup.as_files",
+        return_value=[transformed_result],
+    ), patch(
+        "servicex.servicex_client.ServiceXClient.get_code_generators",
+        return_value=codegen_list,
+    ):
        with pytest.raises(NameError) as excinfo:
-            deliver(spec, config_path='tests/example_config.yaml')
+            deliver(spec, config_path="tests/example_config.yaml")

     # if this has propagated correctly, the override worked
-    assert excinfo.value.args[0].startswith('does-not-exist')
+    assert excinfo.value.args[0].startswith("does-not-exist")

     # second, with sample-level override
-    spec = ServiceXSpec.model_validate({
-        "Sample": [
-            {
-                "Name": "sampleA",
-                "RucioDID": "user.ivukotic:user.ivukotic.single_top_tW__nominal",
-                "Query": FuncADL_Uproot().FromTree("nominal")
-                .Select(lambda e: {"lep_pt": e["lep_pt"]}),
-                "Codegen": "does-not-exist"
-            }
-        ]
-    })
-    with patch('servicex.dataset_group.DatasetGroup.as_files',
-               return_value=[transformed_result]), \
-            patch('servicex.servicex_client.ServiceXClient.get_code_generators',
-                  return_value=codegen_list):
+    spec = ServiceXSpec.model_validate(
+        {
+            "Sample": [
+                {
+                    "Name": "sampleA",
+                    "RucioDID": "user.ivukotic:user.ivukotic.single_top_tW__nominal",
+                    "Query": FuncADL_Uproot()
+                    .FromTree("nominal")
+                    .Select(lambda e: {"lep_pt": e["lep_pt"]}),
+                    "Codegen": "does-not-exist",
+                }
+            ]
+        }
+    )
+    with patch(
+        "servicex.dataset_group.DatasetGroup.as_files",
+        return_value=[transformed_result],
+    ), patch(
+        "servicex.servicex_client.ServiceXClient.get_code_generators",
+        return_value=codegen_list,
+    ):
         with pytest.raises(NameError) as excinfo:
-            deliver(spec, config_path='tests/example_config.yaml')
+            deliver(spec, config_path="tests/example_config.yaml")

     # if this has propagated correctly, the override worked
-    assert excinfo.value.args[0].startswith('does-not-exist')
+    assert excinfo.value.args[0].startswith("does-not-exist")


 def test_databinder_load_dict():
     from servicex.query import FuncADL_Uproot  # type: ignore
     from servicex.servicex_client import _load_ServiceXSpec
-    _load_ServiceXSpec({
-        "Sample": [
-            {
-                "Name": "sampleA",
-                "RucioDID": "user.ivukotic:user.ivukotic.single_top_tW__nominal",
-                "Query": FuncADL_Uproot().FromTree("nominal")
-                .Select(lambda e: {"lep_pt": e["lep_pt"]})
-            }
-        ]
-    })
+
+    _load_ServiceXSpec(
+        {
+            "Sample": [
+                {
+                    "Name": "sampleA",
+                    "RucioDID": "user.ivukotic:user.ivukotic.single_top_tW__nominal",
+                    "Query": FuncADL_Uproot()
+                    .FromTree("nominal")
+                    .Select(lambda e: {"lep_pt": e["lep_pt"]}),
+                }
+            ]
+        }
+    )


 def test_python_query(transformed_result, codegen_list):
@@ -661,103 +736,136 @@ def run_query(input_filenames=None):

     query = PythonFunction().with_uproot_function(run_query)

-    spec = ServiceXSpec.model_validate({
-        "Sample": [
-            {
-                "Name": "sampleA",
-                "RucioDID": "user.ivukotic:user.ivukotic.single_top_tW__nominal",
-                "Query": query
-            }
-        ]
-    })
-    with patch('servicex.dataset_group.DatasetGroup.as_files',
-               return_value=[transformed_result]), \
-            patch('servicex.servicex_client.ServiceXClient.get_code_generators',
-                  return_value=codegen_list):
-        deliver(spec, config_path='tests/example_config.yaml')
+    spec = ServiceXSpec.model_validate(
+        {
+            "Sample": [
+                {
+                    "Name": "sampleA",
+                    "RucioDID": "user.ivukotic:user.ivukotic.single_top_tW__nominal",
+                    "Query": query,
+                }
+            ]
+        }
+    )
+    with patch(
+        "servicex.dataset_group.DatasetGroup.as_files",
+        return_value=[transformed_result],
+    ), patch(
+        "servicex.servicex_client.ServiceXClient.get_code_generators",
+        return_value=codegen_list,
+    ):
+        deliver(spec, config_path="tests/example_config.yaml")


 def test_uproot_raw_query(transformed_result, codegen_list):
     from servicex import deliver
     from servicex.query import UprootRaw  # type: ignore
-    spec = ServiceXSpec.model_validate({
-        "Sample": [
-            {
-                "Name": "sampleA",
-                "RucioDID": "user.ivukotic:user.ivukotic.single_top_tW__nominal",
-                "Query": UprootRaw([{"treename": "nominal"}])
-            }
-        ]
-    })
-    with patch('servicex.dataset_group.DatasetGroup.as_files',
-               return_value=[transformed_result]), \
-            patch('servicex.servicex_client.ServiceXClient.get_code_generators',
-                  return_value=codegen_list):
-        deliver(spec, config_path='tests/example_config.yaml')
+
+    spec = ServiceXSpec.model_validate(
+        {
+            "Sample": [
+                {
+                    "Name": "sampleA",
+                    "RucioDID": "user.ivukotic:user.ivukotic.single_top_tW__nominal",
+                    "Query": UprootRaw([{"treename": "nominal"}]),
+                }
+            ]
+        }
+    )
+    with patch(
+        "servicex.dataset_group.DatasetGroup.as_files",
+        return_value=[transformed_result],
+    ), patch(
+        "servicex.servicex_client.ServiceXClient.get_code_generators",
+        return_value=codegen_list,
+    ):
+        deliver(spec, config_path="tests/example_config.yaml")


 def test_uproot_raw_query_parquet(transformed_result, codegen_list):
     from servicex import deliver
     from servicex.query import UprootRaw  # type: ignore
-    spec = ServiceXSpec.model_validate({
-        "General": {
-            "OutputFormat": "parquet"
-        },
-        "Sample": [
-            {
-                "Name": "sampleA",
-                "RucioDID": "user.ivukotic:user.ivukotic.single_top_tW__nominal",
-                "Query": UprootRaw([{"treename": "nominal"}])
-            }
-        ]
-    })
+
+    spec = ServiceXSpec.model_validate(
+        {
+            "General": {"OutputFormat": "parquet"},
+            "Sample": [
+                {
+                    "Name": "sampleA",
+                    "RucioDID": "user.ivukotic:user.ivukotic.single_top_tW__nominal",
+                    "Query": UprootRaw([{"treename": "nominal"}]),
+                }
+            ],
+        }
+    )
     print(spec)
-    with patch('servicex.dataset_group.DatasetGroup.as_files',
-               return_value=[transformed_result]), \
-            patch('servicex.servicex_client.ServiceXClient.get_code_generators',
-                  return_value=codegen_list):
-        deliver(spec, config_path='tests/example_config.yaml')
+    with patch(
+        "servicex.dataset_group.DatasetGroup.as_files",
+        return_value=[transformed_result],
+    ), patch(
+        "servicex.servicex_client.ServiceXClient.get_code_generators",
+        return_value=codegen_list,
+    ):
+        deliver(spec, config_path="tests/example_config.yaml")


 def test_generic_query(codegen_list):
     from servicex.servicex_client import ServiceXClient
-    spec = ServiceXSpec.model_validate({
-        "General": {
-            "Codegen": "uproot-raw",
-        },
-        "Sample": [
-            {
-                "Name": "sampleA",
-                "RucioDID": "user.ivukotic:user.ivukotic.single_top_tW__nominal",
-                "Query": "[{'treename': 'nominal'}]"
-            }
-        ]
-    })
-    with patch('servicex.servicex_client.ServiceXClient.get_code_generators',
-               return_value=codegen_list):
-        sx = ServiceXClient(config_path='tests/example_config.yaml')
-        query = sx.generic_query(dataset_identifier=spec.Sample[0].RucioDID,
-                                 codegen=spec.General.Codegen, query=spec.Sample[0].Query)
+
+    spec = ServiceXSpec.model_validate(
+        {
+            "General": {
+                "Codegen": "uproot-raw",
+            },
+            "Sample": [
+                {
+                    "Name": "sampleA",
+                    "RucioDID": "user.ivukotic:user.ivukotic.single_top_tW__nominal",
+                    "Query": "[{'treename': 'nominal'}]",
+                }
+            ],
+        }
+    )
+    with patch(
+        "servicex.servicex_client.ServiceXClient.get_code_generators",
+        return_value=codegen_list,
+    ):
+        sx = ServiceXClient(config_path="tests/example_config.yaml")
+        query = sx.generic_query(
+            dataset_identifier=spec.Sample[0].RucioDID,
+            codegen=spec.General.Codegen,
+            query=spec.Sample[0].Query,
+        )
         assert query.generate_selection_string() == "[{'treename': 'nominal'}]"
-        query = sx.generic_query(dataset_identifier=spec.Sample[0].RucioDID,
-                                 result_format=spec.General.OutputFormat.to_ResultFormat(),
-                                 codegen=spec.General.Codegen, query=spec.Sample[0].Query)
-        assert query.result_format == 'root-file'
+        query = sx.generic_query(
+            dataset_identifier=spec.Sample[0].RucioDID,
+            result_format=spec.General.OutputFormat.to_ResultFormat(),
+            codegen=spec.General.Codegen,
+            query=spec.Sample[0].Query,
+        )
+        assert query.result_format == "root-file"
         query.query_string_generator = None
         with pytest.raises(RuntimeError):
             query.generate_selection_string()
         with pytest.raises(ValueError):
-            query = sx.generic_query(dataset_identifier=spec.Sample[0].RucioDID,
-                                     codegen=spec.General.Codegen, query=5)
+            query = sx.generic_query(
+                dataset_identifier=spec.Sample[0].RucioDID,
+                codegen=spec.General.Codegen,
+                query=5,
+            )
         with pytest.raises(NameError):
-            query = sx.generic_query(dataset_identifier=spec.Sample[0].RucioDID,
-                                     codegen='nonsense', query=spec.Sample[0].Query)
+            query = sx.generic_query(
+                dataset_identifier=spec.Sample[0].RucioDID,
+                codegen="nonsense",
+                query=spec.Sample[0].Query,
+            )
         with pytest.raises(RuntimeError):
             # no codegen specified by generic class
-            query = sx.generic_query(dataset_identifier=spec.Sample[0].RucioDID,
-                                     query=spec.Sample[0].Query)
+            query = sx.generic_query(
+                dataset_identifier=spec.Sample[0].RucioDID, query=spec.Sample[0].Query
+            )


 def test_entrypoint_import():
-    """ This will check that we have at least the Python transformer defined in servicex.query """
+    """This will check that we have at least the Python transformer defined in servicex.query"""
     from servicex.query import PythonFunction  # type: ignore # noqa: F401
diff --git a/tests/test_dataset.py b/tests/test_dataset.py
index 31e3a098..2ef18283 100644
--- a/tests/test_dataset.py
+++ b/tests/test_dataset.py
@@ -49,9 +49,14 @@ async def test_as_signed_urls_happy(transformed_result):
     # Test when display_progress is True and provided_progress is None
     did = FileListDataset("/foo/bar/baz.root")
-    dataset = Query(dataset_identifier=did, codegen="uproot",
-                    title="", config=None,
-                    sx_adapter=None, query_cache=None)
+    dataset = Query(
+        dataset_identifier=did,
+        codegen="uproot",
+        title="",
+        config=None,
+        sx_adapter=None,
+        query_cache=None,
+    )
     dataset.submit_and_download = AsyncMock()
     dataset.submit_and_download.return_value = transformed_result

@@ -63,23 +68,34 @@ async def test_as_signed_urls_happy(transformed_result):
 async def test_as_signed_urls_happy_dataset_group(transformed_result):
     # Test when display_progress is True and provided_progress is None
     did = FileListDataset("/foo/bar/baz.root")
-    dataset = Query(dataset_identifier=did, codegen="uproot",
-                    title="", config=None,
-                    sx_adapter=None, query_cache=None)
+    dataset = Query(
+        dataset_identifier=did,
+        codegen="uproot",
+        title="",
+        config=None,
+        sx_adapter=None,
+        query_cache=None,
+    )
     dataset.submit_and_download = AsyncMock()
     dataset.submit_and_download.return_value = transformed_result

-    result = dataset.as_signed_urls(display_progress=True, provided_progress=None,
-                                    dataset_group=True)
+    result = dataset.as_signed_urls(
+        display_progress=True, provided_progress=None, dataset_group=True
+    )
     assert result == transformed_result


 @pytest.mark.asyncio
 async def test_as_files_happy(transformed_result):
     did = FileListDataset("/foo/bar/baz.root")
-    dataset = Query(dataset_identifier=did, codegen="uproot",
-                    title="", config=None,
-                    sx_adapter=None, query_cache=None)
+    dataset = Query(
+        dataset_identifier=did,
+        codegen="uproot",
+        title="",
+        config=None,
+        sx_adapter=None,
+        query_cache=None,
+    )
     dataset.submit_and_download = AsyncMock()
     dataset.submit_and_download.return_value = transformed_result

@@ -110,8 +126,10 @@ async def test_download_files(python_dataset):
     python_dataset.configuration = config
     minio_mock.download_file.return_value = Path("/path/to/downloaded_file")
     minio_mock.get_signed_url.return_value = Path("http://example.com/signed_url")
-    minio_mock.list_bucket.return_value = [Mock(filename="file1.txt"),
-                                           Mock(filename="file2.txt")]
+    minio_mock.list_bucket.return_value = [
+        Mock(filename="file1.txt"),
+        Mock(filename="file2.txt"),
+    ]

     progress_mock = Mock()
     python_dataset.minio_polling_interval = 0
@@ -136,8 +154,10 @@ async def test_download_files_with_signed_urls(python_dataset):
     python_dataset.configuration = config
     minio_mock.download_file.return_value = "/path/to/downloaded_file"
     minio_mock.get_signed_url.return_value = "http://example.com/signed_url"
-    minio_mock.list_bucket.return_value = [Mock(filename="file1.txt"),
-                                           Mock(filename="file2.txt")]
+    minio_mock.list_bucket.return_value = [
+        Mock(filename="file1.txt"),
+        Mock(filename="file2.txt"),
+    ]

     progress_mock = Mock()
     python_dataset.minio_polling_interval = 0
@@ -150,7 +170,10 @@ async def test_download_files_with_signed_urls(python_dataset):
     )
     minio_mock.download_file.assert_not_called()
     minio_mock.get_signed_url.assert_called()
-    assert result_uris == ["http://example.com/signed_url", "http://example.com/signed_url"]
+    assert result_uris == [
+        "http://example.com/signed_url",
+        "http://example.com/signed_url",
+    ]


 @pytest.mark.asyncio
@@ -161,8 +184,9 @@ async def test_transform_status_listener_happy(python_dataset):
     status = Mock(files=10, files_completed=5, files_failed=1, status=Status.complete)
     python_dataset.current_status = status
     python_dataset.retrieve_current_transform_status = AsyncMock(return_value=status)
-    await python_dataset.transform_status_listener(progress, progress_task, "mock_title",
-                                                   download_task, "mock_title")
+    await python_dataset.transform_status_listener(
+        progress, progress_task, "mock_title", download_task, "mock_title"
+    )
     python_dataset.retrieve_current_transform_status.assert_awaited_once()

     # progress.update.assert_called_with(progress_task, total=10)
@@ -181,9 +205,12 @@ async def test_transform_status_listener_cancelled(python_dataset):
     python_dataset.current_status = status
     python_dataset.retrieve_current_transform_status = AsyncMock(return_value=status)
     with pytest.raises(ServiceXException, match=r"Request .*was canceled"):
-        with patch("servicex.app.transforms.create_kibana_link_parameters") as mock_link:
-            await python_dataset.transform_status_listener(progress, progress_task, "mock_title",
-                                                           download_task, "mock_title")
+        with patch(
+            "servicex.app.transforms.create_kibana_link_parameters"
+        ) as mock_link:
+            await python_dataset.transform_status_listener(
+                progress, progress_task, "mock_title", download_task, "mock_title"
+            )
             mock_link.assert_called_once()
     python_dataset.retrieve_current_transform_status.assert_awaited_once()
     assert python_dataset.files_completed == 5
@@ -191,7 +218,9 @@ async def test_transform_status_listener_cancelled(python_dataset):


 @pytest.mark.asyncio
-async def test_retrieve_current_transform_status_status_none(python_dataset, completed_status):
+async def test_retrieve_current_transform_status_status_none(
+    python_dataset, completed_status
+):
     with tempfile.TemporaryDirectory() as temp_dir:
         python_dataset.current_status = None
         python_dataset.servicex = AsyncMock()
@@ -203,8 +232,12 @@ async def test_retrieve_current_transform_status_status_none(python_dataset, com
         await python_dataset.retrieve_current_transform_status()

         assert python_dataset.current_status == completed_status
-        result = Path(os.path.join(Path(python_dataset.configuration.cache_path),
-                                   completed_status.request_id))
+        result = Path(
+            os.path.join(
+                Path(python_dataset.configuration.cache_path),
+                completed_status.request_id,
+            )
+        )
         python_dataset.download_path = result
         assert python_dataset.minio is not None
         assert isinstance(python_dataset.minio, MinioAdapter)
@@ -212,7 +245,9 @@
-async def test_retrieve_current_transform_status_status_not(python_dataset, completed_status):
+async def test_retrieve_current_transform_status_status_not(
+    python_dataset, completed_status
+):
     with tempfile.TemporaryDirectory() as temp_dir:
         python_dataset.servicex = AsyncMock()
         python_dataset.servicex.get_transform_status.return_value = completed_status
@@ -223,8 +258,12 @@ async def test_retrieve_current_transform_status_status_not(python_dataset, comp
         await python_dataset.retrieve_current_transform_status()

         assert python_dataset.current_status == completed_status
-        result = Path(os.path.join(Path(python_dataset.configuration.cache_path),
-                                   completed_status.request_id))
+        result = Path(
+            os.path.join(
+                Path(python_dataset.configuration.cache_path),
+                completed_status.request_id,
+            )
+        )
         python_dataset.download_path = result
         assert python_dataset.minio is not None
         assert isinstance(python_dataset.minio, MinioAdapter)
@@ -254,14 +293,18 @@ async def test_submit_and_download_cache_miss(python_dataset, completed_status):
         signed_urls_only = False
         expandable_progress = ExpandableProgress()

-        result = await python_dataset.submit_and_download(signed_urls_only, expandable_progress)
+        result = await python_dataset.submit_and_download(
+            signed_urls_only, expandable_progress
+        )
         assert result is not None
         assert result.request_id == "b8c508d0-ccf2-4deb-a1f7-65c839eebabf"
         cache.close()


 @pytest.mark.asyncio
-async def test_submit_and_download_cache_miss_overall_progress(python_dataset, completed_status):
+async def test_submit_and_download_cache_miss_overall_progress(
+    python_dataset, completed_status
+):
     with tempfile.TemporaryDirectory() as temp_dir:
         python_dataset.current_status = None
         python_dataset.servicex = AsyncMock()
@@ -284,8 +327,9 @@ async def test_submit_and_download_cache_miss_overall_progress(python_dataset, c
         expandable_progress = ExpandableProgress(overall_progress=True)
         dataset_group = True

-        result = await python_dataset.submit_and_download(signed_urls_only, expandable_progress,
-                                                          dataset_group)
+        result = await python_dataset.submit_and_download(
+            signed_urls_only, expandable_progress, dataset_group
+        )
         assert result is not None
         assert result.request_id == "b8c508d0-ccf2-4deb-a1f7-65c839eebabf"
         cache.close()
@@ -300,9 +344,11 @@ async def test_submit_and_download_no_result_format(python_dataset, completed_st
         cache = QueryCache(config)
         python_dataset.cache = cache
         python_dataset.configuration = config
-        with pytest.raises(ValueError,
-                           match=r"Unable to determine the result file format. "
-                                 r"Use set_result_format method"):
+        with pytest.raises(
+            ValueError,
+            match=r"Unable to determine the result file format. "
+            r"Use set_result_format method",
+        ):
             python_dataset.result_format = None
             python_dataset.servicex = AsyncMock()
             python_dataset.cache.get_transform_by_hash = Mock()
@@ -314,7 +360,9 @@ async def test_submit_and_download_no_result_format(python_dataset, completed_st
             python_dataset.download_files.return_value = []
             signed_urls_only = False
             expandable_progress = ExpandableProgress()
-            await python_dataset.submit_and_download(signed_urls_only, expandable_progress)
+            await python_dataset.submit_and_download(
+                signed_urls_only, expandable_progress
+            )
         cache.close()

@@ -324,7 +372,9 @@ def test_set_title(python_dataset):

 @pytest.mark.asyncio
-async def test_submit_and_download_cache_miss_signed_urls_only(python_dataset, completed_status):
+async def test_submit_and_download_cache_miss_signed_urls_only(
+    python_dataset, completed_status
+):
     with tempfile.TemporaryDirectory() as temp_dir:
         python_dataset.current_status = None
         python_dataset.servicex = AsyncMock()
@@ -345,14 +395,18 @@ async def test_submit_and_download_cache_miss_signed_urls_only(python_dataset, c
         signed_urls_only = True
         expandable_progress = ExpandableProgress()

-        result = await python_dataset.submit_and_download(signed_urls_only, expandable_progress)
+        result = await python_dataset.submit_and_download(
+            signed_urls_only, expandable_progress
+        )
         assert result is not None
         assert result.request_id == "b8c508d0-ccf2-4deb-a1f7-65c839eebabf"
         cache.close()


 @pytest.mark.asyncio
-async def test_submit_and_download_cache_files_request_urls(python_dataset, transformed_result):
+async def test_submit_and_download_cache_files_request_urls(
+    python_dataset, transformed_result
+):
     with tempfile.TemporaryDirectory() as temp_dir:
         python_dataset.current_status = None
         python_dataset.servicex = AsyncMock()
@@ -363,20 +417,28 @@ async def test_submit_and_download_cache_files_request_urls(python_dataset, tran
         python_dataset.servicex = AsyncMock()
         python_dataset.cache.get_transform_by_hash = Mock()
         python_dataset.cache.get_transform_by_hash.return_value = transformed_result
-        status = Mock(files=10, files_completed=5, files_failed=1, status=Status.complete)
+        status = Mock(
+            files=10, files_completed=5, files_failed=1, status=Status.complete
+        )
         python_dataset.current_status = status
-        python_dataset.retrieve_current_transform_status = AsyncMock(return_value=status)
+        python_dataset.retrieve_current_transform_status = AsyncMock(
+            return_value=status
+        )

         signed_urls_only = True
         expandable_progress = ExpandableProgress()

-        result = await python_dataset.submit_and_download(signed_urls_only, expandable_progress)
+        result = await python_dataset.submit_and_download(
+            signed_urls_only, expandable_progress
+        )
         assert result is not None
         assert result.request_id == transformed_result.request_id
         cache.close()


 @pytest.mark.asyncio
-async def test_submit_and_download_cache_urls_request_files(python_dataset, transformed_result):
+async def test_submit_and_download_cache_urls_request_files(
+    python_dataset, transformed_result
+):
     with tempfile.TemporaryDirectory() as temp_dir:
         python_dataset.current_status = None
         python_dataset.servicex = AsyncMock()
@@ -389,13 +451,19 @@ async def test_submit_and_download_cache_urls_request_files(python_dataset, tran
         transformed_result.signed_url_list = ["a.b.c.com"]
         transformed_result.file_list = []
         python_dataset.cache.get_transform_by_hash.return_value = transformed_result
-        status = Mock(files=10, files_completed=5, files_failed=1, status=Status.complete)
+        status = Mock(
+            files=10, files_completed=5, files_failed=1, status=Status.complete
+        )
         python_dataset.current_status = status
-        python_dataset.retrieve_current_transform_status = AsyncMock(return_value=status)
+        python_dataset.retrieve_current_transform_status = AsyncMock(
+            return_value=status
+        )

         signed_urls_only = False
         expandable_progress = ExpandableProgress()

-        result = await python_dataset.submit_and_download(signed_urls_only, expandable_progress)
+        result = await python_dataset.submit_and_download(
+            signed_urls_only, expandable_progress
+        )
         assert result is not None
         assert result.request_id == transformed_result.request_id
         cache.close()
@@ -429,7 +497,9 @@ async def test_network_loss(python_dataset, transformed_result):
         signed_urls_only = False
         expandable_progress = ExpandableProgress()

-        result = await python_dataset.submit_and_download(signed_urls_only, expandable_progress)
+        result = await python_dataset.submit_and_download(
+            signed_urls_only, expandable_progress
+        )
         assert result is not None
         assert result.request_id == "123-45-6789"
         cache.close()
@@ -437,8 +507,8 @@ async def test_network_loss(python_dataset, transformed_result):

 @pytest.mark.asyncio
 async def test_submit_and_download_get_request_id_from_previous_submitted_request(
-        python_dataset,
-        completed_status):
+    python_dataset, completed_status
+):
     with tempfile.TemporaryDirectory() as temp_dir:
         python_dataset.current_status = None
         python_dataset.servicex = AsyncMock()
@@ -455,10 +525,14 @@ async def test_submit_and_download_get_request_id_from_previous_submitted_reques
         python_dataset.download_files = AsyncMock()
         python_dataset.download_files.return_value = []
         python_dataset.cache.is_transform_request_submitted = Mock(return_value=True)
-        python_dataset.cache.get_transform_request_id = Mock(return_value="b8c508d0-ccf2-4deb-a1f7-65c839eebabf")  # noqa
+        python_dataset.cache.get_transform_request_id = Mock(
+            return_value="b8c508d0-ccf2-4deb-a1f7-65c839eebabf"
+        )  # noqa
         signed_urls_only = True
         expandable_progress = ExpandableProgress()
-        result = await python_dataset.submit_and_download(signed_urls_only, expandable_progress)
+        result = await python_dataset.submit_and_download(
+            signed_urls_only, expandable_progress
+        )
         assert result is not None
         assert result.request_id == "b8c508d0-ccf2-4deb-a1f7-65c839eebabf"
diff --git a/tests/test_dataset_group.py b/tests/test_dataset_group.py
index 5d2aa222..53c54646 100644
--- a/tests/test_dataset_group.py
+++ b/tests/test_dataset_group.py
@@ -50,8 +50,9 @@ async def test_as_signed_urls(mocker, transformed_result):
     ds1.servicex._get_authorization = AsyncMock()

     ds2 = mocker.Mock()
-    ds2.as_signed_urls_async = AsyncMock(return_value=transformed_result.model_copy(
-        update={"request_id": "98-765-432"}))
+    ds2.as_signed_urls_async = AsyncMock(
+        return_value=transformed_result.model_copy(update={"request_id": "98-765-432"})
+    )

     group = DatasetGroup([ds1, ds2])
     results = await group.as_signed_urls_async()
@@ -68,8 +69,9 @@ async def test_as_files(mocker, transformed_result):
     ds1.servicex._get_authorization = AsyncMock()

     ds2 = mocker.Mock()
-    ds2.as_files_async = AsyncMock(return_value=transformed_result.model_copy(
-        update={"request_id": "98-765-432"}))
+    ds2.as_files_async = AsyncMock(
+        return_value=transformed_result.model_copy(update={"request_id": "98-765-432"})
+    )

     group = DatasetGroup([ds1, ds2])
     results = await group.as_files_async()
diff --git a/tests/test_dataset_identifier.py b/tests/test_dataset_identifier.py
index 5e1f93a2..ceb5fd8c 100644
--- a/tests/test_dataset_identifier.py
+++ b/tests/test_dataset_identifier.py
@@ -25,8 +25,11 @@
 # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-from servicex.dataset_identifier import DataSetIdentifier, RucioDatasetIdentifier, \
-    FileListDataset
+from servicex.dataset_identifier import (
+    DataSetIdentifier,
+    RucioDatasetIdentifier,
+    FileListDataset,
+)
 import pytest
diff --git a/tests/test_default_endpoint.py b/tests/test_default_endpoint.py
index 8d1b18cb..7e57e33c 100644
--- a/tests/test_default_endpoint.py
+++ b/tests/test_default_endpoint.py
@@ -4,14 +4,18 @@


 def test_default_endpoint(codegen_list):
-    with patch('servicex.servicex_client.ServiceXClient.get_code_generators',
-               return_value=codegen_list):
+    with patch(
+        "servicex.servicex_client.ServiceXClient.get_code_generators",
+        return_value=codegen_list,
+    ):
         sx = ServiceXClient(config_path="tests/example_config.yaml")
         assert sx.servicex.url == "http://localhost:5000"


 def test_first_endpoint(codegen_list):
-    with patch('servicex.servicex_client.ServiceXClient.get_code_generators',
-               return_value=codegen_list):
+    with patch(
+        "servicex.servicex_client.ServiceXClient.get_code_generators",
+        return_value=codegen_list,
+    ):
         sx = ServiceXClient(config_path="tests/example_config_default_endpoint.yaml")
         assert sx.servicex.url == "https://servicex.af.uchicago.edu"
diff --git a/tests/test_expandable_progress.py b/tests/test_expandable_progress.py
index 9de4c50d..7f75bb8f 100644
--- a/tests/test_expandable_progress.py
+++ b/tests/test_expandable_progress.py
@@ -31,8 +31,10 @@
 from rich.progress import TextColumn, BarColumn, MofNCompleteColumn, TimeRemainingColumn


-@patch("servicex.expandable_progress.TranformStatusProgress",
-       return_value=MagicMock(TranformStatusProgress))
+@patch(
+    "servicex.expandable_progress.TranformStatusProgress",
+    return_value=MagicMock(TranformStatusProgress),
+)
 def test_progress(mock_progress):
     with ExpandableProgress() as progress:
         assert progress.progress == mock_progress.return_value
@@ -41,8 +43,10 @@ def test_progress(mock_progress):
     assert mock_progress.return_value.stop.call_count == 1


-@patch("servicex.expandable_progress.TranformStatusProgress",
-       return_value=MagicMock(TranformStatusProgress))
+@patch(
+    "servicex.expandable_progress.TranformStatusProgress",
+    return_value=MagicMock(TranformStatusProgress),
+)
 def test_overall_progress(mock_progress):
     with ExpandableProgress(overall_progress=True) as progress:
         assert progress.progress == mock_progress.return_value
@@ -51,8 +55,10 @@ def test_overall_progress(mock_progress):
     assert mock_progress.return_value.stop.call_count == 1


-@patch("servicex.expandable_progress.TranformStatusProgress",
-       return_value=MagicMock(TranformStatusProgress))
+@patch(
+    "servicex.expandable_progress.TranformStatusProgress",
+    return_value=MagicMock(TranformStatusProgress),
+)
 def test_overall_progress_mock(mock_progress):
     with ExpandableProgress(overall_progress=True) as progress:
         assert progress.progress == mock_progress.return_value
@@ -61,8 +67,10 @@ def test_overall_progress_mock(mock_progress):
     assert mock_progress.return_value.stop.call_count == 1


-@patch("servicex.expandable_progress.TranformStatusProgress",
-       return_value=MagicMock(TranformStatusProgress))
+@patch(
+    "servicex.expandable_progress.TranformStatusProgress",
+    return_value=MagicMock(TranformStatusProgress),
+)
 def test_refresh_mock(mock_progress):
     with ExpandableProgress(overall_progress=True) as progress:
         progress.refresh()
@@ -92,8 +100,10 @@ def stop(self) -> None:
     assert provided_progress.stop.call_count == 0


-@patch("servicex.expandable_progress.TranformStatusProgress",
-       return_value=MagicMock(TranformStatusProgress))
+@patch(
+    "servicex.expandable_progress.TranformStatusProgress",
+    return_value=MagicMock(TranformStatusProgress),
+)
 def test_no_progress(mock_progress):
     with ExpandableProgress(display_progress=False) as progress:
         assert not progress.progress
@@ -117,31 +127,29 @@ def test_nested_expandable_progress():
 def test_get_renderables_without_failure(mock_make_tasks_table):
     progress = TranformStatusProgress(
         TextColumn("[progress.description]{task.description}"),
-        BarColumn(complete_style="rgb(114,156,31)",
-                  finished_style="rgb(0,255,0)"),
+        BarColumn(complete_style="rgb(114,156,31)", finished_style="rgb(0,255,0)"),
         MofNCompleteColumn(),
-        TimeRemainingColumn(compact=True, elapsed_when_finished=True)
+        TimeRemainingColumn(compact=True, elapsed_when_finished=True),
     )
     progress.add_task("test_without_failure")
     list(progress.get_renderables())
     mock_make_tasks_table.assert_called()
     mock_make_tasks_table.assert_called_with(progress.tasks)
-    assert progress.columns[1].complete_style == 'rgb(114,156,31)'
+    assert progress.columns[1].complete_style == "rgb(114,156,31)"


 def test_get_renderables_with_failure():
     progress = TranformStatusProgress(
         TextColumn("[progress.description]{task.description}"),
-        BarColumn(complete_style="rgb(114,156,31)",
-                  finished_style="rgb(0,255,0)"),
+        BarColumn(complete_style="rgb(114,156,31)", finished_style="rgb(0,255,0)"),
         MofNCompleteColumn(),
-        TimeRemainingColumn(compact=True, elapsed_when_finished=True)
+        TimeRemainingColumn(compact=True, elapsed_when_finished=True),
     )
     progress.add_task("test_with_failure", bar="failure")
     list(progress.get_renderables())
     assert len(progress.columns) == 4
     assert isinstance(progress.columns[1], BarColumn)
-    assert progress.columns[1].complete_style == 'rgb(255,0,0)'
+    assert progress.columns[1].complete_style == "rgb(255,0,0)"


 def test_progress_advance():
diff --git a/tests/test_func_adl_dataset.py b/tests/test_func_adl_dataset.py
index 5bc6e98c..a4239ab8 100644
--- a/tests/test_func_adl_dataset.py
+++ b/tests/test_func_adl_dataset.py
@@ -46,24 +46,26 @@ def test_requires_tree():

 def test_a_query():
     query = FuncADLQuery_Uproot()
-    query = query.FromTree("nominal") \
-        .Select(lambda e: {"lep_pt": e["lep_pt"]})
+    query = query.FromTree("nominal").Select(lambda e: {"lep_pt": e["lep_pt"]})

-    assert (query.generate_selection_string()
-            == "(call Select (call EventDataset 'bogus.root' 'nominal') "
-               "(lambda (list e) (dict (list 'lep_pt') "
-               "(list (subscript e 'lep_pt')))))"
-            )
+    assert (
+        query.generate_selection_string()
+        == "(call Select (call EventDataset 'bogus.root' 'nominal') "
+        "(lambda (list e) (dict (list 'lep_pt') "
+        "(list (subscript e 'lep_pt')))))"
+    )


 def test_set_query():
-    qastle = "(call Select (call EventDataset 'bogus.root' 'nominal') " \
-             "(lambda (list e) (dict (list 'lep_pt') " \
-             "(list (subscript e 'lep_pt')))))"
+    qastle = (
+        "(call Select (call EventDataset 'bogus.root' 'nominal') "
+        "(lambda (list e) (dict (list 'lep_pt') "
+        "(list (subscript e 'lep_pt')))))"
+    )
     query = FuncADLQuery_Uproot()
     query.set_provided_qastle(qastle)

-    assert (query.generate_selection_string() == qastle)
+    assert query.generate_selection_string() == qastle


 def test_type():
@@ -75,9 +77,7 @@ class my_type_info:
         def fork_it_over(self) -> int: ...

-    datasource = FuncADLQuery[my_type_info](
-        item_type=my_type_info
-    )
+    datasource = FuncADLQuery[my_type_info](item_type=my_type_info)

     assert datasource.item_type == my_type_info
diff --git a/tests/test_guardlist.py b/tests/test_guardlist.py
index e945513f..6bc5dc8a 100644
--- a/tests/test_guardlist.py
+++ b/tests/test_guardlist.py
@@ -4,10 +4,10 @@

 def test_guardlist():
     gl1 = GuardList([1])
-    assert str(gl1) == '[1]'
+    assert str(gl1) == "[1]"
     assert gl1[0] == 1
     gl2 = GuardList(ValueError())
-    assert str(gl2) == 'Invalid GuardList: ValueError()'
+    assert str(gl2) == "Invalid GuardList: ValueError()"
     with pytest.raises(ReturnValueException):
         gl2[0]
     with pytest.raises(ReturnValueException):
diff --git a/tests/test_output_handler.py b/tests/test_output_handler.py
index eed864dd..479644f3 100644
--- a/tests/test_output_handler.py
+++ b/tests/test_output_handler.py
@@ -9,7 +9,7 @@ def test_output_directory(tmp_path):
         "General": {
             "Codegen": "python",
             "Delivery": "LocalCache",
-            "OutputDirectory": str(tmp_path)
+            "OutputDirectory": str(tmp_path),
         },
         "Sample": [
             {"Name": "sampleA", "RucioDID": "user.kchoi:sampleA", "Query": "a"},
diff --git a/tests/test_python_dataset.py b/tests/test_python_dataset.py
index 05181514..47d5fd37 100644
--- a/tests/test_python_dataset.py
+++ b/tests/test_python_dataset.py
@@ -60,7 +60,9 @@ def run_query(input_filenames=None):
         print("Greetings from your query")
         return []
     """
-    selection = datasource.with_uproot_function(string_function).generate_selection_string()
+    selection = datasource.with_uproot_function(
+        string_function
+    ).generate_selection_string()
     print(selection)
     print("==============")
     print(b64decode(selection))
diff --git a/tests/test_query_cache.py b/tests/test_query_cache.py
index 95e3d70d..84d8c7d7 100644
--- a/tests/test_query_cache.py
+++ b/tests/test_query_cache.py
@@ -126,7 +126,8 @@ def test_cache_transform(transform_request, completed_status):
             data_dir="/foo/baz",
             file_list=file_uris,
             signed_urls=[],
-        ).model_dump_json())
+        ).model_dump_json()
+    )
     record["hash"] = transform_request.compute_hash()
     record["status"] = "COMPLETE"
     cache.db.insert(record)
@@ -198,10 +199,10 @@ def test_update_codegen_by_backend_single():
         config = Configuration(cache_path=temp_dir, api_endpoints=[])  # type: ignore
         cache = QueryCache(config)
         codegens = Query()
-        cache.update_codegen_by_backend('backend_1', ['codegen_1'])
-        result = cache.db.search(codegens.backend == 'backend_1')
+        cache.update_codegen_by_backend("backend_1", ["codegen_1"])
+        result = cache.db.search(codegens.backend == "backend_1")
         assert len(result) == 1
-        assert result[0] == {'backend': 'backend_1', 'codegens': ['codegen_1']}
+        assert result[0] == {"backend": "backend_1", "codegens": ["codegen_1"]}
         cache.close()


@@ -209,9 +210,9 @@ def test_get_codegen_by_backend_single():
     with tempfile.TemporaryDirectory() as temp_dir:
         config = Configuration(cache_path=temp_dir, api_endpoints=[])  # type: ignore
         cache = QueryCache(config)
-        cache.update_codegen_by_backend('backend_1', ['codegen_1'])
+        cache.update_codegen_by_backend("backend_1", ["codegen_1"])
         result = cache.get_codegen_by_backend("backend_1")
-        assert result == {'backend': 'backend_1', 'codegens': ['codegen_1']}
+        assert result == {"backend": "backend_1", "codegens": ["codegen_1"]}
         cache.close()


@@ -219,11 +220,11 @@ def test_delete_codegen_by_backend():
     with tempfile.TemporaryDirectory() as temp_dir:
         config = Configuration(cache_path=temp_dir, api_endpoints=[])  # type: ignore
         cache = QueryCache(config)
-        cache.update_codegen_by_backend('backend_1', ['codegen_1'])
+        cache.update_codegen_by_backend("backend_1", ["codegen_1"])
         result = cache.get_codegen_by_backend("backend_1")
-        assert result == {'backend': 'backend_1', 'codegens': ['codegen_1']}
+        assert result == {"backend": "backend_1", "codegens": ["codegen_1"]}

-        cache.delete_codegen_by_backend('backend_1')
+        cache.delete_codegen_by_backend("backend_1")
         result = cache.get_codegen_by_backend("backend_1")
         assert result is None
         cache.close()
@@ -233,7 +234,7 @@ def test_delete_codegen_by_backend_nonexistent():
     with tempfile.TemporaryDirectory() as temp_dir:
         config = Configuration(cache_path=temp_dir, api_endpoints=[])  # type: ignore
         cache = QueryCache(config)
-        cache.delete_codegen_by_backend('backend_1')
+        cache.delete_codegen_by_backend("backend_1")
         with pytest.raises(Exception):
             assert False
         cache.close()
@@ -253,9 +254,9 @@ def test_add_both_codegen_and_transform_to_cache(transform_request, completed_st
             )
         )

-        cache.update_codegen_by_backend('backend_1', ['codegen_1'])
+        cache.update_codegen_by_backend("backend_1", ["codegen_1"])
         result = cache.get_codegen_by_backend("backend_1")
-        assert result == {'backend': 'backend_1', 'codegens': ['codegen_1']}
+        assert result == {"backend": "backend_1", "codegens": ["codegen_1"]}

         assert len(cache.cached_queries()) == 1
         cache.close()
@@ -319,7 +320,7 @@ def test_get_transform_request_id(transform_request, completed_status):
     print(request_id)

     # update the transform request with a request id and then check for the request id
-    cache.update_transform_status(hash_value, 'SUBMITTED')
+    cache.update_transform_status(hash_value, "SUBMITTED")
     cache.update_transform_request_id(hash_value, "123456")
     request_id = cache.get_transform_request_id(hash_value)
     assert request_id == "123456"
diff --git a/tests/test_servicex_adapter.py b/tests/test_servicex_adapter.py
index d2187918..99d8f5c0 100644
--- a/tests/test_servicex_adapter.py
+++ b/tests/test_servicex_adapter.py
@@ -57,23 +57,31 @@ def test_result_formats():
 @pytest.mark.asyncio
 @patch("servicex.servicex_adapter.RetryClient.get")
 async def test_get_transforms(mock_get, servicex, transform_status_response):
-    mock_get.return_value.__aenter__.return_value.json.return_value = transform_status_response
+    mock_get.return_value.__aenter__.return_value.json.return_value = (
+        transform_status_response
+    )
     mock_get.return_value.__aenter__.return_value.status = 200
     t = await servicex.get_transforms()
     assert len(t) == 1
     assert t[0].request_id == "b8c508d0-ccf2-4deb-a1f7-65c839eebabf"
-    mock_get.assert_called_with(url='https://servicex.org/servicex/transformation', headers={})
+    mock_get.assert_called_with(
+        url="https://servicex.org/servicex/transformation", headers={}
+    )


 @pytest.mark.asyncio
 @patch("servicex.servicex_adapter.RetryClient.get")
 async def test_get_transforms_error(mock_get, servicex, transform_status_response):
-    mock_get.return_value.__aenter__.return_value.json.return_value = {'message': 'error_message'}
+    mock_get.return_value.__aenter__.return_value.json.return_value = {
+        "message": "error_message"
+    }
     mock_get.return_value.__aenter__.return_value.status = 500
     with pytest.raises(RuntimeError) as err:
         await servicex.get_transforms()
-        assert "ServiceX WebAPI Error during transformation submission: 500 - error_message" \
-               == str(err.value)
+        assert (
+            "ServiceX WebAPI Error during transformation submission: 500 - error_message"
+            == str(err.value)
+        )


 @pytest.mark.asyncio
@@ -86,58 +94,68 @@ async def test_get_transforms_auth_error(mock_get, servicex):


 @pytest.mark.asyncio
-@patch('servicex.servicex_adapter.jwt.decode')
-async def test_get_transforms_wlcg_bearer_token(decode,
-                                                servicex,
-                                                transform_status_response):
+@patch("servicex.servicex_adapter.jwt.decode")
+async def test_get_transforms_wlcg_bearer_token(
+    decode, servicex, transform_status_response
+):
     token_file = tempfile.NamedTemporaryFile(mode="w+t", delete=False)
-    token_file.write(""""
+    token_file.write(
+        """"
 eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c
-    """)
+    """
+    )
     token_file.close()
-    os.environ['BEARER_TOKEN_FILE'] = token_file.name
+    os.environ["BEARER_TOKEN_FILE"] = token_file.name

     # Try with an expired token
     with pytest.raises(AuthorizationError) as err:
-        decode.return_value = {'exp': 0.0}
+        decode.return_value = {"exp": 0.0}
         await servicex.get_transforms()
     assert "ServiceX access token request rejected:" in str(err.value)

     os.remove(token_file.name)
-    del os.environ['BEARER_TOKEN_FILE']
+    del os.environ["BEARER_TOKEN_FILE"]


 @pytest.mark.asyncio
-@patch('servicex.servicex_adapter.RetryClient.post')
-@patch('servicex.servicex_adapter.RetryClient.get')
+@patch("servicex.servicex_adapter.RetryClient.post")
+@patch("servicex.servicex_adapter.RetryClient.get")
 async def test_get_transforms_with_refresh(get, post, transform_status_response):
     servicex = ServiceXAdapter(url="https://servicex.org", refresh_token="refrescas")

-    post.return_value.__aenter__.return_value.json.return_value = {"access_token": "luckycharms"}
+    post.return_value.__aenter__.return_value.json.return_value = {
+        "access_token": "luckycharms"
+    }
     post.return_value.__aenter__.return_value.status = 200
-    get.return_value.__aenter__.return_value.json.return_value = transform_status_response
+    get.return_value.__aenter__.return_value.json.return_value = (
+        transform_status_response
+    )
     get.return_value.__aenter__.return_value.status = 200
     await servicex.get_transforms()

-    post.assert_called_with('https://servicex.org/token/refresh',
-                            headers={'Authorization': 'Bearer refrescas'}, json=None)
+    post.assert_called_with(
+        "https://servicex.org/token/refresh",
+        headers={"Authorization": "Bearer refrescas"},
+        json=None,
+    )

-    get.assert_called_with(url='https://servicex.org/servicex/transformation',
-                           headers={'Authorization': 'Bearer luckycharms'})
+    get.assert_called_with(
+        url="https://servicex.org/servicex/transformation",
+        headers={"Authorization": "Bearer luckycharms"},
+    )


-@patch('servicex.servicex_adapter.httpx.Client.get')
+@patch("servicex.servicex_adapter.httpx.Client.get")
 def test_get_codegens(get, servicex):
-    get.return_value = httpx.Response(200, json={
-        "uproot": "http://uproot-codegen",
-        "xaod": "http://xaod-codegen"
-    })
+    get.return_value = httpx.Response(
+        200, json={"uproot": "http://uproot-codegen", "xaod": "http://xaod-codegen"}
+    )
     c = servicex.get_code_generators()
     assert len(c) == 2
     assert c["uproot"] == "http://uproot-codegen"


-@patch('servicex.servicex_adapter.httpx.Client.get')
+@patch("servicex.servicex_adapter.httpx.Client.get")
 def test_get_codegens_error(get, servicex):
     get.return_value = httpx.Response(403)
     with pytest.raises(AuthorizationError) as err:
@@ -164,13 +182,14 @@ def dataset():
             "adler32": "62c594d4",
             "file_size": 34831129,
             "file_events": 0,
-            "paths": "https://xenia.nevis.columbia.edu:1094/atlas/dq2/rucio/user/mtost/06/a1/user.mtost.40294033._000002.less_jet_and_new_GN.root"  # NOQA: E501
-        }]
+            "paths": "https://xenia.nevis.columbia.edu:1094/atlas/dq2/rucio/user/mtost/06/a1/user.mtost.40294033._000002.less_jet_and_new_GN.root",  # NOQA: E501
+        }
+        ],
     }


 @pytest.mark.asyncio
-@patch('servicex.servicex_adapter.ClientSession.get')
+@patch("servicex.servicex_adapter.ClientSession.get")
 async def test_get_datasets(get, servicex, dataset):
     get.return_value.__aenter__.return_value.json.return_value = {"datasets": [dataset]}
     get.return_value.__aenter__.return_value.status = 200
@@ -179,14 +198,12 @@ async def test_get_datasets(get, servicex, dataset):
     assert len(c) == 1
     assert c[0].id == 123
     get.assert_called_with(
-        url='https://servicex.org/servicex/datasets',
-        params={},
-        headers={}
+        url="https://servicex.org/servicex/datasets", params={}, headers={}
     )


 @pytest.mark.asyncio
-@patch('servicex.servicex_adapter.ClientSession.get')
+@patch("servicex.servicex_adapter.ClientSession.get")
 async def test_get_datasets_show_deleted(get, servicex, dataset):
     get.return_value.__aenter__.return_value.json.return_value = {"datasets": [dataset]}
     get.return_value.__aenter__.return_value.status = 200
@@ -194,14 +211,14 @@ async def test_get_datasets_show_deleted(get, servicex, dataset):
     assert len(c) == 1
     assert c[0].id == 123
     get.assert_called_with(
-        url='https://servicex.org/servicex/datasets',
-        params={'show-deleted': True},
-        headers={}
+        url="https://servicex.org/servicex/datasets",
+        params={"show-deleted": True},
+        headers={},
     )


 @pytest.mark.asyncio
-@patch('servicex.servicex_adapter.ClientSession.get')
+@patch("servicex.servicex_adapter.ClientSession.get")
 async def test_get_datasets_auth_error(get, servicex):
     get.return_value.__aenter__.return_value.status = 403
     with pytest.raises(AuthorizationError) as err:
@@ -210,7 +227,7 @@ async def test_get_datasets_auth_error(get, servicex):


 @pytest.mark.asyncio
-@patch('servicex.servicex_adapter.ClientSession.get')
+@patch("servicex.servicex_adapter.ClientSession.get")
 async def test_get_dataset(get, servicex, dataset):
     get.return_value.__aenter__.return_value.json.return_value = dataset
     get.return_value.__aenter__.return_value.status = 200
@@ -220,7 +237,7 @@ async def test_get_dataset(get, servicex, dataset):


 @pytest.mark.asyncio
-@patch('servicex.servicex_adapter.ClientSession.get')
+@patch("servicex.servicex_adapter.ClientSession.get")
 async def test_get_dataset_errors(get, servicex, dataset):
     get.return_value.__aenter__.return_value.status = 403
     with pytest.raises(AuthorizationError) as err:
@@ -232,8 +249,10 @@ async def test_get_dataset_errors(get, servicex, dataset):
         await servicex.get_dataset(123)
     assert "Dataset 123 not found" in str(err.value)

-    get.return_value.__aenter__.return_value.json.side_effect = ContentTypeError(None, None)
-    get.return_value.__aenter__.return_value.text.return_value = 'error_message'
+    get.return_value.__aenter__.return_value.json.side_effect = ContentTypeError(
+        None, None
+    )
+    get.return_value.__aenter__.return_value.text.return_value = "error_message"
     get.return_value.__aenter__.return_value.status = 500
     with pytest.raises(RuntimeError) as err:
         await servicex.get_dataset(123)
@@ -241,24 +260,23 @@ async def test_get_dataset_errors(get, servicex, dataset):


 @pytest.mark.asyncio
-@patch('servicex.servicex_adapter.ClientSession.delete')
+@patch("servicex.servicex_adapter.ClientSession.delete")
 async def test_delete_dataset(delete, servicex):
     delete.return_value.__aenter__.return_value.json.return_value = {
-        'dataset-id': 123,
-        'stale': True
+        "dataset-id": 123,
+        "stale": True,
     }
delete.return_value.__aenter__.return_value.status = 200 r = await servicex.delete_dataset(123) delete.assert_called_with( - url='https://servicex.org/servicex/datasets/123', - headers={} + url="https://servicex.org/servicex/datasets/123", headers={} ) assert r @pytest.mark.asyncio -@patch('servicex.servicex_adapter.ClientSession.delete') +@patch("servicex.servicex_adapter.ClientSession.delete") async def test_delete_dataset_errors(delete, servicex): delete.return_value.__aenter__.return_value.status = 403 with pytest.raises(AuthorizationError) as err: @@ -270,8 +288,10 @@ async def test_delete_dataset_errors(delete, servicex): await servicex.delete_dataset(123) assert "Dataset 123 not found" in str(err.value) - delete.return_value.__aenter__.return_value.json.side_effect = ContentTypeError(None, None) - delete.return_value.__aenter__.return_value.text.return_value = 'error_message' + delete.return_value.__aenter__.return_value.json.side_effect = ContentTypeError( + None, None + ) + delete.return_value.__aenter__.return_value.text.return_value = "error_message" delete.return_value.__aenter__.return_value.status = 500 with pytest.raises(RuntimeError) as err: await servicex.delete_dataset(123) @@ -279,18 +299,17 @@ async def test_delete_dataset_errors(delete, servicex): @pytest.mark.asyncio -@patch('servicex.servicex_adapter.ClientSession.delete') +@patch("servicex.servicex_adapter.ClientSession.delete") async def test_delete_transform(delete, servicex): delete.return_value.__aenter__.return_value.status = 200 await servicex.delete_transform("123-45-6789") delete.assert_called_with( - url='https://servicex.org/servicex/transformation/123-45-6789', - headers={} + url="https://servicex.org/servicex/transformation/123-45-6789", headers={} ) @pytest.mark.asyncio -@patch('servicex.servicex_adapter.ClientSession.delete') +@patch("servicex.servicex_adapter.ClientSession.delete") async def test_delete_transform_errors(delete, servicex): delete.return_value.__aenter__.return_value.status = 403 with pytest.raises(AuthorizationError) as err: @@ -302,8 +321,10 @@ async def test_delete_transform_errors(delete, servicex): await servicex.delete_transform("123-45-6789") assert "Transform 123-45-6789 not found" in str(err.value) - delete.return_value.__aenter__.return_value.json.side_effect = ContentTypeError(None, None) - delete.return_value.__aenter__.return_value.text.return_value = 'error_message' + delete.return_value.__aenter__.return_value.json.side_effect = ContentTypeError( + None, None + ) + delete.return_value.__aenter__.return_value.text.return_value = "error_message" delete.return_value.__aenter__.return_value.status = 500 with pytest.raises(RuntimeError) as err: await servicex.delete_transform("123-45-6789") @@ -311,7 +332,7 @@ async def test_delete_transform_errors(delete, servicex): @pytest.mark.asyncio -@patch('servicex.servicex_adapter.ClientSession.get') +@patch("servicex.servicex_adapter.ClientSession.get") async def test_cancel_transform(get, servicex): get.return_value.__aenter__.return_value.json.return_value = { "message": "Canceled transformation request 123" @@ -320,13 +341,12 @@ async def test_cancel_transform(get, servicex): await servicex.cancel_transform(123) get.assert_called_with( - url='https://servicex.org/servicex/transformation/123/cancel', - headers={} + url="https://servicex.org/servicex/transformation/123/cancel", headers={} ) @pytest.mark.asyncio -@patch('servicex.servicex_adapter.ClientSession.get') +@patch("servicex.servicex_adapter.ClientSession.get") async def 
test_cancel_transform_errors(get, servicex): get.return_value.__aenter__.return_value.status = 403 with pytest.raises(AuthorizationError) as err: @@ -338,8 +358,10 @@ async def test_cancel_transform_errors(get, servicex): await servicex.cancel_transform(123) assert "Transform 123 not found" in str(err.value) - get.return_value.__aenter__.return_value.json.side_effect = ContentTypeError(None, None) - get.return_value.__aenter__.return_value.text.return_value = 'error_message' + get.return_value.__aenter__.return_value.json.side_effect = ContentTypeError( + None, None + ) + get.return_value.__aenter__.return_value.text.return_value = "error_message" get.return_value.__aenter__.return_value.status = 500 with pytest.raises(RuntimeError) as err: await servicex.cancel_transform(123) @@ -347,9 +369,11 @@ async def test_cancel_transform_errors(get, servicex): @pytest.mark.asyncio -@patch('servicex.servicex_adapter.RetryClient.post') +@patch("servicex.servicex_adapter.RetryClient.post") async def test_submit(post, servicex): - post.return_value.__aenter__.return_value.json.return_value = {"request_id": "123-456-789"} + post.return_value.__aenter__.return_value.json.return_value = { + "request_id": "123-456-789" + } post.return_value.__aenter__.return_value.status = 200 request = TransformRequest( title="Test submission", @@ -357,14 +381,14 @@ async def test_submit(post, servicex): selection="(call EventDataset)", codegen="uproot", result_destination=ResultDestination.object_store, - result_format=ResultFormat.parquet + result_format=ResultFormat.parquet, ) result = await servicex.submit_transform(request) assert result == "123-456-789" @pytest.mark.asyncio -@patch('servicex.servicex_adapter.RetryClient.post') +@patch("servicex.servicex_adapter.RetryClient.post") async def test_submit_errors(post, servicex): post.return_value.__aenter__.return_value.status = 401 request = TransformRequest( @@ -373,46 +397,58 @@ async def test_submit_errors(post, servicex): selection="(call EventDataset)", codegen="uproot", result_destination=ResultDestination.object_store, - result_format=ResultFormat.parquet + result_format=ResultFormat.parquet, ) with pytest.raises(AuthorizationError) as err: await servicex.submit_transform(request) assert "Not authorized to access serviceX at" in str(err.value) - post.return_value.__aenter__.return_value.json.side_effect = ContentTypeError(None, None) - post.return_value.__aenter__.return_value.text.return_value = 'error_message' + post.return_value.__aenter__.return_value.json.side_effect = ContentTypeError( + None, None + ) + post.return_value.__aenter__.return_value.text.return_value = "error_message" post.return_value.__aenter__.return_value.status = 500 with pytest.raises(RuntimeError) as err: await servicex.submit_transform(request) - assert "ServiceX WebAPI Error during transformation submission: 500 - error_message" \ - == str(err.value) + assert ( + "ServiceX WebAPI Error during transformation submission: 500 - error_message" + == str(err.value) + ) post.return_value.__aenter__.return_value.json.reset_mock() - post.return_value.__aenter__.return_value.json.return_value = {"message": "error_message"} + post.return_value.__aenter__.return_value.json.return_value = { + "message": "error_message" + } post.return_value.__aenter__.return_value.status = 400 with pytest.raises(ValueError) as err: await servicex.submit_transform(request) assert "Invalid transform request: error_message" == str(err.value) - post.return_value.__aenter__.return_value.json.return_value = {"message": 
"error_message"} + post.return_value.__aenter__.return_value.json.return_value = { + "message": "error_message" + } post.return_value.__aenter__.return_value.status = 410 with pytest.raises(RuntimeError) as err: await servicex.submit_transform(request) - assert "ServiceX WebAPI Error during transformation submission: 410 - error_message" \ - == str(err.value) + assert ( + "ServiceX WebAPI Error during transformation submission: 410 - error_message" + == str(err.value) + ) @pytest.mark.asyncio -@patch('servicex.servicex_adapter.RetryClient.get') +@patch("servicex.servicex_adapter.RetryClient.get") async def test_get_transform_status(get, servicex, transform_status_response): - get.return_value.__aenter__.return_value.json.return_value = transform_status_response['requests'][0] # NOQA: E501 + get.return_value.__aenter__.return_value.json.return_value = ( + transform_status_response["requests"][0] + ) # NOQA: E501 get.return_value.__aenter__.return_value.status = 200 result = await servicex.get_transform_status("b8c508d0-ccf2-4deb-a1f7-65c839eebabf") assert result.request_id == "b8c508d0-ccf2-4deb-a1f7-65c839eebabf" @pytest.mark.asyncio -@patch('servicex.servicex_adapter.RetryClient.get') +@patch("servicex.servicex_adapter.RetryClient.get") async def test_get_transform_status_errors(get, servicex): with pytest.raises(AuthorizationError) as err: get.return_value.__aenter__.return_value.status = 401 @@ -422,27 +458,31 @@ async def test_get_transform_status_errors(get, servicex): with pytest.raises(ValueError) as err: get.return_value.__aenter__.return_value.status = 404 await servicex.get_transform_status("b8c508d0-ccf2-4deb-a1f7-65c839eebabf") - assert "Transform ID b8c508d0-ccf2-4deb-a1f7-65c839eebabf not found" == str(err.value) + assert "Transform ID b8c508d0-ccf2-4deb-a1f7-65c839eebabf not found" == str( + err.value + ) with pytest.raises(RuntimeError) as err: get.return_value.__aenter__.return_value.status = 500 async def patch_json(): - return {'message': 'fifteen'} + return {"message": "fifteen"} + get.return_value.__aenter__.return_value.json = patch_json await servicex.get_transform_status("b8c508d0-ccf2-4deb-a1f7-65c839eebabf") assert "ServiceX WebAPI Error during transformation" in str(err.value) @pytest.mark.asyncio -@patch('servicex.servicex_adapter.TransformStatus', side_effect=RuntimeError) -@patch('servicex.servicex_adapter.RetryClient.get') -async def test_get_tranform_status_retry_error(get, - mock_transform_status, - servicex, - transform_status_response): +@patch("servicex.servicex_adapter.TransformStatus", side_effect=RuntimeError) +@patch("servicex.servicex_adapter.RetryClient.get") +async def test_get_tranform_status_retry_error( + get, mock_transform_status, servicex, transform_status_response +): with pytest.raises(RuntimeError) as err: - get.return_value.__aenter__.return_value.json.return_value = transform_status_response['requests'][0] # NOQA: E501 + get.return_value.__aenter__.return_value.json.return_value = ( + transform_status_response["requests"][0] + ) # NOQA: E501 get.return_value.__aenter__.return_value.status = 200 await servicex.get_transform_status("b8c508d0-ccf2-4deb-a1f7-65c839eebabf") assert "ServiceX WebAPI Error while getting transform status:" in str(err.value) @@ -452,12 +492,13 @@ async def test_get_tranform_status_retry_error(get, async def test_get_authorization(servicex): servicex.token = "token" servicex.refresh_token = "refresh" - with patch('google.auth.jwt.decode', return_value={'exp': time.time() + 90}): + with 
patch("google.auth.jwt.decode", return_value={"exp": time.time() + 90}): r = await servicex._get_authorization() assert r.get("Authorization") == "Bearer token" - with patch('servicex.servicex_adapter.ServiceXAdapter._get_token', return_value='token')\ - as get_token: - with patch('google.auth.jwt.decode', return_value={'exp': time.time() - 90}): + with patch( + "servicex.servicex_adapter.ServiceXAdapter._get_token", return_value="token" + ) as get_token: + with patch("google.auth.jwt.decode", return_value={"exp": time.time() - 90}): r = await servicex._get_authorization() get_token.assert_called_once() diff --git a/tests/test_servicex_app_transforms.py b/tests/test_servicex_app_transforms.py index ef92a029..00c587d1 100644 --- a/tests/test_servicex_app_transforms.py +++ b/tests/test_servicex_app_transforms.py @@ -1,5 +1,9 @@ from servicex.app.transforms import LogLevel, TimeFrame -from servicex.app.transforms import add_query, select_time, create_kibana_link_parameters +from servicex.app.transforms import ( + add_query, + select_time, + create_kibana_link_parameters, +) def test_add_query(): @@ -29,44 +33,64 @@ def test_select_time(): def test_create_kibana_link_parameters(): - initial_log_url = "https://atlas-kibana.mwt2.org:5601/s/servicex/app"\ - "/dashboards?auth_provider_hint=anonymous1#/view/"\ - "2d2b3b40-f34e-11ed-a6d8-9f6a16cd6d78?embed=true&_g=()"\ - "&show-time-filter=true&hide-filter-bar=true" + initial_log_url = ( + "https://atlas-kibana.mwt2.org:5601/s/servicex/app" + "/dashboards?auth_provider_hint=anonymous1#/view/" + "2d2b3b40-f34e-11ed-a6d8-9f6a16cd6d78?embed=true&_g=()" + "&show-time-filter=true&hide-filter-bar=true" + ) transform_id = "d2ede739-9779-4075-95b1-0c7fae1de408" log_level = LogLevel.error time_frame = TimeFrame.day - final_url = "https://atlas-kibana.mwt2.org:5601/s/servicex/app/dashboards?"\ - "auth_provider_hint=anonymous1#/view/2d2b3b40-f34e-11ed-a6d8-9f6a16cd6d78?"\ - "embed=true&_g=(time:(from:now%2Fd,to:now%2Fd))"\ - "&_a=(filters:!((query:(match_phrase:"\ - "(requestId:'d2ede739-9779-4075-95b1-0c7fae1de408'))),"\ - "(query:(match_phrase:(level:'error')))))&show-time-filter=true"\ - "&hide-filter-bar=true" - assert create_kibana_link_parameters(initial_log_url, transform_id, - log_level, time_frame) == final_url + final_url = ( + "https://atlas-kibana.mwt2.org:5601/s/servicex/app/dashboards?" + "auth_provider_hint=anonymous1#/view/2d2b3b40-f34e-11ed-a6d8-9f6a16cd6d78?" + "embed=true&_g=(time:(from:now%2Fd,to:now%2Fd))" + "&_a=(filters:!((query:(match_phrase:" + "(requestId:'d2ede739-9779-4075-95b1-0c7fae1de408')))," + "(query:(match_phrase:(level:'error')))))&show-time-filter=true" + "&hide-filter-bar=true" + ) + assert ( + create_kibana_link_parameters( + initial_log_url, transform_id, log_level, time_frame + ) + == final_url + ) transform_id = "93713b34-2f0b-4d53-8412-8afa98626516" log_level = LogLevel.info time_frame = TimeFrame.month - final_url = "https://atlas-kibana.mwt2.org:5601/s/servicex/app/dashboards?"\ - "auth_provider_hint=anonymous1#/view/2d2b3b40-f34e-11ed-a6d8-9f6a16cd6d78?"\ - "embed=true&_g=(time:(from:now-30d%2Fd,to:now))"\ - "&_a=(filters:!((query:(match_phrase:"\ - "(requestId:'93713b34-2f0b-4d53-8412-8afa98626516'))),"\ - "(query:(match_phrase:(level:'info')))))&show-time-filter=true"\ - "&hide-filter-bar=true" - assert create_kibana_link_parameters(initial_log_url, transform_id, - log_level, time_frame) == final_url + final_url = ( + "https://atlas-kibana.mwt2.org:5601/s/servicex/app/dashboards?" 
+ "auth_provider_hint=anonymous1#/view/2d2b3b40-f34e-11ed-a6d8-9f6a16cd6d78?" + "embed=true&_g=(time:(from:now-30d%2Fd,to:now))" + "&_a=(filters:!((query:(match_phrase:" + "(requestId:'93713b34-2f0b-4d53-8412-8afa98626516')))," + "(query:(match_phrase:(level:'info')))))&show-time-filter=true" + "&hide-filter-bar=true" + ) + assert ( + create_kibana_link_parameters( + initial_log_url, transform_id, log_level, time_frame + ) + == final_url + ) transform_id = "93713b34-2f0b-4d53-8412-8afa98626516" log_level = None time_frame = TimeFrame.month - final_url = "https://atlas-kibana.mwt2.org:5601/s/servicex/app/dashboards?"\ - "auth_provider_hint=anonymous1#/view/2d2b3b40-f34e-11ed-a6d8-9f6a16cd6d78?"\ - "embed=true&_g=(time:(from:now-30d%2Fd,to:now))"\ - "&_a=(filters:!((query:(match_phrase:"\ - "(requestId:'93713b34-2f0b-4d53-8412-8afa98626516')))))"\ - "&show-time-filter=true&hide-filter-bar=true" - assert create_kibana_link_parameters(initial_log_url, transform_id, - log_level, time_frame) == final_url + final_url = ( + "https://atlas-kibana.mwt2.org:5601/s/servicex/app/dashboards?" + "auth_provider_hint=anonymous1#/view/2d2b3b40-f34e-11ed-a6d8-9f6a16cd6d78?" + "embed=true&_g=(time:(from:now-30d%2Fd,to:now))" + "&_a=(filters:!((query:(match_phrase:" + "(requestId:'93713b34-2f0b-4d53-8412-8afa98626516')))))" + "&show-time-filter=true&hide-filter-bar=true" + ) + assert ( + create_kibana_link_parameters( + initial_log_url, transform_id, log_level, time_frame + ) + == final_url + ) diff --git a/tests/test_servicex_client.py b/tests/test_servicex_client.py index 7b10b288..a2e3ac8e 100644 --- a/tests/test_servicex_client.py +++ b/tests/test_servicex_client.py @@ -39,7 +39,7 @@ @fixture def servicex_adaptor(mocker): - adapter_mock = mocker.patch('servicex.servicex_client.ServiceXAdapter') + adapter_mock = mocker.patch("servicex.servicex_client.ServiceXAdapter") mock_adapter = MagicMock(spec=ServiceXAdapter) adapter_mock.return_value = mock_adapter @@ -48,13 +48,10 @@ def servicex_adaptor(mocker): @fixture def mock_cache(mocker): - cache_mock = mocker.patch('servicex.servicex_client.QueryCache') + cache_mock = mocker.patch("servicex.servicex_client.QueryCache") mock_cache = MagicMock(spec=QueryCache) mock_cache.get_codegen_by_backend.return_value = { - "codegens": { - "ROOT": "my_root_generator", - "UPROOT": "my_uproot_generator" - } + "codegens": {"ROOT": "my_root_generator", "UPROOT": "my_uproot_generator"} } cache_mock.return_value = mock_cache return cache_mock @@ -108,32 +105,35 @@ def transformed_results() -> TransformedResults: "data_dir": "/tmp/servicex/results", "file_list": [ "/tmp/servicex/results/output1.parquet", - "/tmp/servicex/results/output2.parquet" + "/tmp/servicex/results/output2.parquet", ], "signed_url_list": [ "https://example.com/signed-url-1", - "https://example.com/signed-url-2" + "https://example.com/signed-url-2", ], "files": 2, "result_format": ResultFormat.parquet, - "log_url": "https://logs.servicex.com/request-789" + "log_url": "https://logs.servicex.com/request-789", } return TransformedResults(**base_data) def test_delete_transform_from_cache(mock_cache, servicex_adaptor, transformed_results): - with patch('servicex.servicex_client.QueryCache') as mock_cache: - mock_cache.return_value.get_transform_by_request_id = \ - MagicMock(return_value=transformed_results) - with patch('servicex.servicex_client.shutil.rmtree') as mock_rmtree: + with patch("servicex.servicex_client.QueryCache") as mock_cache: + mock_cache.return_value.get_transform_by_request_id = MagicMock( + 
return_value=transformed_results + ) + with patch("servicex.servicex_client.shutil.rmtree") as mock_rmtree: sx = ServiceXClient(config_path="tests/example_config.yaml") sx.delete_transform_from_cache("servicex-request-789") - mock_cache.return_value.\ - get_transform_by_request_id.\ - assert_called_once_with("servicex-request-789") - mock_rmtree.assert_called_once_with('/tmp/servicex/results', ignore_errors=True) - mock_cache.return_value.\ - delete_record_by_request_id.\ - assert_called_once_with("servicex-request-789") + mock_cache.return_value.get_transform_by_request_id.assert_called_once_with( + "servicex-request-789" + ) + mock_rmtree.assert_called_once_with( + "/tmp/servicex/results", ignore_errors=True + ) + mock_cache.return_value.delete_record_by_request_id.assert_called_once_with( + "servicex-request-789" + ) diff --git a/tests/test_servicex_dataset.py b/tests/test_servicex_dataset.py index 6acfaf69..628dc2e0 100644 --- a/tests/test_servicex_dataset.py +++ b/tests/test_servicex_dataset.py @@ -36,8 +36,14 @@ from servicex.dataset_identifier import FileListDataset from servicex.expandable_progress import ExpandableProgress from servicex.func_adl.func_adl_dataset import FuncADLQuery_Uproot -from servicex.models import (TransformStatus, Status, ResultFile, ResultFormat, - TransformRequest, TransformedResults) +from servicex.models import ( + TransformStatus, + Status, + ResultFile, + ResultFormat, + TransformRequest, + TransformedResults, +) from servicex.query_cache import QueryCache from servicex.query_core import ServiceXException, Query from servicex.servicex_client import ServiceXClient @@ -169,10 +175,13 @@ def servicex(): file2 = ResultFile(filename="file2", size=100, extension="parquet") -def transformed_results(transform: TransformRequest, - completed_status: TransformStatus, data_dir: str, - file_list: List[str], - signed_urls) -> TransformedResults: +def transformed_results( + transform: TransformRequest, + completed_status: TransformStatus, + data_dir: str, + file_list: List[str], + signed_urls, +) -> TransformedResults: return TransformedResults( hash=transform.compute_hash(), title=transform.title, @@ -184,7 +193,7 @@ def transformed_results(transform: TransformRequest, signed_url_list=signed_urls, files=completed_status.files, result_format=transform.result_format, - log_url=completed_status.log_url + log_url=completed_status.log_url, ) @@ -206,13 +215,15 @@ async def test_submit(mocker): mock_minio = AsyncMock() mock_minio.list_bucket = AsyncMock(side_effect=[[file1], [file1, file2]]) - mock_minio.download_file = AsyncMock(side_effect=lambda a, _, shorten_filename: PurePath(a)) + mock_minio.download_file = AsyncMock( + side_effect=lambda a, _, shorten_filename: PurePath(a) + ) mock_cache = mocker.MagicMock(QueryCache) mock_cache.get_transform_by_hash = mocker.MagicMock(return_value=None) mock_cache.transformed_results = mocker.MagicMock(side_effect=transformed_results) mock_cache.cache_transform = mocker.MagicMock(side_effect=cache_transform) - mock_cache.cache_path_for_transform = mocker.MagicMock(return_value=PurePath('.')) + mock_cache.cache_path_for_transform = mocker.MagicMock(return_value=PurePath(".")) mocker.patch("servicex.minio_adapter.MinioAdapter", return_value=mock_minio) did = FileListDataset("/foo/bar/baz.root") datasource = Query( @@ -226,10 +237,11 @@ async def test_submit(mocker): datasource.query_string_generator = FuncADLQuery_Uproot().FromTree("nominal") with ExpandableProgress(display_progress=False) as progress: datasource.result_format = 
ResultFormat.parquet - result = await datasource.submit_and_download(signed_urls_only=False, - expandable_progress=progress) + result = await datasource.submit_and_download( + signed_urls_only=False, expandable_progress=progress + ) print(mock_minio.download_file.call_args) - assert result.file_list == ['file1', 'file2'] + assert result.file_list == ["file1", "file2"] mock_cache.cache_transform.assert_called_once() @@ -247,13 +259,15 @@ async def test_submit_partial_success(mocker): mock_minio = AsyncMock() mock_minio.list_bucket = AsyncMock(side_effect=[[file1], [file1]]) - mock_minio.download_file = AsyncMock(side_effect=lambda a, _, shorten_filename: PurePath(a)) + mock_minio.download_file = AsyncMock( + side_effect=lambda a, _, shorten_filename: PurePath(a) + ) mock_cache = mocker.MagicMock(QueryCache) mock_cache.get_transform_by_hash = mocker.MagicMock(return_value=None) mock_cache.transformed_results = mocker.MagicMock(side_effect=transformed_results) mock_cache.cache_transform = mocker.MagicMock(side_effect=cache_transform) - mock_cache.cache_path_for_transform = mocker.MagicMock(return_value=PurePath('.')) + mock_cache.cache_path_for_transform = mocker.MagicMock(return_value=PurePath(".")) mocker.patch("servicex.minio_adapter.MinioAdapter", return_value=mock_minio) did = FileListDataset("/foo/bar/baz.root") datasource = Query( @@ -267,16 +281,17 @@ async def test_submit_partial_success(mocker): datasource.query_string_generator = FuncADLQuery_Uproot().FromTree("nominal") with ExpandableProgress(display_progress=False) as progress: datasource.result_format = ResultFormat.parquet - result = await datasource.submit_and_download(signed_urls_only=False, - expandable_progress=progress) + result = await datasource.submit_and_download( + signed_urls_only=False, expandable_progress=progress + ) print(mock_minio.download_file.call_args) - assert result.file_list == ['file1'] + assert result.file_list == ["file1"] mock_cache.cache_transform.assert_not_called() @pytest.mark.asyncio async def test_use_of_cache(mocker): - """ Do we pick up the cache on the second request for the same transform? 
""" + """Do we pick up the cache on the second request for the same transform?""" servicex = AsyncMock() servicex.submit_transform = AsyncMock() servicex.submit_transform.return_value = {"request_id": '123-456-789"'} @@ -287,8 +302,10 @@ async def test_use_of_cache(mocker): ] mock_minio = AsyncMock() mock_minio.list_bucket = AsyncMock(return_value=[file1, file2]) - mock_minio.download_file = AsyncMock(side_effect=lambda a, _, shorten_filename: PurePath(a)) - mock_minio.get_signed_url = AsyncMock(side_effect=['http://file1', 'http://file2']) + mock_minio.download_file = AsyncMock( + side_effect=lambda a, _, shorten_filename: PurePath(a) + ) + mock_minio.get_signed_url = AsyncMock(side_effect=["http://file1", "http://file2"]) mocker.patch("servicex.minio_adapter.MinioAdapter", return_value=mock_minio) @@ -306,10 +323,13 @@ async def test_use_of_cache(mocker): ) datasource.query_string_generator = FuncADLQuery_Uproot().FromTree("nominal") datasource.result_format = ResultFormat.parquet - upd = mocker.patch.object(cache, 'update_record', side_effect=cache.update_record) + upd = mocker.patch.object( + cache, "update_record", side_effect=cache.update_record + ) with ExpandableProgress(display_progress=False) as progress: - result1 = await datasource.submit_and_download(signed_urls_only=True, - expandable_progress=progress) + result1 = await datasource.submit_and_download( + signed_urls_only=True, expandable_progress=progress + ) upd.assert_not_called() upd.reset_mock() assert mock_minio.get_signed_url.await_count == 2 @@ -326,10 +346,13 @@ async def test_use_of_cache(mocker): query_cache=cache, config=config, ) - datasource2.query_string_generator = FuncADLQuery_Uproot().FromTree("nominal") + datasource2.query_string_generator = FuncADLQuery_Uproot().FromTree( + "nominal" + ) datasource2.result_format = ResultFormat.parquet - result2 = await datasource2.submit_and_download(signed_urls_only=True, - expandable_progress=progress) + result2 = await datasource2.submit_and_download( + signed_urls_only=True, expandable_progress=progress + ) servicex2.assert_not_awaited() mock_minio.list_bucket.assert_not_awaited() mock_minio.get_signed_url.assert_not_awaited() @@ -341,8 +364,9 @@ async def test_use_of_cache(mocker): mock_minio.list_bucket.reset_mock(side_effect=True) # third round, should hit the cache and download files (and call update_record) with ExpandableProgress(display_progress=False) as progress: - await datasource.submit_and_download(signed_urls_only=False, - expandable_progress=progress) + await datasource.submit_and_download( + signed_urls_only=False, expandable_progress=progress + ) servicex.assert_not_awaited() assert mock_minio.download_file.await_count == 2 upd.assert_called_once() @@ -350,8 +374,9 @@ async def test_use_of_cache(mocker): mock_minio.list_bucket.reset_mock() mock_minio.download_file.reset_mock() with ExpandableProgress(display_progress=False) as progress: - await datasource.submit_and_download(signed_urls_only=False, - expandable_progress=progress) + await datasource.submit_and_download( + signed_urls_only=False, expandable_progress=progress + ) servicex.assert_not_awaited() mock_minio.list_bucket.assert_not_awaited() mock_minio.download_file.assert_not_awaited() @@ -372,12 +397,14 @@ async def test_submit_cancel(mocker): mock_minio = AsyncMock() mock_minio.list_bucket = AsyncMock(side_effect=[[file1], [file1]]) - mock_minio.download_file = AsyncMock(side_effect=lambda a, _, shorten_filename: PurePath(a)) + mock_minio.download_file = AsyncMock( + side_effect=lambda a, _, 
shorten_filename: PurePath(a) + ) mock_cache = mocker.MagicMock(QueryCache) mock_cache.get_transform_by_hash = mocker.MagicMock(return_value=None) mock_cache.cache_transform = mocker.MagicMock(side_effect=cache_transform) - mock_cache.cache_path_for_transform = mocker.MagicMock(return_value=PurePath('.')) + mock_cache.cache_path_for_transform = mocker.MagicMock(return_value=PurePath(".")) mocker.patch("servicex.minio_adapter.MinioAdapter", return_value=mock_minio) did = FileListDataset("/foo/bar/baz.root") datasource = Query( @@ -392,8 +419,9 @@ async def test_submit_cancel(mocker): with ExpandableProgress(display_progress=False) as progress: datasource.result_format = ResultFormat.parquet with pytest.raises(ServiceXException): - _ = await datasource.submit_and_download(signed_urls_only=False, - expandable_progress=progress) + _ = await datasource.submit_and_download( + signed_urls_only=False, expandable_progress=progress + ) mock_cache.cache_transform.assert_not_called() @@ -410,12 +438,14 @@ async def test_submit_fatal(mocker): mock_minio = AsyncMock() mock_minio.list_bucket = AsyncMock(side_effect=[[file1], [file1]]) - mock_minio.download_file = AsyncMock(side_effect=lambda a, _, shorten_filename: PurePath(a)) + mock_minio.download_file = AsyncMock( + side_effect=lambda a, _, shorten_filename: PurePath(a) + ) mock_cache = mocker.MagicMock(QueryCache) mock_cache.get_transform_by_hash = mocker.MagicMock(return_value=None) mock_cache.cache_transform = mocker.MagicMock(side_effect=cache_transform) - mock_cache.cache_path_for_transform = mocker.MagicMock(return_value=PurePath('.')) + mock_cache.cache_path_for_transform = mocker.MagicMock(return_value=PurePath(".")) mocker.patch("servicex.minio_adapter.MinioAdapter", return_value=mock_minio) did = FileListDataset("/foo/bar/baz.root") datasource = Query( @@ -430,15 +460,17 @@ async def test_submit_fatal(mocker): with ExpandableProgress(display_progress=False) as progress: datasource.result_format = ResultFormat.parquet with pytest.raises(ServiceXException): - _ = await datasource.submit_and_download(signed_urls_only=False, - expandable_progress=progress) + _ = await datasource.submit_and_download( + signed_urls_only=False, expandable_progress=progress + ) mock_cache.cache_transform.assert_not_called() @pytest.mark.asyncio async def test_submit_generic(mocker, codegen_list): - """ Uses Uproot-Raw classes which go through the generic query mechanism """ + """Uses Uproot-Raw classes which go through the generic query mechanism""" import json + sx = AsyncMock() sx.submit_transform = AsyncMock() sx.submit_transform.return_value = {"request_id": '123-456-789"'} @@ -456,44 +488,47 @@ async def test_submit_generic(mocker, codegen_list): mock_cache = mocker.MagicMock(QueryCache) mocker.patch("servicex.minio_adapter.MinioAdapter", return_value=mock_minio) did = FileListDataset("/foo/bar/baz.root") - with patch('servicex.servicex_adapter.ServiceXAdapter.get_code_generators', - return_value=codegen_list): - client = ServiceXClient(backend='servicex-uc-af', config_path='tests/example_config.yaml') + with patch( + "servicex.servicex_adapter.ServiceXAdapter.get_code_generators", + return_value=codegen_list, + ): + client = ServiceXClient( + backend="servicex-uc-af", config_path="tests/example_config.yaml" + ) client.servicex = sx client.query_cache = mock_cache datasource = client.generic_query( - dataset_identifier=did, - query=UprootRawQuery({'treename': 'CollectionTree'}) + dataset_identifier=did, query=UprootRawQuery({"treename": "CollectionTree"}) ) 
with ExpandableProgress(display_progress=False) as progress: datasource.result_format = ResultFormat.parquet - _ = await datasource.submit_and_download(signed_urls_only=False, - expandable_progress=progress) + _ = await datasource.submit_and_download( + signed_urls_only=False, expandable_progress=progress + ) # same thing but a list argument to UprootRawQuery (UprootRawQuery test...) datasource = client.generic_query( - dataset_identifier=did, - query=UprootRawQuery({'treename': 'CollectionTree'}) + dataset_identifier=did, query=UprootRawQuery({"treename": "CollectionTree"}) ) with ExpandableProgress(display_progress=False) as progress: datasource.result_format = ResultFormat.parquet - _ = await datasource.submit_and_download(signed_urls_only=False, - expandable_progress=progress) + _ = await datasource.submit_and_download( + signed_urls_only=False, expandable_progress=progress + ) assert isinstance(json.loads(datasource.generate_selection_string()), list) @pytest.mark.asyncio async def test_submit_cancelled(mocker, codegen_list): - """ Uses Uproot-Raw classes which go through the query cancelled mechanism """ + """Uses Uproot-Raw classes which go through the query cancelled mechanism""" import json + sx = AsyncMock() sx.submit_transform = AsyncMock() sx.submit_transform.return_value = {"request_id": '123-456-789"'} sx.get_transform_status = AsyncMock() - sx.get_transform_status.side_effect = [ - transform_status4 - ] + sx.get_transform_status.side_effect = [transform_status4] mock_minio = AsyncMock() mock_minio.list_bucket = AsyncMock(side_effect=[[file1], [file1, file2]]) @@ -502,20 +537,24 @@ async def test_submit_cancelled(mocker, codegen_list): mock_cache = mocker.MagicMock(QueryCache) mocker.patch("servicex.minio_adapter.MinioAdapter", return_value=mock_minio) did = FileListDataset("/foo/bar/baz.root") - with patch('servicex.servicex_adapter.ServiceXAdapter.get_code_generators', - return_value=codegen_list): - client = ServiceXClient(backend='servicex-uc-af', config_path='tests/example_config.yaml') + with patch( + "servicex.servicex_adapter.ServiceXAdapter.get_code_generators", + return_value=codegen_list, + ): + client = ServiceXClient( + backend="servicex-uc-af", config_path="tests/example_config.yaml" + ) client.servicex = sx client.query_cache = mock_cache datasource = client.generic_query( - dataset_identifier=did, - query=UprootRawQuery({'treename': 'CollectionTree'}) + dataset_identifier=did, query=UprootRawQuery({"treename": "CollectionTree"}) ) with ExpandableProgress(display_progress=False) as progress: datasource.result_format = ResultFormat.parquet - _ = await datasource.submit_and_download(signed_urls_only=False, - expandable_progress=progress) + _ = await datasource.submit_and_download( + signed_urls_only=False, expandable_progress=progress + ) assert isinstance(json.loads(datasource.generate_selection_string()), list) @@ -533,37 +572,40 @@ def test_transform_request(): query_cache=None, config=Configuration(api_endpoints=[]), ) - datasource.query_string_generator = (FuncADLQuery_Uproot() - .FromTree("nominal") - .Select(lambda e: {"lep_pt": e["lep_pt"]})) + datasource.query_string_generator = ( + FuncADLQuery_Uproot() + .FromTree("nominal") + .Select(lambda e: {"lep_pt": e["lep_pt"]}) + ) - q = ( - datasource.set_result_format(ResultFormat.parquet) - .transform_request + q = datasource.set_result_format(ResultFormat.parquet).transform_request + assert ( + q.selection == "(call Select (call EventDataset 'bogus.root' 'nominal') " + "(lambda (list e) (dict (list 
'lep_pt') " + "(list (subscript e 'lep_pt')))))" ) - assert q.selection == "(call Select (call EventDataset 'bogus.root' 'nominal') " \ - "(lambda (list e) (dict (list 'lep_pt') " \ - "(list (subscript e 'lep_pt')))))" cache.close() @pytest.mark.asyncio async def test_use_of_ignore_cache(mocker, servicex): - """ Run a normal request -> run the same request with ignore cache -> run request with cache - After the first request the request is cached - After the second request, transformer runs again by ignoring cache - After the third request, the data is retrieved from the cache + """Run a normal request -> run the same request with ignore cache -> run request with cache + After the first request the request is cached + After the second request, transformer runs again by ignoring cache + After the third request, the data is retrieved from the cache """ # Prepare ServiceX - servicex.get_transform_status.side_effect = cycle([ - transform_status1, - transform_status3, - ]) + servicex.get_transform_status.side_effect = cycle( + [ + transform_status1, + transform_status3, + ] + ) # Prepare Minio mock_minio = AsyncMock() mock_minio.list_bucket = AsyncMock(return_value=[file1, file2]) - mock_minio.get_signed_url = AsyncMock(side_effect=['http://file1', 'http://file2']) + mock_minio.get_signed_url = AsyncMock(side_effect=["http://file1", "http://file2"]) mocker.patch("servicex.minio_adapter.MinioAdapter", return_value=mock_minio) did = FileListDataset("/foo/bar/baz.root") @@ -580,8 +622,9 @@ async def test_use_of_ignore_cache(mocker, servicex): query_cache=cache, config=config, ) - datasource_without_ignore_cache.query_string_generator = \ + datasource_without_ignore_cache.query_string_generator = ( FuncADLQuery_Uproot().FromTree("nominal") + ) datasource_without_ignore_cache.result_format = ResultFormat.parquet # Datasouce with ignore cache @@ -592,27 +635,36 @@ async def test_use_of_ignore_cache(mocker, servicex): sx_adapter=servicex, query_cache=cache, config=config, - ignore_cache=True + ignore_cache=True, ) - datasource_with_ignore_cache.query_string_generator = \ + datasource_with_ignore_cache.query_string_generator = ( FuncADLQuery_Uproot().FromTree("nominal") + ) datasource_with_ignore_cache.result_format = ResultFormat.parquet # 1st time sending the request - upd = mocker.patch.object(cache, 'update_record', side_effect=cache.update_record) + upd = mocker.patch.object( + cache, "update_record", side_effect=cache.update_record + ) with ExpandableProgress(display_progress=False) as progress: - await datasource_without_ignore_cache.submit_and_download(signed_urls_only=True, - expandable_progress=progress) # noqa + await datasource_without_ignore_cache.submit_and_download( + signed_urls_only=True, expandable_progress=progress + ) # noqa upd.assert_not_called() upd.reset_mock() assert mock_minio.get_signed_url.await_count == 2 # 2nd time sending the same request with ignore_cache (So it will run again) - mock_minio.get_signed_url = AsyncMock(side_effect=['http://file1', 'http://file2']) - upd = mocker.patch.object(cache, 'update_record', side_effect=cache.update_record) + mock_minio.get_signed_url = AsyncMock( + side_effect=["http://file1", "http://file2"] + ) + upd = mocker.patch.object( + cache, "update_record", side_effect=cache.update_record + ) with ExpandableProgress(display_progress=False) as progress: - await datasource_with_ignore_cache.submit_and_download(signed_urls_only=True, - expandable_progress=progress) # noqa + await datasource_with_ignore_cache.submit_and_download( + 
signed_urls_only=True, expandable_progress=progress + ) # noqa upd.assert_not_called() upd.reset_mock() assert mock_minio.get_signed_url.await_count == 2 @@ -625,8 +677,9 @@ async def test_use_of_ignore_cache(mocker, servicex): mock_minio.list_bucket.reset_mock() mock_minio.download_file.reset_mock() with ExpandableProgress(display_progress=False) as progress: - res = await datasource_without_ignore_cache.submit_and_download(signed_urls_only=True, - expandable_progress=progress) # noqa + res = await datasource_without_ignore_cache.submit_and_download( + signed_urls_only=True, expandable_progress=progress + ) # noqa mock_minio.list_bucket.assert_not_awaited() mock_minio.download_file.assert_not_awaited() assert len(res.signed_url_list) == 2