diff --git a/gnomad/resources/resource_utils.py b/gnomad/resources/resource_utils.py
index 895af63e2..78fbf908c 100644
--- a/gnomad/resources/resource_utils.py
+++ b/gnomad/resources/resource_utils.py
@@ -2,8 +2,8 @@
 
 import logging
 from abc import ABC, abstractmethod
-from functools import reduce
-from typing import Any, Callable, Dict, List, Optional
+from functools import reduce, wraps
+from typing import Any, Callable, Dict, Iterable, List, Optional
 
 import hail as hl
 from hail.linalg import BlockMatrix
@@ -373,9 +373,52 @@ def __init__(self, default_version: str, versions: Dict[str, BlockMatrixResource
         super().__init__(default_version, versions)
 
 
+class ResourceNotAvailable(Exception):
+    """Exception raised if a resource is not available from the selected source."""
+
+
 class GnomadPublicResource(BaseResource, ABC):
     """Base class for the gnomAD project's public resources."""
 
+    def __init_subclass__(cls, *, read_resource_methods: Iterable[str] = ()) -> None:
+        """
+        Wrap the subclass's resource-reading methods with an availability check.
+
+        :param read_resource_methods: Names of the methods on the subclass that read the resource.
+        """
+        super().__init_subclass__()
+
+        # Some resources may not be available from all sources due to delays in syncing, etc.
+        # This wraps all methods that read the resource and adds a check for if the resource
+        # is available from the selected source. If the resource is not available, this
+        # throws a more helpful error than if the read were simply allowed to fail.
+        def _wrap_read_resource_method(method_name):
+            original_method = getattr(cls, method_name)
+
+            @wraps(original_method)
+            def read_resource(self, *args, **kwargs):
+                # If one of the known sources is selected, check if the resource is available.
+                # For custom sources, skip the check and attempt to read the resource.
+                resource_source = gnomad_public_resource_configuration.source
+                if (
+                    isinstance(resource_source, GnomadPublicResourceSource)
+                    and resource_source != GnomadPublicResourceSource.GNOMAD
+                ):
+                    if not self.is_resource_available():
+                        raise ResourceNotAvailable(
+                            f"This resource is not currently available from {resource_source.value}.\n\n"
+                            "To load resources directly from gnomAD instead, use:\n\n"
+                            ">>> from gnomad.resources.config import gnomad_public_resource_configuration, GnomadPublicResourceSource\n"
+                            ">>> gnomad_public_resource_configuration.source = GnomadPublicResourceSource.GNOMAD"
+                        )
+
+                return original_method(self, *args, **kwargs)
+
+            setattr(cls, method_name, read_resource)
+
+        for method_name in read_resource_methods:
+            _wrap_read_resource_method(method_name)
+
     def _get_path(self) -> str:
         resource_source = gnomad_public_resource_configuration.source
         if resource_source == GnomadPublicResourceSource.GNOMAD:
@@ -406,20 +449,46 @@ def _set_path(self, path):
 
         return super()._set_path(path)
 
+    def is_resource_available(self) -> bool:
+        """
+        Check if this resource is available from the selected source.
+
+        :return: True if the resource is available.
+        """
+        path = self.path
+
+        # Hail Tables, MatrixTables, and BlockMatrices are directories.
+        # For those, check for the existence of the _SUCCESS object.
+        path_to_test = (
+            f"{path}/_SUCCESS"
+            if path.endswith((".ht", ".mt", ".bm"))
+            else path
+        )
+
+        return hl.current_backend().fs.exists(path_to_test)
+
 
-class GnomadPublicTableResource(TableResource, GnomadPublicResource):
+class GnomadPublicTableResource(
+    TableResource, GnomadPublicResource, read_resource_methods=("ht",)
+):
     """Resource class for a public Hail Table published by the gnomAD project."""
 
 
-class GnomadPublicMatrixTableResource(MatrixTableResource, GnomadPublicResource):
+class GnomadPublicMatrixTableResource(
+    MatrixTableResource, GnomadPublicResource, read_resource_methods=("mt",)
+):
     """Resource class for a public Hail MatrixTable published by the gnomAD project."""
 
 
-class GnomadPublicPedigreeResource(PedigreeResource, GnomadPublicResource):
+class GnomadPublicPedigreeResource(
+    PedigreeResource, GnomadPublicResource, read_resource_methods=("ht", "pedigree")
+):
     """Resource class for a public pedigree published by the gnomAD project."""
 
 
-class GnomadPublicBlockMatrixResource(BlockMatrixResource, GnomadPublicResource):
+class GnomadPublicBlockMatrixResource(
+    BlockMatrixResource, GnomadPublicResource, read_resource_methods=("bm",)
+):
     """Resource class for a public Hail BlockMatrix published by the gnomAD project."""
 
 
diff --git a/tests/resources/test_resource_utils.py b/tests/resources/test_resource_utils.py
index 901021712..ab9664d39 100644
--- a/tests/resources/test_resource_utils.py
+++ b/tests/resources/test_resource_utils.py
@@ -138,8 +138,9 @@ def test_read_gnomad_public_table_resource(
 
         gnomad_public_resource_configuration.source = source
 
-        resource.ht()
-        read_table.assert_called_with(expected_read_path)
+        with patch.object(resource, "is_resource_available", return_value=True):
+            resource.ht()
+            read_table.assert_called_with(expected_read_path)
 
 
 class TestGnomadPublicMatrixTableResource:
@@ -158,8 +159,9 @@ def test_read_gnomad_public_matrix_table_resource(
 
         gnomad_public_resource_configuration.source = source
 
-        resource.mt()
-        read_matrix_table.assert_called_with(expected_read_path)
+        with patch.object(resource, "is_resource_available", return_value=True):
+            resource.mt()
+            read_matrix_table.assert_called_with(expected_read_path)
 
 
 class TestGnomadPublicPedigreeResource:
@@ -178,9 +180,10 @@ def test_read_gnomad_public_pedigree_resource(
 
         gnomad_public_resource_configuration.source = source
 
-        resource.pedigree()
-        read_pedigree.assert_called()
-        assert read_pedigree.call_args[0][0] == expected_read_path
+        with patch.object(resource, "is_resource_available", return_value=True):
+            resource.pedigree()
+            read_pedigree.assert_called()
+            assert read_pedigree.call_args[0][0] == expected_read_path
 
     @pytest.mark.parametrize(
         "resource_path,source,expected_read_path",
@@ -195,9 +198,10 @@ def test_import_gnomad_public_pedigree_resource(
 
         gnomad_public_resource_configuration.source = source
 
-        resource.ht()
-        import_fam.assert_called()
-        assert import_fam.call_args[0][0] == expected_read_path
+        with patch.object(resource, "is_resource_available", return_value=True):
+            resource.ht()
+            import_fam.assert_called()
+            assert import_fam.call_args[0][0] == expected_read_path
 
 
 class TestGnomadPublicBlockMatrixResource:
@@ -216,5 +220,6 @@ def test_read_gnomad_public_block_matrix_resource(
 
         gnomad_public_resource_configuration.source = source
 
-        resource.bm()
-        read_block_matrix.assert_called_with(expected_read_path)
+        with patch.object(resource, "is_resource_available", return_value=True):
+            resource.bm()
+            read_block_matrix.assert_called_with(expected_read_path)