Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
* Added function `make_faf_index_dict` to create a look-up dictionary for entries contained in the filter allele frequency annotation array [(#349)](https://github.com/broadinstitute/gnomad_methods/pull/349/files)
* Added function `make_freq_index_dict` to create a look-up dictionary for entries contained in the frequency annotation array [(#349)](https://github.com/broadinstitute/gnomad_methods/pull/349/files)
* VersionedResource objects are no longer subclasses of BaseResource [(#359)](https://github.com/broadinstitute/gnomad_methods/pull/359)
* gnomAD resources can now be imported from different sources [(#373)](https://github.com/broadinstitute/gnomad_methods/pull/373)

## Version 0.5.0 - April 22nd, 2021

Expand Down
47 changes: 47 additions & 0 deletions gnomad/resources/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
"""Configuration for loading resources."""

from enum import Enum
from typing import Union


class GnomadPublicResourceSource(Enum):
    """
    Named sources from which public gnomAD resources can be loaded.

    Each member's value is the human-readable name of the source.
    """

    GNOMAD = "gnomAD"
    GOOGLE_CLOUD_PUBLIC_DATASETS = "Google Cloud Public Datasets"


# Source that the public resource configuration starts out with, i.e. the one
# in effect until a different source is explicitly set.
DEFAULT_GNOMAD_PUBLIC_RESOURCE_SOURCE = GnomadPublicResourceSource.GNOMAD


class _GnomadPublicResourceConfiguration:
    """Holds the setting that selects where public gnomAD resources come from."""

    # Class-level initial value. Assigning through the `source` property stores
    # the new value on the instance, which then takes precedence over this one.
    __source: Union[GnomadPublicResourceSource, str] = (
        DEFAULT_GNOMAD_PUBLIC_RESOURCE_SOURCE
    )

    @property
    def source(self) -> Union[GnomadPublicResourceSource, str]:
        """
        Return the currently configured source of public gnomAD resource files.

        The configured source determines which URLs gnomAD resources are loaded from.

        :returns: Source name or path to root of resources directory
        """
        return self.__source

    @source.setter
    def source(self, source: Union[GnomadPublicResourceSource, str]) -> None:
        """
        Change the source used for public gnomAD resource files.

        The configured source determines which URLs gnomAD resources are loaded from.

        :param source: Source name or path to root of resources directory
        """
        self.__source = source


# Module-level configuration instance; assign to its `source` property to
# change where public gnomAD resources are loaded from.
gnomad_public_resource_configuration = _GnomadPublicResourceConfiguration()
18 changes: 12 additions & 6 deletions gnomad/resources/grch37/gnomad.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from gnomad.resources.resource_utils import (
DataException,
TableResource,
GnomadPublicTableResource,
VersionedTableResource,
)

Expand Down Expand Up @@ -124,7 +124,9 @@ def public_release(data_type: str) -> VersionedTableResource:
return VersionedTableResource(
current_release,
{
release: TableResource(path=_public_release_ht_path(data_type, release))
release: GnomadPublicTableResource(
path=_public_release_ht_path(data_type, release)
)
for release in releases
},
)
Expand All @@ -150,7 +152,9 @@ def coverage(data_type: str) -> VersionedTableResource:
return VersionedTableResource(
current_release,
{
release: TableResource(path=_public_coverage_ht_path(data_type, release))
release: GnomadPublicTableResource(
path=_public_coverage_ht_path(data_type, release)
)
for release in releases
},
)
Expand All @@ -176,13 +180,15 @@ def liftover(data_type: str) -> VersionedTableResource:
return VersionedTableResource(
current_release,
{
release: TableResource(path=_liftover_data_path(data_type, release))
release: GnomadPublicTableResource(
path=_liftover_data_path(data_type, release)
)
for release in releases
},
)


def public_pca_loadings(subpop: str = "") -> TableResource:
def public_pca_loadings(subpop: str = "") -> GnomadPublicTableResource:
"""
Return the TableResource containing sites and loadings from population PCA.

Expand All @@ -194,7 +200,7 @@ def public_pca_loadings(subpop: str = "") -> TableResource:
'Available subpops are "eas" or "nfe", default value "" for global'
)

return TableResource(path=_public_pca_ht_path(subpop))
return GnomadPublicTableResource(path=_public_pca_ht_path(subpop))


def release_vcf_path(data_type: str, version: str, contig: str) -> str:
Expand Down
17 changes: 10 additions & 7 deletions gnomad/resources/grch37/gnomad_ld.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# noqa: D100

from gnomad.resources.resource_utils import TableResource, BlockMatrixResource
from gnomad.resources.resource_utils import (
GnomadPublicTableResource,
GnomadPublicBlockMatrixResource,
)
from gnomad.resources.grch37.gnomad import CURRENT_EXOME_RELEASE, CURRENT_GENOME_RELEASE
from typing import Optional

Expand Down Expand Up @@ -67,16 +70,16 @@ def _ld_scores_path(
return f'gs://gnomad-public-requester-pays/release/{version}/ld/scores/gnomad.{data_type}.r{version}.{pop}.{"adj." if adj else ""}ld_scores.ht'


def ld_matrix(pop: str) -> BlockMatrixResource:
def ld_matrix(pop: str) -> GnomadPublicBlockMatrixResource:
"""Get resource for the LD matrix for the given population."""
return BlockMatrixResource(path=_ld_matrix_path("genomes", pop))
return GnomadPublicBlockMatrixResource(path=_ld_matrix_path("genomes", pop))


def ld_index(pop: str) -> TableResource:
def ld_index(pop: str) -> GnomadPublicTableResource:
"""Get resource for the LD indices for the given population."""
return TableResource(path=_ld_index_path("genomes", pop))
return GnomadPublicTableResource(path=_ld_index_path("genomes", pop))


def ld_scores(pop: str) -> TableResource:
def ld_scores(pop: str) -> GnomadPublicTableResource:
"""Get resource for the LD scores for the given population."""
return TableResource(path=_ld_scores_path("genomes", pop))
return GnomadPublicTableResource(path=_ld_scores_path("genomes", pop))
54 changes: 28 additions & 26 deletions gnomad/resources/grch37/reference_data.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
# noqa: D100

from gnomad.resources.resource_utils import (
MatrixTableResource,
TableResource,
GnomadPublicMatrixTableResource,
GnomadPublicTableResource,
VersionedMatrixTableResource,
VersionedTableResource,
import_sites_vcf,
)
import hail as hl

na12878_giab = MatrixTableResource(
na12878_giab = GnomadPublicMatrixTableResource(
path="gs://gnomad-public/resources/grch37/na12878/NA12878_GIAB_highconf_CG-IllFB-IllGATKHC-Ion-Solid-10X_CHROM1-X_v3.3_highconf.mt",
import_func=hl.import_vcf,
import_args={
Expand All @@ -20,7 +20,7 @@
},
)

hapmap = TableResource(
hapmap = GnomadPublicTableResource(
path="gs://gnomad-public/resources/grch37/hapmap/hapmap_3.3.b37.ht",
import_func=import_sites_vcf,
import_args={
Expand All @@ -31,7 +31,7 @@
},
)

kgp_omni = TableResource(
kgp_omni = GnomadPublicTableResource(
path="gs://gnomad-public/resources/grch37/kgp/1000G_omni2.5.b37.ht",
import_func=import_sites_vcf,
import_args={
Expand All @@ -42,7 +42,7 @@
},
)

mills = TableResource(
mills = GnomadPublicTableResource(
path="gs://gnomad-public/resources/grch37/mills/Mills_and_1000G_gold_standard.indels.b37.ht",
import_func=import_sites_vcf,
import_args={
Expand All @@ -53,7 +53,7 @@
},
)

syndip = MatrixTableResource(
syndip = GnomadPublicMatrixTableResource(
path="gs://gnomad-public/resources/grch37/syndip/hybrid.m37m.mt",
import_func=hl.import_vcf,
import_args={
Expand All @@ -67,7 +67,7 @@
vep_context = VersionedTableResource(
default_version="85",
versions={
"85": TableResource(
"85": GnomadPublicTableResource(
path="gs://gnomad-public-requester-pays/resources/context/grch37_context_vep_annotated.ht",
)
},
Expand All @@ -76,7 +76,7 @@
dbsnp = VersionedTableResource(
default_version="20180423",
versions={
"20180423": TableResource(
"20180423": GnomadPublicTableResource(
path="gs://gnomad-public/resources/grch37/dbsnp/All_20180423.ht",
import_func=import_sites_vcf,
import_args={
Expand All @@ -93,7 +93,7 @@
clinvar = VersionedTableResource(
default_version="20181028",
versions={
"20181028": TableResource(
"20181028": GnomadPublicTableResource(
path="gs://gnomad-public/resources/grch37/clinvar/clinvar_20181028.vep.ht",
import_func=import_sites_vcf,
import_args={
Expand All @@ -110,7 +110,7 @@
kgp_phase_3 = VersionedMatrixTableResource(
default_version="phase_3_split",
versions={
"phase_3_split": MatrixTableResource(
"phase_3_split": GnomadPublicMatrixTableResource(
path="gs://gnomad-public/resources/grch37/kgp/1000Genomes_phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.split.mt",
import_func=hl.import_vcf,
import_args={
Expand All @@ -121,7 +121,7 @@
"reference_genome": "GRCh37",
},
),
"phase_3": MatrixTableResource(
"phase_3": GnomadPublicMatrixTableResource(
path="gs://gnomad-public/resources/grch37/kgp/1000Genomes_phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.mt",
import_func=hl.import_vcf,
import_args={
Expand All @@ -138,7 +138,7 @@
kgp = VersionedTableResource(
default_version="phase_1_hc",
versions={
"phase_1_hc": TableResource(
"phase_1_hc": GnomadPublicTableResource(
path="gs://gnomad-public/resources/grch37/kgp/1000G_phase1.snps.high_confidence.b37.ht",
import_func=import_sites_vcf,
import_args={
Expand All @@ -152,13 +152,15 @@
},
)

cpg_sites = TableResource(path="gs://gnomad-public/resources/grch37/cpg_sites/cpg.ht")
cpg_sites = GnomadPublicTableResource(
path="gs://gnomad-public/resources/grch37/cpg_sites/cpg.ht"
)

methylation_sites = TableResource(
methylation_sites = GnomadPublicTableResource(
path="gs://gnomad-public/resources/grch37/methylation_sites/methylation.ht"
)

lcr_intervals = TableResource(
lcr_intervals = GnomadPublicTableResource(
path="gs://gnomad-public/resources/grch37/lcr_intervals/LCR.GRCh37_compliant.interval_list.ht",
import_func=hl.import_locus_intervals,
import_args={
Expand All @@ -167,7 +169,7 @@
},
)

decoy_intervals = TableResource(
decoy_intervals = GnomadPublicTableResource(
path="gs://gnomad-public/resources/grch37/decoy_intervals/mm-2-merged.GRCh37_compliant.ht",
import_func=hl.import_bed,
import_args={
Expand All @@ -176,7 +178,7 @@
},
)

purcell_5k_intervals = TableResource(
purcell_5k_intervals = GnomadPublicTableResource(
path="gs://gnomad-public/resources/grch37/purcell_5k_intervals/purcell5k.ht",
import_func=hl.import_locus_intervals,
import_args={
Expand All @@ -185,7 +187,7 @@
},
)

seg_dup_intervals = TableResource(
seg_dup_intervals = GnomadPublicTableResource(
path="gs://gnomad-public/resources/grch37/seg_dup_intervals/hg19_self_chain_split_both.ht",
import_func=hl.import_bed,
import_args={
Expand All @@ -194,7 +196,7 @@
},
)

exome_hc_intervals = TableResource(
exome_hc_intervals = GnomadPublicTableResource(
path="gs://gnomad-public/resources/grch37/broad_intervals/exomes_high_coverage.auto.interval_list.ht",
import_func=hl.import_locus_intervals,
import_args={
Expand All @@ -203,7 +205,7 @@
},
)

high_coverage_intervals = TableResource(
high_coverage_intervals = GnomadPublicTableResource(
path="gs://gnomad-public/resources/grch37/broad_intervals/high_coverage.auto.interval_list.ht",
import_func=hl.import_locus_intervals,
import_args={
Expand All @@ -212,7 +214,7 @@
},
)

exome_calling_intervals = TableResource(
exome_calling_intervals = GnomadPublicTableResource(
path="gs://gnomad-public/resources/grch37/broad_intervals/exome_calling_regions.v1.interval_list.ht",
import_func=hl.import_locus_intervals,
import_args={
Expand All @@ -221,7 +223,7 @@
},
)

exome_evaluation_intervals = TableResource(
exome_evaluation_intervals = GnomadPublicTableResource(
path="gs://gnomad-public/resources/grch37/broad_intervals/exome_evaluation_regions.v1.noheader.interval_list.ht",
import_func=hl.import_locus_intervals,
import_args={
Expand All @@ -230,7 +232,7 @@
},
)

genome_evaluation_intervals = TableResource(
genome_evaluation_intervals = GnomadPublicTableResource(
path="gs://gnomad-public/resources/grch37/broad_intervals/hg19-v0-wgs_evaluation_regions.v1.interval_list.ht",
import_func=hl.import_locus_intervals,
import_args={
Expand All @@ -239,7 +241,7 @@
},
)

na12878_hc_intervals = TableResource(
na12878_hc_intervals = GnomadPublicTableResource(
path="gs://gnomad-public/resources/grch37/na12878/NA12878_GIAB_highconf_intervals.ht",
import_func=hl.import_bed,
import_args={
Expand All @@ -248,7 +250,7 @@
},
)

syndip_hc_intervals = TableResource(
syndip_hc_intervals = GnomadPublicTableResource(
path="gs://gnomad-public/resources/grch37/syndip/syndip_highconf_genome_intervals.ht",
import_func=hl.import_bed,
import_args={
Expand Down
Loading