Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,5 @@ Below is a list of all features not included in the v4 MVP and where to find the
| Genetic ancestry subgroups (previously subpops) | v2 variant page |
| Multi Nucleotide (MNV) calls | v2 variant table and variant page |
| Variant co-occurrence | v2 gene page |
| Manual LoF curation | v2 variant table and variant page |
| Regional Missense Constraint | Now available on v2 gene page |
| Linkage disequilibrium scores | [v2](/downloads/#v2-linkage-disequilibrium) downloads |
65 changes: 37 additions & 28 deletions browser/help/topics/lof-curation.md

Large diffs are not rendered by default.

46 changes: 46 additions & 0 deletions browser/src/DataPage/GnomadV4Downloads.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -600,6 +600,52 @@ const GnomadV4Downloads = () => {
</ListItem>
</FileList>
</DownloadsSection>

<SectionTitle id="v4-secondary-analyses" theme={{ type: 'datasets' }}>
Secondary Analyses
</SectionTitle>
<StyledParagraph>
Additional research analyses created using the core gnomAD releases in collaboration with
members of the gnomAD steering committee.
</StyledParagraph>

<DownloadsSection>
<SectionTitle id="v4-lof-curation-results">Loss-of-function curation results</SectionTitle>
<p>
For information on v4 loss-of-function curation results, see{' '}
{/* @ts-expect-error TS(2769) FIXME: No overload matches this call. */}
<ExternalLink href="https://doi.org/10.1038/s41586-020-2308-7">
<em>The mutational constraint spectrum quantified from variation in 141,456 humans.</em>{' '}
Nature 581, 434–443 (2020)
</ExternalLink>{' '}
(all homozygous LoF curation results),{' '}
{/* @ts-expect-error TS(2769) FIXME: No overload matches this call. */}
<ExternalLink href="https://doi.org/10.1038/s41586-020-2329-2">
<em>Transcript expression-aware annotation improves rare variant interpretation.</em>{' '}
Nature 581, 452–458 (2020)
</ExternalLink>{' '}
(haploinsufficient genes LoF curation results), and{' '}
{/* @ts-expect-error TS(2769) FIXME: No overload matches this call. */}
<ExternalLink href="https://pubmed.ncbi.nlm.nih.gov/37633279/">
<em>
Advanced variant classification framework reduces the false positive rate of predicted
loss-of-function variants in population sequencing data.
</em>{' '}
Am J Hum Genet 110, 1496-1508 (2023)
</ExternalLink>
.
</p>

<FileList>
{/* @ts-expect-error TS(2745) FIXME: This JSX tag's 'children' prop expects type 'never... Remove this comment to see the full error message */}
<ListItem>
<DownloadLinks
label="Incomplete penetrance LoF curation results"
path="/release/4.1/lof_curation/incomplete_penetrance_curation_results.csv"
/>
</ListItem>
</FileList>
</DownloadsSection>
</>
)
}
Expand Down
139 changes: 139 additions & 0 deletions browser/src/DataPage/__snapshots__/DataPage.spec.tsx.snap
Original file line number Diff line number Diff line change
Expand Up @@ -7300,6 +7300,145 @@ exports[`Data Page has no unexpected changes 1`] = `
</li>
</ul>
</section>
<span
className="c8"
>
<h2
className="c9"
>
<a
aria-hidden="true"
className="c10 c11"
href="#v4-secondary-analyses"
id="v4-secondary-analyses"
>
<img
alt=""
aria-hidden="true"
height={12}
src="test-file-stub"
width={12}
/>
</a>
Secondary Analyses
</h2>
</span>
<p
className="c12"
>
Additional research analyses created using the core gnomAD releases in collaboration with members of the gnomAD steering committee.
</p>
<section
className="c16"
>
<span
className="c8"
>
<h2
className="c17"
>
<a
aria-hidden="true"
className="c10 c11"
href="#v4-lof-curation-results"
id="v4-lof-curation-results"
>
<img
alt=""
aria-hidden="true"
height={12}
src="test-file-stub"
width={12}
/>
</a>
Loss-of-function curation results
</h2>
</span>
<p>
For information on v4 loss-of-function curation results, see

<a
className="c7"
href="https://doi.org/10.1038/s41586-020-2308-7"
rel="noopener noreferrer"
target="_blank"
>
<em>
The mutational constraint spectrum quantified from variation in 141,456 humans.
</em>

Nature 581, 434–443 (2020)
</a>

(all homozygous LoF curation results),

<a
className="c7"
href="https://doi.org/10.1038/s41586-020-2329-2"
rel="noopener noreferrer"
target="_blank"
>
<em>
Transcript expression-aware annotation improves rare variant interpretation.
</em>

Nature 581, 452–458 (2020)
</a>

(haploinsufficient genes LoF curation results), and

<a
className="c7"
href="https://pubmed.ncbi.nlm.nih.gov/37633279/"
rel="noopener noreferrer"
target="_blank"
>
<em>
Advanced variant classification framework reduces the false positive rate of predicted loss-of-function variants in population sequencing data.
</em>

Am J Hum Genet 110, 1496-1508 (2023)
</a>
.
</p>
<ul
className="c20"
>
<li
className="c21"
>
<span>
Incomplete penetrance LoF curation results
</span>
<br />
<span>
Download from

<a
aria-label="Download Incomplete penetrance LoF curation results from Google"
className="c7"
href="https://storage.googleapis.com/gcp-public-data--gnomad/release/4.1/lof_curation/incomplete_penetrance_curation_results.csv"
onClick={[Function]}
rel="noopener noreferrer"
target="_blank"
>
Google
</a>
/
<a
aria-label="Download Incomplete penetrance LoF curation results from Amazon"
className="c7"
href="https://gnomad-public-us-east-1.s3.amazonaws.com/release/4.1/lof_curation/incomplete_penetrance_curation_results.csv"
onClick={[Function]}
rel="noopener noreferrer"
target="_blank"
>
Amazon
</a>
</span>
</li>
</ul>
</section>
<span
className="c8"
>
Expand Down
1 change: 0 additions & 1 deletion browser/src/help/__snapshots__/HelpPage.spec.tsx.snap
Original file line number Diff line number Diff line change
Expand Up @@ -1248,7 +1248,6 @@ Below is a list of all features not included in the v4 MVP and where to find the
| Genetic ancestry subgroups (previously subpops) | v2 variant page |
| Multi Nucleotide (MNV) calls | v2 variant table and variant page |
| Variant co-occurrence | v2 gene page |
| Manual LoF curation | v2 variant table and variant page |
| Regional Missense Constraint | Now available on v2 gene page |
| Linkage disequilibrium scores | [v2](/downloads/#v2-linkage-disequilibrium) downloads |
",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,10 @@
"not_lof": "Not LoF",
}

VERDICT_MAPPINGS_CLEAN = VERDICT_MAPPING.values()

def import_gnomad_v2_lof_curation_results(curation_result_paths, genes_path):

def import_gnomad_lof_curation_results(curation_result_paths, genes_path, reference_genome="GRCh37"):
all_flags = set()

with hl.hadoop_open("/tmp/import_temp.tsv", "w") as temp_output_file:
Expand All @@ -52,8 +54,13 @@ def import_gnomad_v2_lof_curation_results(curation_result_paths, genes_path):

for row in reader:
[chrom, pos, ref, alt] = row["Variant ID"].split("-")
chrom = f"chr{chrom}" if reference_genome == "GRCh38" else chrom

variant_flags = [FLAG_MAPPING.get(f, f) for f in raw_dataset_flags if row[f"Flag {f}"] == "TRUE"]
variant_flags = [
FLAG_MAPPING.get(f, f)
for f in raw_dataset_flags
if row.get(f"Flag {f}") == "TRUE" or row.get(f"FLAG {f}") == "1"
]

genes = [gene_id for (gene_id, gene_symbol) in (gene.split(":") for gene in row["Gene"].split(";"))]

Expand All @@ -62,7 +69,8 @@ def import_gnomad_v2_lof_curation_results(curation_result_paths, genes_path):
if verdict == "inufficient_evidence":
verdict = "insufficient_evidence"

verdict = VERDICT_MAPPING[verdict]
if verdict not in VERDICT_MAPPINGS_CLEAN:
verdict = VERDICT_MAPPING[verdict]

output_row = [
chrom,
Expand All @@ -81,7 +89,7 @@ def import_gnomad_v2_lof_curation_results(curation_result_paths, genes_path):
ds = hl.import_table("/tmp/import_temp.tsv")

ds = ds.transmute(
locus=hl.locus(ds.chrom, hl.int(ds.position)),
locus=hl.locus(ds.chrom, hl.int(ds.position), reference_genome),
alleles=[ds.ref, ds.alt],
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@
from data_pipeline.pipelines.gnomad_v3_short_tandem_repeats import pipeline as gnomad_v3_short_tandem_repeats_pipeline
from data_pipeline.pipelines.gnomad_v4_variants import pipeline as gnomad_v4_variants_pipeline
from data_pipeline.pipelines.gnomad_v4_coverage import pipeline as gnomad_v4_coverage_pipeline

from data_pipeline.pipelines.gnomad_v4_cnvs import pipeline as gnomad_v4_cnvs_pipeline
from data_pipeline.pipelines.gnomad_v4_lof_curation_results import pipeline as gnomad_v4_lof_curation_results_pipeline


logger = logging.getLogger("gnomad_data_pipeline")
Expand Down Expand Up @@ -145,6 +145,18 @@ def add_liftover_document_id(ds):
# ),
# "args": {"index": "gnomad_v4_genome_coverage", "id_field": "xpos", "num_shards": 2, "block_size": 10_000},
# },
"gnomad_v4_lof_curation_results": {
"get_table": lambda: add_variant_document_id(
hl.read_table(gnomad_v4_lof_curation_results_pipeline.get_output("lof_curation_results").get_output_path())
),
"args": {
"index": "gnomad_v4_lof_curation_results",
"index_fields": ["document_id", "variant_id", "locus", "lof_curations.gene_id"],
"id_field": "document_id",
"num_shards": 1,
"block_size": 1_000,
},
},
##############################################################################################################
# gnomAD v4 CNVs
##############################################################################################################
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from data_pipeline.pipeline import Pipeline, run_pipeline

from data_pipeline.datasets.gnomad_v2.gnomad_v2_lof_curation import import_gnomad_v2_lof_curation_results
from data_pipeline.datasets.gnomad_v2.gnomad_v2_lof_curation import import_gnomad_lof_curation_results

from data_pipeline.pipelines.genes import pipeline as genes_pipeline

Expand All @@ -9,7 +9,7 @@

pipeline.add_task(
"prepare_gnomad_v2_lof_curation_results",
import_gnomad_v2_lof_curation_results,
import_gnomad_lof_curation_results,
"/gnomad_v2/gnomad_v2_lof_curation_results.ht",
{"genes_path": genes_pipeline.get_output("genes_grch37")},
{
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from data_pipeline.pipeline import Pipeline, run_pipeline

from data_pipeline.datasets.gnomad_v2.gnomad_v2_lof_curation import import_gnomad_lof_curation_results

from data_pipeline.pipelines.genes import pipeline as genes_pipeline


# gnomAD v4 LoF curation results pipeline: registers one task that runs
# import_gnomad_lof_curation_results and then declares the pipeline output.
pipeline = Pipeline()

# If a result for a variant/gene pair is present in more than one file,
# the result in the first file in this list takes precedence.
_curation_result_paths = [
    "gs://gnomad-v4-data-pipeline/inputs/lof_curation/gnomAD_v4/gnomAD_incomplete_penetrance_final_results.csv",
]

# Task inputs: the GRCh38 genes output of the genes pipeline.
_task_inputs = {"genes_path": genes_pipeline.get_output("genes_grch38")}

# Task parameters: the input files plus the reference genome to import against.
_task_params = {
    "curation_result_paths": _curation_result_paths,
    "reference_genome": "GRCh38",
}

pipeline.add_task(
    "prepare_gnomad_v4_lof_curation_results",
    import_gnomad_lof_curation_results,
    "/gnomad_v4/gnomad_v4_lof_curation_results.ht",
    _task_inputs,
    _task_params,
)

###############################################
# Outputs
###############################################

# Map the output name "lof_curation_results" to the task registered above.
pipeline.set_outputs({"lof_curation_results": "prepare_gnomad_v4_lof_curation_results"})

###############################################
# Run
###############################################

if __name__ == "__main__":
    run_pipeline(pipeline)
Loading