Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 42 additions & 11 deletions gnomad/sample_qc/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from gnomad.utils.reference_genome import get_reference_genome
from gnomad.utils.sparse_mt import impute_sex_ploidy

from typing import List, Optional
from typing import List, Optional, Union

logging.basicConfig(format="%(levelname)s (%(name)s %(lineno)s): %(message)s")
logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -209,7 +209,7 @@ def get_qc_mt(


def annotate_sex(
mt: hl.MatrixTable,
mtds: Union[hl.MatrixTable, hl.vds.VariantDataset],
is_sparse: bool = True,
excluded_intervals: Optional[hl.Table] = None,
included_intervals: Optional[hl.Table] = None,
Expand All @@ -225,7 +225,7 @@ def annotate_sex(

Return Table with the following fields:
- s (str): Sample
- chr20_mean_dp (float32): Sample's mean coverage over chromosome 20.
- `normalization_contig`_mean_dp (float32): Sample's mean coverage over the specified `normalization_contig`.
- chrX_mean_dp (float32): Sample's mean coverage over chromosome X.
- chrY_mean_dp (float32): Sample's mean coverage over chromosome Y.
- chrX_ploidy (float32): Sample's imputed ploidy over chromosome X.
Expand All @@ -238,7 +238,7 @@ def annotate_sex(
- Y_karyotype (str): Sample's chromosome Y karyotype.
- sex_karyotype (str): Sample's sex karyotype.

:param mt: Input MatrixTable
:param mtds: Input MatrixTable or VariantDataset
:param bool is_sparse: Whether input MatrixTable is in sparse data format
:param excluded_intervals: Optional table of intervals to exclude from the computation.
:param included_intervals: Optional table of intervals to use in the computation. REQUIRED for exomes.
Expand All @@ -252,14 +252,38 @@ def annotate_sex(
:return: Table of samples and their imputed sex karyotypes.
"""
logger.info("Imputing sex chromosome ploidies...")
if is_sparse:
ploidy_ht = impute_sex_ploidy(
mt, excluded_intervals, included_intervals, normalization_contig

is_vds = isinstance(mtds, hl.vds.VariantDataset)
if is_vds:
if excluded_intervals is not None:
raise NotImplementedError(
"The use of the parameter 'excluded_intervals' is currently not implemented for imputing sex chromosome ploidy on a VDS!"
)
ploidy_ht = hl.vds.impute_sex_chromosome_ploidy(
mtds,
calling_intervals=included_intervals,
normalization_contig=normalization_contig,
)
else:
raise NotImplementedError(
"Imputing sex ploidy does not exist yet for dense data."
ploidy_ht = ploidy_ht.rename(
{
"x_ploidy": "chrX_ploidy",
"y_ploidy": "chrY_ploidy",
"x_mean_dp": "chrX_mean_dp",
"y_mean_dp": "chrY_mean_dp",
"autosomal_mean_dp": f"{normalization_contig}_mean_dp",
}
)
mt = mtds.variant_data
else:
mt = mtds
if is_sparse:
ploidy_ht = impute_sex_ploidy(
mt, excluded_intervals, included_intervals, normalization_contig
)
else:
raise NotImplementedError(
"Imputing sex ploidy does not exist yet for dense data."
)

x_contigs = get_reference_genome(mt.locus).x_contigs
logger.info("Filtering mt to biallelic SNPs in X contigs: %s", x_contigs)
Expand All @@ -269,8 +293,15 @@ def annotate_sex(
mt = mt.filter_rows(
(hl.len(mt.alleles) == 2) & hl.is_snp(mt.alleles[0], mt.alleles[1])
)

build = get_reference_genome(mt.locus).name
mt = hl.filter_intervals(
mt, [hl.parse_locus_interval(contig) for contig in x_contigs]
mt,
[
hl.parse_locus_interval(contig, reference_genome=build)
for contig in x_contigs
],
keep=True,
)

if sites_ht is not None:
Expand Down
3 changes: 2 additions & 1 deletion gnomad/utils/file_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@
from typing import Callable, List, Dict, Optional, Tuple, Union

import hail as hl
from hailtop.aiotools import LocalAsyncFS, RouterAsyncFS, AsyncFS
from hailtop.aiotools import LocalAsyncFS, AsyncFS
from hailtop.aiotools.router_fs import RouterAsyncFS
from hailtop.aiogoogle import GoogleStorageAsyncFS
from hailtop.utils import bounded_gather, tqdm

Expand Down