Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 52 additions & 2 deletions gnomad/utils/annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,9 +350,11 @@ def annotate_freq(
.. note::

Currently this only supports bi-allelic sites.

The input `mt` needs to have the following entry fields:
- GT: a CallExpression containing the genotype
- adj: a BooleanExpression containing whether the genotype is of high quality or not.
- GT: a CallExpression containing the genotype
- adj: a BooleanExpression containing whether the genotype is of high quality or not.

All expression arguments need to be expressions on the input `mt`.

.. rubric:: `freq` row annotation
Expand Down Expand Up @@ -800,6 +802,54 @@ def add_variant_type(alt_alleles: hl.expr.ArrayExpression) -> hl.expr.StructExpr
)


def annotate_allele_info(ht: hl.Table) -> hl.Table:
    """
    Split a multi-allelic Table and attach an 'allele_info' struct to each row.

    .. note::

        The input `ht` must be unsplit; the returned Table has been passed
        through `hl.split_multi` and is therefore bi-allelic.

    The 'allele_info' struct holds:
        - The fields returned by `add_variant_type` for the unsplit alleles
          (including `variant_type`; `n_alt_alleles` is also expected,
          per that function's documentation).
        - has_star: True if any of the unsplit alleles is the star allele ("*").
        - allele_type: Type of the split allele: "snv", "ins", "del", or
          "complex" when it is none of the first three.
        - was_mixed: True if `variant_type` of the unsplit site was "mixed".
        - nonsplit_alleles: Array of alleles before splitting, taken from the
          `old_alleles` field added by `hl.split_multi`.

    :param Table ht: Unsplit input Table.
    :return: Split Table with the 'allele_info' annotation added.
    """
    # Site-level information has to be computed before splitting, while
    # `alleles` still lists every alternate allele seen at the locus.
    unsplit_alleles = ht.alleles
    site_info = hl.struct(
        **add_variant_type(unsplit_alleles),
        has_star=unsplit_alleles.any(lambda allele: allele == "*"),
    )
    ht = hl.split_multi(ht.annotate(allele_info=site_info))

    # After splitting, every row carries exactly one ref and one alt allele.
    ref_allele, alt_allele = ht.alleles[0], ht.alleles[1]

    # Build the case expression from (predicate, label) pairs; the order
    # matters because the first matching branch wins.
    allele_type = hl.case()
    for is_type, label in (
        (hl.is_snp, "snv"),
        (hl.is_insertion, "ins"),
        (hl.is_deletion, "del"),
    ):
        allele_type = allele_type.when(is_type(ref_allele, alt_allele), label)
    allele_type = allele_type.default("complex")

    # `transmute` is used (rather than `annotate`) so that the referenced
    # `old_alleles` field is folded into 'allele_info' instead of being kept
    # as a separate row field.
    site_level = ht.allele_info
    return ht.transmute(
        allele_info=site_level.annotate(
            allele_type=allele_type,
            was_mixed=site_level.variant_type == "mixed",
            nonsplit_alleles=ht.old_alleles,
        )
    )


def annotation_type_is_numeric(t: Any) -> bool:
"""
Given an annotation type, return whether it is a numerical type or not.
Expand Down