Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 31 additions & 3 deletions gnomad/utils/constraint.py
Original file line number Diff line number Diff line change
Expand Up @@ -757,6 +757,7 @@ def get_constraint_grouping_expr(
coverage_expr: Optional[hl.Int32Expression] = None,
include_transcript_group: bool = True,
include_canonical_group: bool = True,
include_mane_select_group: bool = False,
) -> Dict[str, Union[hl.StringExpression, hl.Int32Expression, hl.BooleanExpression]]:
"""
Collect annotations used for constraint groupings.
Expand Down Expand Up @@ -790,6 +791,9 @@ def get_constraint_grouping_expr(
groupings. Default is True.
:param include_canonical_group: Whether to include canonical annotation in the
groupings. Default is True.
:param include_mane_select_group: Whether to include mane_select annotation in the
groupings. Default is False.

:return: A dictionary with keys as annotation names and values as actual
annotations.
"""
Expand All @@ -810,12 +814,19 @@ def get_constraint_grouping_expr(
groupings["transcript"] = vep_annotation_expr.transcript_id
if include_canonical_group:
groupings["canonical"] = hl.or_else(vep_annotation_expr.canonical == 1, False)
if include_mane_select_group:
groupings["mane_select"] = hl.or_else(
hl.is_defined(vep_annotation_expr.mane_select), False
)

return groupings


def annotate_exploded_vep_for_constraint_groupings(
ht: hl.Table, vep_annotation: str = "transcript_consequences"
ht: hl.Table,
vep_annotation: str = "transcript_consequences",
include_canonical_group: bool = True,
include_mane_select_group: bool = False,
) -> Tuple[Union[hl.Table, hl.MatrixTable], Tuple[str]]:
"""
Annotate Table with annotations used for constraint groupings.
Expand All @@ -842,13 +853,29 @@ def annotate_exploded_vep_for_constraint_groupings(
:param vep_annotation: Name of the annotation within the 'vep' annotation (either
"transcript_consequences" or "worst_csq_by_gene") that will be used to obtain
the constraint annotations. Default is "transcript_consequences".
:param include_canonical_group: Whether to include 'canonical' annotation in the
groupings. Default is True. Ignored unless `vep_annotation` is "transcript_consequences".
:param include_mane_select_group: Whether to include 'mane_select' annotation in the
groupings. Default is False. Ignored unless `vep_annotation` is "transcript_consequences".
:return: A tuple of input Table or MatrixTable with grouping annotations added and
the names of added annotations.
"""
if vep_annotation == "transcript_consequences":
include_transcript_group = include_canonical_group = True
if not include_canonical_group and not include_mane_select_group:
raise ValueError(
"If 'vep_annotation' is 'transcript_consequences', one of either"
" 'include_canonical_group' or 'include_mane_select_group' must be set!"
)
include_transcript_group = True
else:
include_transcript_group = include_canonical_group = False
logger.warning(
"Setting both 'include_canonical_group' and 'include_mane_select_group' to"
" False (options cannot be used unless 'vep_annotation' is"
" 'transcript_consequences')."
)
include_transcript_group = False
include_canonical_group = False
include_mane_select_group = False

# Annotate `ht` with 'worst_csq_by_gene' if that is the requested `vep_annotation`.
if vep_annotation == "worst_csq_by_gene":
Expand All @@ -863,6 +890,7 @@ def annotate_exploded_vep_for_constraint_groupings(
coverage_expr=ht.exome_coverage,
include_transcript_group=include_transcript_group,
include_canonical_group=include_canonical_group,
include_mane_select_group=include_mane_select_group,
)

return ht.annotate(**groupings), tuple(groupings.keys())
Expand Down
7 changes: 6 additions & 1 deletion gnomad/utils/vep.py
Original file line number Diff line number Diff line change
Expand Up @@ -604,6 +604,7 @@ def filter_vep_transcript_csqs(
vep_root: str = "vep",
synonymous: bool = True,
canonical: bool = True,
mane_select: bool = False,
filter_empty_csq: bool = True,
) -> Union[hl.Table, hl.MatrixTable]:
"""
Expand All @@ -619,10 +620,11 @@ def filter_vep_transcript_csqs(
:param vep_root: Name used for VEP annotation. Default is 'vep'.
:param synonymous: Whether to filter to variants where the most severe consequence is 'synonymous_variant'. Default is True.
:param canonical: Whether to filter to only canonical transcripts. Default is True.
:param mane_select: Whether to filter to only MANE Select transcripts. Default is False.
:param filter_empty_csq: Whether to filter out rows where 'transcript_consequences' is empty, after filtering 'transcript_consequences' to the specified criteria. Default is True.
:return: Table or MatrixTable filtered to specified criteria.
"""
if not synonymous and not canonical and not filter_empty_csq:
if not synonymous and not (canonical or mane_select) and not filter_empty_csq:
logger.warning("No changes have been made to input Table/MatrixTable!")
return t

Expand All @@ -632,6 +634,9 @@ def filter_vep_transcript_csqs(
criteria.append(lambda csq: csq.most_severe_consequence == "synonymous_variant")
if canonical:
criteria.append(lambda csq: csq.canonical == 1)
if mane_select:
criteria.append(lambda csq: hl.is_defined(csq.mane_select))

transcript_csqs = transcript_csqs.filter(lambda x: combine_functions(criteria, x))
is_mt = isinstance(t, hl.MatrixTable)
vep_data = {vep_root: t[vep_root].annotate(transcript_consequences=transcript_csqs)}
Expand Down