From c60f8f4ea9335351c3657c364adc6e129e400e46 Mon Sep 17 00:00:00 2001 From: klaricch Date: Wed, 20 Sep 2023 16:12:03 -0400 Subject: [PATCH 1/6] add mane option to filter_vep_transcript_csqs --- gnomad/utils/vep.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/gnomad/utils/vep.py b/gnomad/utils/vep.py index 8b006f18e..17cd82932 100644 --- a/gnomad/utils/vep.py +++ b/gnomad/utils/vep.py @@ -604,6 +604,7 @@ def filter_vep_transcript_csqs( vep_root: str = "vep", synonymous: bool = True, canonical: bool = True, + mane: bool = False, filter_empty_csq: bool = True, ) -> Union[hl.Table, hl.MatrixTable]: """ @@ -619,19 +620,26 @@ def filter_vep_transcript_csqs( :param vep_root: Name used for VEP annotation. Default is 'vep'. :param synonymous: Whether to filter to variants where the most severe consequence is 'synonymous_variant'. Default is True. :param canonical: Whether to filter to only canonical transcripts. Default is True. + :param mane: Whether to filter to only MANE select transcripts. Default is False. :param filter_empty_csq: Whether to filter out rows where 'transcript_consequences' is empty, after filtering 'transcript_consequences' to the specified criteria. Default is True. :return: Table or MatrixTable filtered to specified criteria. """ - if not synonymous and not canonical and not filter_empty_csq: + if not synonymous and not (canonical or mane) and not filter_empty_csq: logger.warning("No changes have been made to input Table/MatrixTable!") return t + if canonical and mane: + raise ValueError("Cannot set both 'canonical' and 'mane'!") + transcript_csqs = t[vep_root].transcript_consequences criteria = [lambda csq: True] if synonymous: criteria.append(lambda csq: csq.most_severe_consequence == "synonymous_variant") if canonical: criteria.append(lambda csq: csq.canonical == 1) + if mane: + criteria.append(lambda csq: hl.is_defined(csq.mane_select)) + transcript_csqs = transcript_csqs.filter(lambda x: combine_functions(criteria, x)) is_mt = isinstance(t, hl.MatrixTable) vep_data = {vep_root: t[vep_root].annotate(transcript_consequences=transcript_csqs)} From dbd09ed5603df12240f77de5c77f2403d9da6ba2 Mon Sep 17 00:00:00 2001 From: klaricch Date: Fri, 22 Sep 2023 11:30:48 -0400 Subject: [PATCH 2/6] add mane option to transcript groupings --- gnomad/utils/constraint.py | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/gnomad/utils/constraint.py b/gnomad/utils/constraint.py index 2332c86dc..c79e56852 100644 --- a/gnomad/utils/constraint.py +++ b/gnomad/utils/constraint.py @@ -756,7 +756,7 @@ def get_constraint_grouping_expr( vep_annotation_expr: hl.StructExpression, coverage_expr: Optional[hl.Int32Expression] = None, include_transcript_group: bool = True, - include_canonical_group: bool = True, + preferred_transcript_group: str = "canonical", ) -> Dict[str, Union[hl.StringExpression, hl.Int32Expression, hl.BooleanExpression]]: """ Collect annotations used for constraint groupings. @@ -788,8 +788,8 @@ def get_constraint_grouping_expr( :param coverage_expr: Optional Int32Expression of exome coverage. Default is None. :param include_transcript_group: Whether to include the transcript annotation in the groupings. Default is True. - :param include_canonical_group: Whether to include canonical annotation in the - groupings. Default is True. + :param preferred_transcript_group: Preffered transcript grouping to use if also grouping by preferred transcript. Choices: ["mane", "canonical", None]. Default is "canonical". + :return: A dictionary with keys as annotation names and values as actual annotations. """ @@ -808,14 +808,20 @@ def get_constraint_grouping_expr( # Add 'transcript' and 'canonical' annotation if requested. if include_transcript_group: groupings["transcript"] = vep_annotation_expr.transcript_id - if include_canonical_group: + if preferred_transcript_group == "canonical": groupings["canonical"] = hl.or_else(vep_annotation_expr.canonical == 1, False) + elif preferred_transcript_group == "mane": + groupings["mane"] = hl.or_else( + hl.is_defined(vep_annotation_expr.mane_select), False + ) return groupings def annotate_exploded_vep_for_constraint_groupings( - ht: hl.Table, vep_annotation: str = "transcript_consequences" + ht: hl.Table, + vep_annotation: str = "transcript_consequences", + preferred_transcript_group: str = "canonical", ) -> Tuple[Union[hl.Table, hl.MatrixTable], Tuple[str]]: """ Annotate Table with annotations used for constraint groupings. @@ -842,13 +848,17 @@ def annotate_exploded_vep_for_constraint_groupings( :param vep_annotation: Name of annotation in 'vep' annotation (one of "transcript_consequences" and "worst_csq_by_gene") that will be used for obtaining constraint annotations. Default is "transcript_consequences". + :param preferred_transcript_group: Preffered transcript grouping to use if also grouping by preferred transcript. Only applied if `include_transcript_group` is True. Choices: ["mane", "canonical", None]. Default is "canonical". + :return: A tuple of input Table or MatrixTable with grouping annotations added and the names of added annotations. """ if vep_annotation == "transcript_consequences": - include_transcript_group = include_canonical_group = True + include_transcript_group = True + preferred_transcript_group = preferred_transcript_group else: - include_transcript_group = include_canonical_group = False + include_transcript_group = False + preferred_transcript_group = None # Annotate 'worst_csq_by_gene' to `ht` if it's specified for `vep_annotation`. if vep_annotation == "worst_csq_by_gene": @@ -862,7 +872,7 @@ def annotate_exploded_vep_for_constraint_groupings( ht[vep_annotation], coverage_expr=ht.exome_coverage, include_transcript_group=include_transcript_group, - include_canonical_group=include_canonical_group, + preferred_transcript_group=preferred_transcript_group, ) return ht.annotate(**groupings), tuple(groupings.keys()) From 65f733a838dc8cd05ab3d8a77e4c4e1742174ecf Mon Sep 17 00:00:00 2001 From: klaricch Date: Fri, 22 Sep 2023 13:45:46 -0400 Subject: [PATCH 3/6] change param to include_mane_select_group --- gnomad/utils/constraint.py | 38 ++++++++++++++++++++++++++++---------- gnomad/utils/vep.py | 10 +++++----- 2 files changed, 33 insertions(+), 15 deletions(-) diff --git a/gnomad/utils/constraint.py b/gnomad/utils/constraint.py index c79e56852..8d94c31be 100644 --- a/gnomad/utils/constraint.py +++ b/gnomad/utils/constraint.py @@ -756,7 +756,8 @@ def get_constraint_grouping_expr( vep_annotation_expr: hl.StructExpression, coverage_expr: Optional[hl.Int32Expression] = None, include_transcript_group: bool = True, - preferred_transcript_group: str = "canonical", + include_canonical_group: bool = True, + include_mane_select_group: bool = False, ) -> Dict[str, Union[hl.StringExpression, hl.Int32Expression, hl.BooleanExpression]]: """ Collect annotations used for constraint groupings. @@ -788,7 +789,10 @@ def get_constraint_grouping_expr( :param coverage_expr: Optional Int32Expression of exome coverage. Default is None. :param include_transcript_group: Whether to include the transcript annotation in the groupings. Default is True. - :param preferred_transcript_group: Preffered transcript grouping to use if also grouping by preferred transcript. Choices: ["mane", "canonical", None]. Default is "canonical". + :param include_canonical_group: Whether to include canonical annotation in the + groupings. Default is True. + :param include_mane_select_group: Whether to include mane_select annotation in the + groupings. Default is False. :return: A dictionary with keys as annotation names and values as actual annotations. @@ -808,9 +812,9 @@ def get_constraint_grouping_expr( # Add 'transcript' and 'canonical' annotation if requested. if include_transcript_group: groupings["transcript"] = vep_annotation_expr.transcript_id - if preferred_transcript_group == "canonical": + if include_canonical_group: groupings["canonical"] = hl.or_else(vep_annotation_expr.canonical == 1, False) - elif preferred_transcript_group == "mane": + elif include_mane_select_group: groupings["mane"] = hl.or_else( hl.is_defined(vep_annotation_expr.mane_select), False ) @@ -821,7 +825,8 @@ def get_constraint_grouping_expr( def annotate_exploded_vep_for_constraint_groupings( ht: hl.Table, vep_annotation: str = "transcript_consequences", - preferred_transcript_group: str = "canonical", + include_canonical_group: bool = True, + include_mane_select_group: bool = False, ) -> Tuple[Union[hl.Table, hl.MatrixTable], Tuple[str]]: """ Annotate Table with annotations used for constraint groupings. @@ -848,17 +853,29 @@ def annotate_exploded_vep_for_constraint_groupings( :param vep_annotation: Name of annotation in 'vep' annotation (one of "transcript_consequences" and "worst_csq_by_gene") that will be used for obtaining constraint annotations. Default is "transcript_consequences". - :param preferred_transcript_group: Preffered transcript grouping to use if also grouping by preferred transcript. Only applied if `include_transcript_group` is True. Choices: ["mane", "canonical", None]. Default is "canonical". - + :param include_canonical_group: Whether to include canonical annotation in the + groupings. Default is True. Ignored unless 'vep_annotation' is "transcript_consequences". + :param include_mane_select_group: Whether to include mane_select annotation in the + groupings. Default is False. Ignored unless 'vep_annotation' is "transcript_consequences". :return: A tuple of input Table or MatrixTable with grouping annotations added and the names of added annotations. """ if vep_annotation == "transcript_consequences": + if not include_canonical_group and not include_mane_select_group: + raise ValueError( + "If 'vep_annotation' is 'transcript_consequences', one of either" + " 'include_canonical_group' or 'include_mane_select_group' must be set!" + ) include_transcript_group = True - preferred_transcript_group = preferred_transcript_group else: + logger.warning( + "Setting both 'include_canonical_group' and 'include_mane_select_group' to" + " False (options cannot be used unless 'vep_annotation' is" + " 'transcript_consequences')." + ) include_transcript_group = False - preferred_transcript_group = None + include_canonical_group = False + include_mane_select_group = False # Annotate 'worst_csq_by_gene' to `ht` if it's specified for `vep_annotation`. if vep_annotation == "worst_csq_by_gene": @@ -872,7 +889,8 @@ def annotate_exploded_vep_for_constraint_groupings( ht[vep_annotation], coverage_expr=ht.exome_coverage, include_transcript_group=include_transcript_group, - preferred_transcript_group=preferred_transcript_group, + include_canonical_group=include_canonical_group, + include_mane_select_group=include_mane_select_group, ) return ht.annotate(**groupings), tuple(groupings.keys()) diff --git a/gnomad/utils/vep.py b/gnomad/utils/vep.py index 17cd82932..5fa4f3956 100644 --- a/gnomad/utils/vep.py +++ b/gnomad/utils/vep.py @@ -604,7 +604,7 @@ def filter_vep_transcript_csqs( vep_root: str = "vep", synonymous: bool = True, canonical: bool = True, - mane: bool = False, + mane_select: bool = False, filter_empty_csq: bool = True, ) -> Union[hl.Table, hl.MatrixTable]: """ @@ -620,15 +620,15 @@ def filter_vep_transcript_csqs( :param vep_root: Name used for VEP annotation. Default is 'vep'. :param synonymous: Whether to filter to variants where the most severe consequence is 'synonymous_variant'. Default is True. :param canonical: Whether to filter to only canonical transcripts. Default is True. - :param mane: Whether to filter to only MANE select transcripts. Default is False. + :param mane_select: Whether to filter to only MANE Select transcripts. Default is False. :param filter_empty_csq: Whether to filter out rows where 'transcript_consequences' is empty, after filtering 'transcript_consequences' to the specified criteria. Default is True. :return: Table or MatrixTable filtered to specified criteria. """ - if not synonymous and not (canonical or mane) and not filter_empty_csq: + if not synonymous and not (canonical or mane_select) and not filter_empty_csq: logger.warning("No changes have been made to input Table/MatrixTable!") return t - if canonical and mane: + if canonical and mane_select: raise ValueError("Cannot set both 'canonical' and 'mane'!") transcript_csqs = t[vep_root].transcript_consequences @@ -637,7 +637,7 @@ def filter_vep_transcript_csqs( criteria.append(lambda csq: csq.most_severe_consequence == "synonymous_variant") if canonical: criteria.append(lambda csq: csq.canonical == 1) - if mane: + if mane_select: criteria.append(lambda csq: hl.is_defined(csq.mane_select)) transcript_csqs = transcript_csqs.filter(lambda x: combine_functions(criteria, x)) From 901dff730623d28173f5b7f22a990abdf1bf9898 Mon Sep 17 00:00:00 2001 From: klaricch Date: Fri, 22 Sep 2023 13:48:02 -0400 Subject: [PATCH 4/6] small edit --- gnomad/utils/constraint.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gnomad/utils/constraint.py b/gnomad/utils/constraint.py index 8d94c31be..cef0b4341 100644 --- a/gnomad/utils/constraint.py +++ b/gnomad/utils/constraint.py @@ -815,7 +815,7 @@ def get_constraint_grouping_expr( if include_canonical_group: groupings["canonical"] = hl.or_else(vep_annotation_expr.canonical == 1, False) elif include_mane_select_group: - groupings["mane"] = hl.or_else( + groupings["mane_select"] = hl.or_else( hl.is_defined(vep_annotation_expr.mane_select), False ) From 7d9e7cacee6f06a3b7086d47b786a830e7cc4ff3 Mon Sep 17 00:00:00 2001 From: klaricch Date: Fri, 22 Sep 2023 14:33:26 -0400 Subject: [PATCH 5/6] Apply suggestions from code review Co-authored-by: jkgoodrich <33063077+jkgoodrich@users.noreply.github.com> --- gnomad/utils/constraint.py | 10 +++++----- gnomad/utils/vep.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/gnomad/utils/constraint.py b/gnomad/utils/constraint.py index cef0b4341..e2434077e 100644 --- a/gnomad/utils/constraint.py +++ b/gnomad/utils/constraint.py @@ -814,7 +814,7 @@ def get_constraint_grouping_expr( groupings["transcript"] = vep_annotation_expr.transcript_id if include_canonical_group: groupings["canonical"] = hl.or_else(vep_annotation_expr.canonical == 1, False) - elif include_mane_select_group: + if include_mane_select_group: groupings["mane_select"] = hl.or_else( hl.is_defined(vep_annotation_expr.mane_select), False ) @@ -853,10 +853,10 @@ def annotate_exploded_vep_for_constraint_groupings( :param vep_annotation: Name of annotation in 'vep' annotation (one of "transcript_consequences" and "worst_csq_by_gene") that will be used for obtaining constraint annotations. Default is "transcript_consequences". - :param include_canonical_group: Whether to include canonical annotation in the - groupings. Default is True. Ignored unless 'vep_annotation' is "transcript_consequences". - :param include_mane_select_group: Whether to include mane_select annotation in the - groupings. Default is False. Ignored unless 'vep_annotation' is "transcript_consequences". + :param include_canonical_group: Whether to include 'canonical' annotation in the + groupings. Default is True. Ignored unless `vep_annotation` is "transcript_consequences". + :param include_mane_select_group: Whether to include 'mane_select' annotation in the + groupings. Default is False. Ignored unless `vep_annotation` is "transcript_consequences". :return: A tuple of input Table or MatrixTable with grouping annotations added and the names of added annotations. """ diff --git a/gnomad/utils/vep.py b/gnomad/utils/vep.py index 5fa4f3956..8d1bcc306 100644 --- a/gnomad/utils/vep.py +++ b/gnomad/utils/vep.py @@ -629,7 +629,7 @@ def filter_vep_transcript_csqs( return t if canonical and mane_select: - raise ValueError("Cannot set both 'canonical' and 'mane'!") + raise ValueError("Cannot set both 'canonical' and 'mane_select'!") transcript_csqs = t[vep_root].transcript_consequences criteria = [lambda csq: True] From 67275952bb2a5dafd5755feb7a481112c8812153 Mon Sep 17 00:00:00 2001 From: klaricch Date: Fri, 22 Sep 2023 14:36:00 -0400 Subject: [PATCH 6/6] small edit --- gnomad/utils/vep.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/gnomad/utils/vep.py b/gnomad/utils/vep.py index 8d1bcc306..1d114d659 100644 --- a/gnomad/utils/vep.py +++ b/gnomad/utils/vep.py @@ -628,9 +628,6 @@ def filter_vep_transcript_csqs( logger.warning("No changes have been made to input Table/MatrixTable!") return t - if canonical and mane_select: - raise ValueError("Cannot set both 'canonical' and 'mane_select'!") - transcript_csqs = t[vep_root].transcript_consequences criteria = [lambda csq: True] if synonymous: