From c946a0b7344edbe4f5b92334b8c059350bd268d7 Mon Sep 17 00:00:00 2001 From: Qin He <44242118+KoalaQin@users.noreply.github.com> Date: Fri, 18 Aug 2023 11:14:00 -0400 Subject: [PATCH 01/13] function to update sample annotations --- gnomad/utils/annotations.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/gnomad/utils/annotations.py b/gnomad/utils/annotations.py index bdeaa2558..72ceca263 100644 --- a/gnomad/utils/annotations.py +++ b/gnomad/utils/annotations.py @@ -6,6 +6,7 @@ import hail as hl +from gnomad.utils.filtering import add_filters_expr from gnomad.utils.gen_stats import to_phred logging.basicConfig( @@ -1682,3 +1683,31 @@ def _agg_by_group( ht = ht.select(**ann_expr) return ht + + +def update_sample_annotations(expr: hl.expr, sample_annotations: Dict[str, hl.expr]): + """ + Update highly structured annotations such as gnomAD sample meta. + + .. note:: + This function allows first to check if the sample annotations are different from the input, then it updates the annotations recursively. It will also add a `sample_annotations_updated` flag to the input, indicating which annotations have been updated for each sample. + :param expr: highly structured Hail expr, could be a Table or MatrixTable. + :param sample_annotations: Dictionary of sample annotations to update. + """ + if isinstance(sample_annotations, dict): + updated = {} + updated_flag = {} + for ann, updated_expr in sample_annotations.items(): + updated_flag_dict, updated_ann = update_sample_annotations( + expr[ann], updated_expr + ) + updated_flag.update( + {ann + ("." 
+ k if k else ""): v for k, v in updated_flag_dict.items()} + ) + updated[ann] = updated_ann + if isinstance(expr, hl.Table): + updated_flag = add_filters_expr(filters=updated_flag) + return expr.annotate(**updated, sample_annotations_updated=updated_flag) + return updated_flag, expr.annotate(**updated) + else: + return {"": sample_annotations != expr}, sample_annotations From be4360600a45158b6be374f073a17717e9d79765 Mon Sep 17 00:00:00 2001 From: Qin He <44242118+KoalaQin@users.noreply.github.com> Date: Fri, 18 Aug 2023 11:27:29 -0400 Subject: [PATCH 02/13] change return type of add_filters_expr --- gnomad/utils/filtering.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gnomad/utils/filtering.py b/gnomad/utils/filtering.py index 3d5f91824..9e5b04bc8 100644 --- a/gnomad/utils/filtering.py +++ b/gnomad/utils/filtering.py @@ -217,7 +217,7 @@ def filter_to_autosomes( def add_filters_expr( filters: Dict[str, hl.expr.BooleanExpression], current_filters: hl.expr.SetExpression = None, -) -> hl.expr.SetExpression: +) -> hl.expr: """ Create an expression to create or add filters. 
From edad95d270ba86b084d93711b5e6939ab746fd18 Mon Sep 17 00:00:00 2001 From: Qin He <44242118+KoalaQin@users.noreply.github.com> Date: Fri, 18 Aug 2023 12:19:24 -0400 Subject: [PATCH 03/13] re-add function --- gnomad/utils/annotations.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/gnomad/utils/annotations.py b/gnomad/utils/annotations.py index e2fd30e0b..ff6f2ea01 100644 --- a/gnomad/utils/annotations.py +++ b/gnomad/utils/annotations.py @@ -7,6 +7,7 @@ import hail as hl from gnomad.utils.gen_stats import to_phred +from gnomad.utils.filtering import add_filters_expr logging.basicConfig( format="%(asctime)s (%(name)s %(lineno)s): %(message)s", @@ -1795,3 +1796,31 @@ def _agg_by_group( ) return ht.drop("cols") + + +def update_sample_annotations(expr: hl.expr, sample_annotations: Dict[str, hl.expr]): + """ + Update highly structured annotations such as gnomAD sample meta. + + .. note:: + This function allows first to check if the sample annotations are different from the input, then it updates the annotations recursively. It will also add a `sample_annotations_updated` flag to the input, indicating which annotations have been updated for each sample. + :param expr: highly structured Hail expr, could be a Table or MatrixTable. + :param sample_annotations: Dictionary of sample annotations to update. + """ + if isinstance(sample_annotations, dict): + updated = {} + updated_flag = {} + for ann, updated_expr in sample_annotations.items(): + updated_flag_dict, updated_ann = update_sample_annotations( + expr[ann], updated_expr + ) + updated_flag.update( + {ann + ("." 
+ k if k else ""): v for k, v in updated_flag_dict.items()} + ) + updated[ann] = updated_ann + if isinstance(expr, hl.Table): + updated_flag = add_filters_expr(filters=updated_flag) + return expr.annotate(**updated, sample_annotations_updated=updated_flag) + return updated_flag, expr.annotate(**updated) + else: + return {"": sample_annotations != expr}, sample_annotations From a4dc91c6d421d05f00c29424012addad54a47686 Mon Sep 17 00:00:00 2001 From: Qin He <44242118+KoalaQin@users.noreply.github.com> Date: Fri, 18 Aug 2023 12:27:28 -0400 Subject: [PATCH 04/13] test pre-commit --- gnomad/utils/annotations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gnomad/utils/annotations.py b/gnomad/utils/annotations.py index ff6f2ea01..a101eecf6 100644 --- a/gnomad/utils/annotations.py +++ b/gnomad/utils/annotations.py @@ -6,8 +6,8 @@ import hail as hl -from gnomad.utils.gen_stats import to_phred from gnomad.utils.filtering import add_filters_expr +from gnomad.utils.gen_stats import to_phred logging.basicConfig( format="%(asctime)s (%(name)s %(lineno)s): %(message)s", From af73f09bc4406d690b8b81e1a33d7c2ccc460513 Mon Sep 17 00:00:00 2001 From: Qin He <44242118+KoalaQin@users.noreply.github.com> Date: Fri, 18 Aug 2023 13:52:50 -0400 Subject: [PATCH 05/13] revert type change to pass checks --- gnomad/utils/filtering.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gnomad/utils/filtering.py b/gnomad/utils/filtering.py index 128cff143..fba17eaa1 100644 --- a/gnomad/utils/filtering.py +++ b/gnomad/utils/filtering.py @@ -217,7 +217,7 @@ def filter_to_autosomes( def add_filters_expr( filters: Dict[str, hl.expr.BooleanExpression], current_filters: hl.expr.SetExpression = None, -) -> hl.expr: +) -> hl.expr.SetExpression: """ Create an expression to create or add filters. 
From 6dcc781a0e360e5c9641549ff413e4f8e2aedde3 Mon Sep 17 00:00:00 2001 From: Qin He <44242118+KoalaQin@users.noreply.github.com> Date: Fri, 18 Aug 2023 14:34:49 -0400 Subject: [PATCH 06/13] move function to release.py --- gnomad/utils/annotations.py | 29 ----------------------------- gnomad/utils/release.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/gnomad/utils/annotations.py b/gnomad/utils/annotations.py index a101eecf6..e2fd30e0b 100644 --- a/gnomad/utils/annotations.py +++ b/gnomad/utils/annotations.py @@ -6,7 +6,6 @@ import hail as hl -from gnomad.utils.filtering import add_filters_expr from gnomad.utils.gen_stats import to_phred logging.basicConfig( @@ -1796,31 +1795,3 @@ def _agg_by_group( ) return ht.drop("cols") - - -def update_sample_annotations(expr: hl.expr, sample_annotations: Dict[str, hl.expr]): - """ - Update highly structured annotations such as gnomAD sample meta. - - .. note:: - This function allows first to check if the sample annotations are different from the input, then it updates the annotations recursively. It will also add a `sample_annotations_updated` flag to the input, indicating which annotations have been updated for each sample. - :param expr: highly structured Hail expr, could be a Table or MatrixTable. - :param sample_annotations: Dictionary of sample annotations to update. - """ - if isinstance(sample_annotations, dict): - updated = {} - updated_flag = {} - for ann, updated_expr in sample_annotations.items(): - updated_flag_dict, updated_ann = update_sample_annotations( - expr[ann], updated_expr - ) - updated_flag.update( - {ann + ("." 
+ k if k else ""): v for k, v in updated_flag_dict.items()} - ) - updated[ann] = updated_ann - if isinstance(expr, hl.Table): - updated_flag = add_filters_expr(filters=updated_flag) - return expr.annotate(**updated, sample_annotations_updated=updated_flag) - return updated_flag, expr.annotate(**updated) - else: - return {"": sample_annotations != expr}, sample_annotations diff --git a/gnomad/utils/release.py b/gnomad/utils/release.py index 61382ab2b..622c8b3de 100644 --- a/gnomad/utils/release.py +++ b/gnomad/utils/release.py @@ -12,6 +12,7 @@ SEXES, SUBSETS, ) +from gnomad.utils.filtering import add_filters_expr from gnomad.utils.vcf import SORT_ORDER, index_globals logging.basicConfig( @@ -151,3 +152,31 @@ def make_freq_index_dict_from_meta( ] = i return index_dict + + +def update_sample_annotations(expr: hl.expr, sample_annotations: Dict[str, hl.expr]): + """ + Update highly structured annotations such as gnomAD sample meta. + + .. note:: + This function allows first to check if the sample annotations are different from the input, then it updates the annotations recursively. It will also add a `sample_annotations_updated` flag to the input, indicating which annotations have been updated for each sample. + :param expr: highly structured Hail expr, could be a Table or MatrixTable. + :param sample_annotations: Dictionary of sample annotations to update. + """ + if isinstance(sample_annotations, dict): + updated = {} + updated_flag = {} + for ann, updated_expr in sample_annotations.items(): + updated_flag_dict, updated_ann = update_sample_annotations( + expr[ann], updated_expr + ) + updated_flag.update( + {ann + ("." 
+ k if k else ""): v for k, v in updated_flag_dict.items()} + ) + updated[ann] = updated_ann + if isinstance(expr, hl.Table): + updated_flag = add_filters_expr(filters=updated_flag) + return expr.annotate(**updated, sample_annotations_updated=updated_flag) + return updated_flag, expr.annotate(**updated) + else: + return {"": sample_annotations != expr}, sample_annotations From 89de5e2ab2c22155eda08934fc3ca2572bcb1fd9 Mon Sep 17 00:00:00 2001 From: Qin He <44242118+KoalaQin@users.noreply.github.com> Date: Fri, 18 Aug 2023 14:36:24 -0400 Subject: [PATCH 07/13] change return type in add_filters_expr --- gnomad/utils/filtering.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gnomad/utils/filtering.py b/gnomad/utils/filtering.py index fba17eaa1..128cff143 100644 --- a/gnomad/utils/filtering.py +++ b/gnomad/utils/filtering.py @@ -217,7 +217,7 @@ def filter_to_autosomes( def add_filters_expr( filters: Dict[str, hl.expr.BooleanExpression], current_filters: hl.expr.SetExpression = None, -) -> hl.expr.SetExpression: +) -> hl.expr: """ Create an expression to create or add filters. From 3a448cd113945d96caedccff8ad1479e02f9e8cd Mon Sep 17 00:00:00 2001 From: Qin He <44242118+KoalaQin@users.noreply.github.com> Date: Fri, 18 Aug 2023 14:41:30 -0400 Subject: [PATCH 08/13] add blank line --- gnomad/utils/release.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/gnomad/utils/release.py b/gnomad/utils/release.py index 622c8b3de..cf706bfd8 100644 --- a/gnomad/utils/release.py +++ b/gnomad/utils/release.py @@ -156,12 +156,12 @@ def make_freq_index_dict_from_meta( def update_sample_annotations(expr: hl.expr, sample_annotations: Dict[str, hl.expr]): """ - Update highly structured annotations such as gnomAD sample meta. + Update highly structured annotations such as gnomAD sample meta. - .. note:: This function allows first to check if the sample annotations are different from the input, then it updates the annotations recursively. 
It will also add a `sample_annotations_updated` flag to the input, indicating which annotations have been updated for each sample. - :param expr: highly structured Hail expr, could be a Table or MatrixTable. - :param sample_annotations: Dictionary of sample annotations to update. + + :param expr: highly structured Hail expr, could be a Table or MatrixTable. + :param sample_annotations: Dictionary of sample annotations to update. """ if isinstance(sample_annotations, dict): updated = {} From 541ad88a409e1a28e0ba2ece9150c4941cdd4e5c Mon Sep 17 00:00:00 2001 From: Qin He <44242118+KoalaQin@users.noreply.github.com> Date: Mon, 21 Aug 2023 15:09:22 -0400 Subject: [PATCH 09/13] address review comments --- gnomad/utils/annotations.py | 63 +++++++++++++++++++++++++++++++++++++ gnomad/utils/release.py | 28 ----------------- 2 files changed, 63 insertions(+), 28 deletions(-) diff --git a/gnomad/utils/annotations.py b/gnomad/utils/annotations.py index e2fd30e0b..24dabdd73 100644 --- a/gnomad/utils/annotations.py +++ b/gnomad/utils/annotations.py @@ -6,6 +6,7 @@ import hail as hl +import gnomad.utils.filtering as filter_utils from gnomad.utils.gen_stats import to_phred logging.basicConfig( @@ -1795,3 +1796,65 @@ def _agg_by_group( ) return ht.drop("cols") + + +def update_structured_annotations( + ht: hl.Table, + annotation_update_exprs: Dict[str, hl.Expression], + annotation_update_label: Optional[str] = None, +) -> hl.Table: + """ + Update highly structured annotations on a Table. + + This function recursively updates annotations defined by `annotation_update_exprs` + and if `annotation_update_label` is supplied, it checks if the sample annotations + are different from the input and adds a flag to the Table, indicating which + annotations have been updated for each sample. + + :param ht: Input Table with structured annotations to update. + :param annotation_update_exprs: Dictionary of annotations to update, structured as + they are structured on the input `ht`. 
+ :param annotation_update_label: Optional string of the label to use for an + annotation indicating which annotations have been updated. Default is None, so + no annotation is added. + :return: Table with updated annotations and optionally a flag indicating which + annotations were changed. + """ + + def _update_struct( + struct_expr: hl.expr.StructExpression, + update_exprs: Union[Dict[str, hl.Expression], hl.expr.Expression], + ) -> Tuple[Dict[str, Any], hl.expr]: + """ + Update a StructExpression. + + :param struct_expr: StructExpression to update. + :param update_exprs: Dictionary of annotations to update. + :return: Tuple of the updated annotations and the updated flag. + """ + if isinstance(update_exprs, dict): + updated_struct_expr = {} + updated_flag_expr = {} + for ann, expr in update_exprs.items(): + updated_flag, updated_ann = _update_struct(struct_expr[ann], expr) + updated_flag_expr.update( + {ann + ("." + k if k else ""): v for k, v in updated_flag.items()} + ) + updated_struct_expr[ann] = updated_ann + return updated_flag_expr, struct_expr.annotate(**updated_struct_expr) + else: + return {"": update_exprs != struct_expr}, update_exprs + + annotation_update_flag, updated_rows = _update_struct( + ht.row_value, annotation_update_exprs + ) + if annotation_update_label is not None: + updated_rows = updated_rows.annotate( + **{ + annotation_update_label: filter_utils.add_filters_expr( + filters=annotation_update_flag + ) + } + ) + + return ht.annotate(**updated_rows) diff --git a/gnomad/utils/release.py b/gnomad/utils/release.py index cf706bfd8..e205ee39e 100644 --- a/gnomad/utils/release.py +++ b/gnomad/utils/release.py @@ -152,31 +152,3 @@ def make_freq_index_dict_from_meta( ] = i return index_dict - - -def update_sample_annotations(expr: hl.expr, sample_annotations: Dict[str, hl.expr]): - """ - Update highly structured annotations such as gnomAD sample meta. 
- - This function allows first to check if the sample annotations are different from the input, then it updates the annotations recursively. It will also add a `sample_annotations_updated` flag to the input, indicating which annotations have been updated for each sample. - - :param expr: highly structured Hail expr, could be a Table or MatrixTable. - :param sample_annotations: Dictionary of sample annotations to update. - """ - if isinstance(sample_annotations, dict): - updated = {} - updated_flag = {} - for ann, updated_expr in sample_annotations.items(): - updated_flag_dict, updated_ann = update_sample_annotations( - expr[ann], updated_expr - ) - updated_flag.update( - {ann + ("." + k if k else ""): v for k, v in updated_flag_dict.items()} - ) - updated[ann] = updated_ann - if isinstance(expr, hl.Table): - updated_flag = add_filters_expr(filters=updated_flag) - return expr.annotate(**updated, sample_annotations_updated=updated_flag) - return updated_flag, expr.annotate(**updated) - else: - return {"": sample_annotations != expr}, sample_annotations From 28fc712b586e80027a58642d6bf70a7a9d2d8e99 Mon Sep 17 00:00:00 2001 From: Qin He <44242118+KoalaQin@users.noreply.github.com> Date: Mon, 21 Aug 2023 15:10:00 -0400 Subject: [PATCH 10/13] address review comments --- gnomad/utils/release.py | 1 - 1 file changed, 1 deletion(-) diff --git a/gnomad/utils/release.py b/gnomad/utils/release.py index e205ee39e..61382ab2b 100644 --- a/gnomad/utils/release.py +++ b/gnomad/utils/release.py @@ -12,7 +12,6 @@ SEXES, SUBSETS, ) -from gnomad.utils.filtering import add_filters_expr from gnomad.utils.vcf import SORT_ORDER, index_globals logging.basicConfig( From 7d90d586b0802ee4cd64e29162d0b4c417b5b8bf Mon Sep 17 00:00:00 2001 From: Qin He <44242118+KoalaQin@users.noreply.github.com> Date: Mon, 21 Aug 2023 15:37:36 -0400 Subject: [PATCH 11/13] change type --- gnomad/utils/annotations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gnomad/utils/annotations.py 
b/gnomad/utils/annotations.py index 24dabdd73..6a1d22ab0 100644 --- a/gnomad/utils/annotations.py +++ b/gnomad/utils/annotations.py @@ -1824,7 +1824,7 @@ def update_structured_annotations( def _update_struct( struct_expr: hl.expr.StructExpression, update_exprs: Union[Dict[str, hl.Expression], hl.expr.Expression], - ) -> Tuple[Dict[str, Any], hl.expr]: + ) -> Tuple[Dict[str, hl.expr.BooleanExpression], Any]: """ Update a StructExpression. From 2bc0a2296d470c8492122b6ed1d21d646d8fa18f Mon Sep 17 00:00:00 2001 From: Qin He <44242118+KoalaQin@users.noreply.github.com> Date: Mon, 21 Aug 2023 15:43:07 -0400 Subject: [PATCH 12/13] change type 2 --- gnomad/utils/annotations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gnomad/utils/annotations.py b/gnomad/utils/annotations.py index 6a1d22ab0..e71ed73a6 100644 --- a/gnomad/utils/annotations.py +++ b/gnomad/utils/annotations.py @@ -1823,7 +1823,7 @@ def update_structured_annotations( def _update_struct( struct_expr: hl.expr.StructExpression, - update_exprs: Union[Dict[str, hl.Expression], hl.expr.Expression], + update_exprs: Union[Dict[str, hl.expr.Expression], hl.expr.Expression], ) -> Tuple[Dict[str, hl.expr.BooleanExpression], Any]: """ Update a StructExpression. From aaef59b3300f7f2e13835c0627d3fa21156a9133 Mon Sep 17 00:00:00 2001 From: Qin He <44242118+KoalaQin@users.noreply.github.com> Date: Mon, 21 Aug 2023 16:33:35 -0400 Subject: [PATCH 13/13] revert to SetExpression for add_filters_expr --- gnomad/utils/filtering.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gnomad/utils/filtering.py b/gnomad/utils/filtering.py index 128cff143..fba17eaa1 100644 --- a/gnomad/utils/filtering.py +++ b/gnomad/utils/filtering.py @@ -217,7 +217,7 @@ def filter_to_autosomes( def add_filters_expr( filters: Dict[str, hl.expr.BooleanExpression], current_filters: hl.expr.SetExpression = None, -) -> hl.expr: +) -> hl.expr.SetExpression: """ Create an expression to create or add filters.