From 67d65a1fe08a05262abaed3498a4da05574dcb71 Mon Sep 17 00:00:00 2001 From: Ruchit10 Date: Thu, 3 Jul 2025 15:08:15 -0400 Subject: [PATCH 01/27] Add function for exploding intervals to loci --- gnomad/utils/intervals.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/gnomad/utils/intervals.py b/gnomad/utils/intervals.py index d927b9bbe..236fff774 100644 --- a/gnomad/utils/intervals.py +++ b/gnomad/utils/intervals.py @@ -112,3 +112,27 @@ def _add_padding( return [_add_padding(i) for i in intervals] else: return _add_padding(intervals) + + +def explode_intervals_to_loci( + ht: hl.Table, + keep_intervals: bool = False, +) -> hl.Table: + """ + Expand intervals to loci. + + :param ht: Hail Table with an interval field. + :param keep_intervals: If True, keep the original interval as a column in output. + :return: Hail Table keyed by loci and intervals as optional field. + """ + ht = ht.annotate( + pos=hl.range(ht.interval.start.position, ht.interval.end.position + 1), + ).explode("pos") + ht = ht.annotate( + locus=hl.locus( + ht.interval.start.contig, + ht.pos, + reference_genome=ht.interval.start.dtype.reference_genome, + ), + ).key_by("locus") + return ht.drop("interval", "pos") if not keep_intervals else ht.drop("pos") From 9b2cae2781ba0f69bb820b5ed589d4498bc0a994 Mon Sep 17 00:00:00 2001 From: Ruchit Panchal Date: Thu, 3 Jul 2025 17:09:00 -0400 Subject: [PATCH 02/27] Update gnomad/utils/intervals.py Co-authored-by: Katherine Chao --- gnomad/utils/intervals.py | 1 + 1 file changed, 1 insertion(+) diff --git a/gnomad/utils/intervals.py b/gnomad/utils/intervals.py index 236fff774..fbfeb0f01 100644 --- a/gnomad/utils/intervals.py +++ b/gnomad/utils/intervals.py @@ -123,6 +123,7 @@ def explode_intervals_to_loci( :param ht: Hail Table with an interval field. :param keep_intervals: If True, keep the original interval as a column in output. + Default is False. :return: Hail Table keyed by loci and intervals as optional field. """ ht = ht.annotate( From 31eeac1cbc302f8d6e336a9aa40c57f14fa17bfb Mon Sep 17 00:00:00 2001 From: Ruchit Panchal Date: Thu, 3 Jul 2025 17:09:39 -0400 Subject: [PATCH 03/27] Update gnomad/utils/intervals.py Co-authored-by: Katherine Chao --- gnomad/utils/intervals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gnomad/utils/intervals.py b/gnomad/utils/intervals.py index fbfeb0f01..6a8673924 100644 --- a/gnomad/utils/intervals.py +++ b/gnomad/utils/intervals.py @@ -133,7 +133,7 @@ def explode_intervals_to_loci( locus=hl.locus( ht.interval.start.contig, ht.pos, - reference_genome=ht.interval.start.dtype.reference_genome, + reference_genome=ht.interval.start.reference_genome, ), ).key_by("locus") return ht.drop("interval", "pos") if not keep_intervals else ht.drop("pos") From b4214e53a503242af4c4defe6274fa363d638245 Mon Sep 17 00:00:00 2001 From: Ruchit10 Date: Thu, 10 Jul 2025 15:24:10 -0400 Subject: [PATCH 04/27] Add flexibility to accept MT/HT and adjust to interval including start and end --- gnomad/utils/intervals.py | 47 +++++++++++++++++++++++++++++---------- 1 file changed, 35 insertions(+), 12 deletions(-) diff --git a/gnomad/utils/intervals.py b/gnomad/utils/intervals.py index 6a8673924..061edfa18 100644 --- a/gnomad/utils/intervals.py +++ b/gnomad/utils/intervals.py @@ -115,25 +115,48 @@ def _add_padding( def explode_intervals_to_loci( - ht: hl.Table, + obj: Union[hl.Table, hl.MatrixTable], + interval_field: str = "interval", keep_intervals: bool = False, -) -> hl.Table: +) -> Union[hl.Table, hl.MatrixTable]: """ Expand intervals to loci. - :param ht: Hail Table with an interval field. + :param obj: Hail Table or MatrixTable with an interval field. + :param interval_field: Name of the interval field. Default is 'interval'. :param keep_intervals: If True, keep the original interval as a column in output. - Default is False. - :return: Hail Table keyed by loci and intervals as optional field. + :return: Hail Table or MatrixTable with interval exploded to loci. """ - ht = ht.annotate( - pos=hl.range(ht.interval.start.position, ht.interval.end.position + 1), - ).explode("pos") + is_matrix = isinstance(obj, hl.MatrixTable) + ht = obj.rows() if is_matrix else obj + + interval = ht[interval_field] + includes_start = interval.includes_start.take(1)[0] + includes_end = interval.includes_end.take(1)[0] + + interval_start = interval.start.position if includes_start else interval.start.position + 1 + interval_end = interval.end.position + 1 if includes_end else interval.end.position + + ht = ht.annotate(pos=hl.range(interval_start, interval_end)).explode("pos") ht = ht.annotate( locus=hl.locus( - ht.interval.start.contig, + ht[interval_field].start.contig, ht.pos, - reference_genome=ht.interval.start.reference_genome, - ), + reference_genome=str(interval.start.take(1)[0].reference_genome) + ) ).key_by("locus") - return ht.drop("interval", "pos") if not keep_intervals else ht.drop("pos") + + fields_to_drop = ["pos"] + if not keep_intervals: + fields_to_drop.append(interval_field) + + ht = ht.drop(*fields_to_drop) + + if is_matrix: + mt = obj + ht = ht.select_globals() + mt = mt.annotate_rows(**ht[mt.row_key]) + mt = mt.filter_rows(hl.is_defined(ht[mt.row_key])) + return mt + else: + return ht From e870d3b619a59c90c8d8037c0cd8bedb909a6e0d Mon Sep 17 00:00:00 2001 From: Ruchit10 Date: Thu, 10 Jul 2025 15:25:29 -0400 Subject: [PATCH 05/27] Black reformatting --- gnomad/utils/intervals.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/gnomad/utils/intervals.py b/gnomad/utils/intervals.py index 061edfa18..4dd4588e3 100644 --- a/gnomad/utils/intervals.py +++ b/gnomad/utils/intervals.py @@ -134,7 +134,9 @@ def explode_intervals_to_loci( includes_start = interval.includes_start.take(1)[0] includes_end = interval.includes_end.take(1)[0] - interval_start = interval.start.position if includes_start else interval.start.position + 1 + interval_start = ( + interval.start.position if includes_start else interval.start.position + 1 + ) interval_end = interval.end.position + 1 if includes_end else interval.end.position ht = ht.annotate(pos=hl.range(interval_start, interval_end)).explode("pos") @@ -142,7 +144,7 @@ def explode_intervals_to_loci( locus=hl.locus( ht[interval_field].start.contig, ht.pos, - reference_genome=str(interval.start.take(1)[0].reference_genome) + reference_genome=str(interval.start.take(1)[0].reference_genome), ) ).key_by("locus") From f8d53cb324d511a88e390f42cc10066e22d238c2 Mon Sep 17 00:00:00 2001 From: Ruchit Panchal Date: Fri, 11 Jul 2025 17:14:26 -0400 Subject: [PATCH 06/27] Update gnomad/utils/intervals.py Co-authored-by: Katherine Chao --- gnomad/utils/intervals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gnomad/utils/intervals.py b/gnomad/utils/intervals.py index 4dd4588e3..621653816 100644 --- a/gnomad/utils/intervals.py +++ b/gnomad/utils/intervals.py @@ -115,7 +115,7 @@ def _add_padding( def explode_intervals_to_loci( - obj: Union[hl.Table, hl.MatrixTable], + ht: hl.Table, interval_field: str = "interval", keep_intervals: bool = False, ) -> Union[hl.Table, hl.MatrixTable]: From 3e96a219b9312788a72fbeb0f3096b9f10bf1a3f Mon Sep 17 00:00:00 2001 From: Ruchit Panchal Date: Fri, 11 Jul 2025 17:15:10 -0400 Subject: [PATCH 07/27] Update gnomad/utils/intervals.py Co-authored-by: Katherine Chao --- gnomad/utils/intervals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gnomad/utils/intervals.py b/gnomad/utils/intervals.py index 621653816..dca794aa9 100644 --- a/gnomad/utils/intervals.py +++ b/gnomad/utils/intervals.py @@ -122,7 +122,7 @@ def explode_intervals_to_loci( """ Expand intervals to loci. - :param obj: Hail Table or MatrixTable with an interval field. + :param obj: Hail Table with intervals to be exploded. :param interval_field: Name of the interval field. Default is 'interval'. :param keep_intervals: If True, keep the original interval as a column in output. :return: Hail Table or MatrixTable with interval exploded to loci. From 40e270245360fed4d4312eca05772145b44ba027 Mon Sep 17 00:00:00 2001 From: Ruchit Panchal Date: Fri, 11 Jul 2025 17:15:26 -0400 Subject: [PATCH 08/27] Update gnomad/utils/intervals.py Co-authored-by: Katherine Chao --- gnomad/utils/intervals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gnomad/utils/intervals.py b/gnomad/utils/intervals.py index dca794aa9..c8e54145a 100644 --- a/gnomad/utils/intervals.py +++ b/gnomad/utils/intervals.py @@ -124,7 +124,7 @@ def explode_intervals_to_loci( :param obj: Hail Table with intervals to be exploded. :param interval_field: Name of the interval field. Default is 'interval'. - :param keep_intervals: If True, keep the original interval as a column in output. + :param keep_intervals: If True, keep the original intervals as a column in output. :return: Hail Table or MatrixTable with interval exploded to loci. """ is_matrix = isinstance(obj, hl.MatrixTable) From 53b50d7256d0ca4de1fcb37697e82bfbfc15a3c7 Mon Sep 17 00:00:00 2001 From: Ruchit Panchal Date: Fri, 11 Jul 2025 17:15:36 -0400 Subject: [PATCH 09/27] Update gnomad/utils/intervals.py Co-authored-by: Katherine Chao --- gnomad/utils/intervals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gnomad/utils/intervals.py b/gnomad/utils/intervals.py index c8e54145a..4968a4995 100644 --- a/gnomad/utils/intervals.py +++ b/gnomad/utils/intervals.py @@ -125,7 +125,7 @@ def explode_intervals_to_loci( :param obj: Hail Table with intervals to be exploded. :param interval_field: Name of the interval field. Default is 'interval'. :param keep_intervals: If True, keep the original intervals as a column in output. - :return: Hail Table or MatrixTable with interval exploded to loci. + :return: Hail Table with intervals exploded to loci. """ is_matrix = isinstance(obj, hl.MatrixTable) ht = obj.rows() if is_matrix else obj From 3d6774d86f8b70963d967bbfbbfb4493cb2307dd Mon Sep 17 00:00:00 2001 From: Ruchit Panchal Date: Fri, 11 Jul 2025 17:15:49 -0400 Subject: [PATCH 10/27] Update gnomad/utils/intervals.py Co-authored-by: Katherine Chao --- gnomad/utils/intervals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gnomad/utils/intervals.py b/gnomad/utils/intervals.py index 4968a4995..dafa49ef7 100644 --- a/gnomad/utils/intervals.py +++ b/gnomad/utils/intervals.py @@ -130,7 +130,7 @@ def explode_intervals_to_loci( is_matrix = isinstance(obj, hl.MatrixTable) ht = obj.rows() if is_matrix else obj - interval = ht[interval_field] + interval_expr = ht[interval_field] includes_start = interval.includes_start.take(1)[0] includes_end = interval.includes_end.take(1)[0] From 238f5473d67e16fdae18247d239a7fba3e350523 Mon Sep 17 00:00:00 2001 From: Ruchit Panchal Date: Fri, 11 Jul 2025 17:16:30 -0400 Subject: [PATCH 11/27] Update gnomad/utils/intervals.py Co-authored-by: Katherine Chao --- gnomad/utils/intervals.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gnomad/utils/intervals.py b/gnomad/utils/intervals.py index dafa49ef7..e663f12d5 100644 --- a/gnomad/utils/intervals.py +++ b/gnomad/utils/intervals.py @@ -140,13 +140,13 @@ def explode_intervals_to_loci( interval_end = interval.end.position + 1 if includes_end else interval.end.position ht = ht.annotate(pos=hl.range(interval_start, interval_end)).explode("pos") - ht = ht.annotate( + ht = ht.key_by( locus=hl.locus( ht[interval_field].start.contig, ht.pos, - reference_genome=str(interval.start.take(1)[0].reference_genome), + reference_genome=get_reference_genome(ht[interval_field]) ) - ).key_by("locus") + ) fields_to_drop = ["pos"] if not keep_intervals: From 619468848052c521f70626a812dbde0f96ce27bc Mon Sep 17 00:00:00 2001 From: Ruchit Panchal Date: Fri, 11 Jul 2025 17:16:58 -0400 Subject: [PATCH 12/27] Update gnomad/utils/intervals.py Co-authored-by: Katherine Chao --- gnomad/utils/intervals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gnomad/utils/intervals.py b/gnomad/utils/intervals.py index e663f12d5..46a9cecee 100644 --- a/gnomad/utils/intervals.py +++ b/gnomad/utils/intervals.py @@ -120,7 +120,7 @@ def explode_intervals_to_loci( keep_intervals: bool = False, ) -> Union[hl.Table, hl.MatrixTable]: """ - Expand intervals to loci. + Expand intervals to loci and key by loci. :param obj: Hail Table with intervals to be exploded. :param interval_field: Name of the interval field. Default is 'interval'. From fcf450e576d4d14b05cf2e19302ed94615ba6b09 Mon Sep 17 00:00:00 2001 From: Ruchit Panchal Date: Fri, 11 Jul 2025 17:17:26 -0400 Subject: [PATCH 13/27] Update gnomad/utils/intervals.py Co-authored-by: Katherine Chao --- gnomad/utils/intervals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gnomad/utils/intervals.py b/gnomad/utils/intervals.py index 46a9cecee..2ae4aebc2 100644 --- a/gnomad/utils/intervals.py +++ b/gnomad/utils/intervals.py @@ -118,7 +118,7 @@ def explode_intervals_to_loci( ht: hl.Table, interval_field: str = "interval", keep_intervals: bool = False, -) -> Union[hl.Table, hl.MatrixTable]: +) -> hl.Table, """ Expand intervals to loci and key by loci. From 77c9f2c6320d371843d5f1825d007b516a3e120a Mon Sep 17 00:00:00 2001 From: Ruchit Panchal Date: Fri, 11 Jul 2025 17:17:53 -0400 Subject: [PATCH 14/27] Update gnomad/utils/intervals.py Co-authored-by: Katherine Chao --- gnomad/utils/intervals.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/gnomad/utils/intervals.py b/gnomad/utils/intervals.py index 2ae4aebc2..3099c4524 100644 --- a/gnomad/utils/intervals.py +++ b/gnomad/utils/intervals.py @@ -127,9 +127,6 @@ def explode_intervals_to_loci( :param keep_intervals: If True, keep the original intervals as a column in output. :return: Hail Table with intervals exploded to loci. """ - is_matrix = isinstance(obj, hl.MatrixTable) - ht = obj.rows() if is_matrix else obj - interval_expr = ht[interval_field] includes_start = interval.includes_start.take(1)[0] includes_end = interval.includes_end.take(1)[0] From caf9dab0b3859034cf7532680d83ae2670cb8057 Mon Sep 17 00:00:00 2001 From: Ruchit Panchal Date: Fri, 11 Jul 2025 17:18:33 -0400 Subject: [PATCH 15/27] Update gnomad/utils/intervals.py Co-authored-by: Katherine Chao --- gnomad/utils/intervals.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/gnomad/utils/intervals.py b/gnomad/utils/intervals.py index 3099c4524..3a9951274 100644 --- a/gnomad/utils/intervals.py +++ b/gnomad/utils/intervals.py @@ -128,13 +128,8 @@ def explode_intervals_to_loci( :return: Hail Table with intervals exploded to loci. """ interval_expr = ht[interval_field] - includes_start = interval.includes_start.take(1)[0] - includes_end = interval.includes_end.take(1)[0] - - interval_start = ( - interval.start.position if includes_start else interval.start.position + 1 - ) - interval_end = interval.end.position + 1 if includes_end else interval.end.position + interval_start_expr = hl.if_else(interval_expr.includes_start, interval_expr.start.position, interval_expr.start.position + 1) + interval_end_expr = hl.if_else(interval_expr.includes_end, interval_expr.end.position + 1, interval_expr.end.position) ht = ht.annotate(pos=hl.range(interval_start, interval_end)).explode("pos") ht = ht.key_by( From c2dc321b5785400922fbc29b5b9a9613f53290e4 Mon Sep 17 00:00:00 2001 From: Ruchit Panchal Date: Fri, 11 Jul 2025 17:18:56 -0400 Subject: [PATCH 16/27] Update gnomad/utils/intervals.py Co-authored-by: Katherine Chao --- gnomad/utils/intervals.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/gnomad/utils/intervals.py b/gnomad/utils/intervals.py index 3a9951274..ec29a830a 100644 --- a/gnomad/utils/intervals.py +++ b/gnomad/utils/intervals.py @@ -144,13 +144,5 @@ def explode_intervals_to_loci( if not keep_intervals: fields_to_drop.append(interval_field) - ht = ht.drop(*fields_to_drop) - - if is_matrix: - mt = obj - ht = ht.select_globals() - mt = mt.annotate_rows(**ht[mt.row_key]) - mt = mt.filter_rows(hl.is_defined(ht[mt.row_key])) - return mt - else: - return ht + return ht.drop(*fields_to_drop) + From f4b28a5c6bcf0f3f2849a6678fd67d7c2185b44d Mon Sep 17 00:00:00 2001 From: Ruchit10 Date: Mon, 14 Jul 2025 13:15:46 -0400 Subject: [PATCH 17/27] Change explode_intervals_to_loci to accept interval expression or HT --- gnomad/utils/intervals.py | 71 +++++++++++++++++++++++++++------------ 1 file changed, 49 insertions(+), 22 deletions(-) diff --git a/gnomad/utils/intervals.py b/gnomad/utils/intervals.py index ec29a830a..139d6046d 100644 --- a/gnomad/utils/intervals.py +++ b/gnomad/utils/intervals.py @@ -3,6 +3,7 @@ from typing import List, Union import hail as hl +from gnomad.utils.reference_genome import get_reference_genome def sort_intervals(intervals: List[hl.Interval]): @@ -115,34 +116,60 @@ def _add_padding( def explode_intervals_to_loci( - ht: hl.Table, + obj: Union[hl.Table, hl.expr.IntervalExpression], interval_field: str = "interval", keep_intervals: bool = False, -) -> hl.Table, +) -> Union[hl.Table, hl.expr.ArrayExpression]: """ - Expand intervals to loci and key by loci. + Expand intervals to loci and key by loci, or return loci range expression. - :param obj: Hail Table with intervals to be exploded. - :param interval_field: Name of the interval field. Default is 'interval'. - :param keep_intervals: If True, keep the original intervals as a column in output. - :return: Hail Table with intervals exploded to loci. + :param obj: Hail Table or Interval Expression. + :param interval_field: Name of the interval field if `obj` is a Hail Table. + :param keep_intervals: If True, keep the original intervals as a column in output, if `obj` is a Hail Table. Default is False. + :return: If input is a Hail Table, returns exploded Table keyed by locus. If input is an IntervalExpression, returns position array expression. """ - interval_expr = ht[interval_field] - interval_start_expr = hl.if_else(interval_expr.includes_start, interval_expr.start.position, interval_expr.start.position + 1) - interval_end_expr = hl.if_else(interval_expr.includes_end, interval_expr.end.position + 1, interval_expr.end.position) - - ht = ht.annotate(pos=hl.range(interval_start, interval_end)).explode("pos") - ht = ht.key_by( - locus=hl.locus( - ht[interval_field].start.contig, - ht.pos, - reference_genome=get_reference_genome(ht[interval_field]) + if isinstance(obj, hl.expr.IntervalExpression): + interval = obj + interval_start_expr = hl.if_else( + interval.includes_start, + interval.start.position, + interval.start.position + 1, + ) + interval_end_expr = hl.if_else( + interval.includes_end, interval.end.position + 1, interval.end.position + ) + return hl.range(interval_start_expr, interval_end_expr) + + elif isinstance(obj, hl.Table): + ht = obj + interval_expr = ht[interval_field] + interval_start_expr = hl.if_else( + interval_expr.includes_start, + interval_expr.start.position, + interval_expr.start.position + 1, + ) + interval_end_expr = hl.if_else( + interval_expr.includes_end, + interval_expr.end.position + 1, + interval_expr.end.position, ) - ) - fields_to_drop = ["pos"] - if not keep_intervals: - fields_to_drop.append(interval_field) + ht = ht.annotate(pos=hl.range(interval_start_expr, interval_end_expr)).explode( + "pos" + ) + ht = ht.key_by( + locus=hl.locus( + ht[interval_field].start.contig, + ht.pos, + reference_genome=get_reference_genome(ht[interval_field]), + ) + ) - return ht.drop(*fields_to_drop) + fields_to_drop = ["pos"] + if not keep_intervals: + fields_to_drop.append(interval_field) + return ht.drop(*fields_to_drop) + + else: + raise TypeError("Input must be a Hail Table or a Hail Interval Expression.") From 1162c19092d465940869d2107c4190583d04ce26 Mon Sep 17 00:00:00 2001 From: Ruchit10 Date: Mon, 14 Jul 2025 13:17:51 -0400 Subject: [PATCH 18/27] Fix pre-commit --- gnomad/utils/intervals.py | 1 + 1 file changed, 1 insertion(+) diff --git a/gnomad/utils/intervals.py b/gnomad/utils/intervals.py index 139d6046d..3eb987d2e 100644 --- a/gnomad/utils/intervals.py +++ b/gnomad/utils/intervals.py @@ -3,6 +3,7 @@ from typing import List, Union import hail as hl + from gnomad.utils.reference_genome import get_reference_genome From 1927ed6c2c326194b3b0fb861cc58a6013f0ea65 Mon Sep 17 00:00:00 2001 From: Ruchit Panchal Date: Tue, 15 Jul 2025 11:13:02 -0400 Subject: [PATCH 19/27] Update gnomad/utils/intervals.py Co-authored-by: Katherine Chao --- gnomad/utils/intervals.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gnomad/utils/intervals.py b/gnomad/utils/intervals.py index 3eb987d2e..1f7db866a 100644 --- a/gnomad/utils/intervals.py +++ b/gnomad/utils/intervals.py @@ -118,8 +118,8 @@ def _add_padding( def explode_intervals_to_loci( obj: Union[hl.Table, hl.expr.IntervalExpression], - interval_field: str = "interval", - keep_intervals: bool = False, + interval_field: Optional[str] = None, + keep_intervals: Optional[bool] = None, ) -> Union[hl.Table, hl.expr.ArrayExpression]: """ Expand intervals to loci and key by loci, or return loci range expression. From 91a49486fd594806661b598fbfd3d4a59429483e Mon Sep 17 00:00:00 2001 From: Ruchit Panchal Date: Tue, 15 Jul 2025 11:13:35 -0400 Subject: [PATCH 20/27] Update gnomad/utils/intervals.py Co-authored-by: Katherine Chao --- gnomad/utils/intervals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gnomad/utils/intervals.py b/gnomad/utils/intervals.py index 1f7db866a..5218394ef 100644 --- a/gnomad/utils/intervals.py +++ b/gnomad/utils/intervals.py @@ -125,7 +125,7 @@ def explode_intervals_to_loci( Expand intervals to loci and key by loci, or return loci range expression. :param obj: Hail Table or Interval Expression. - :param interval_field: Name of the interval field if `obj` is a Hail Table. + :param interval_field: Name of the interval field. Only required if input is a Hail Table. Default is None. :param keep_intervals: If True, keep the original intervals as a column in output, if `obj` is a Hail Table. Default is False. :return: If input is a Hail Table, returns exploded Table keyed by locus. If input is an IntervalExpression, returns position array expression. """ From defa0b046c98cdd86456d7dbb36728b7a3d2ecca Mon Sep 17 00:00:00 2001 From: Ruchit Panchal Date: Tue, 15 Jul 2025 11:14:01 -0400 Subject: [PATCH 21/27] Update gnomad/utils/intervals.py Co-authored-by: Katherine Chao --- gnomad/utils/intervals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gnomad/utils/intervals.py b/gnomad/utils/intervals.py index 5218394ef..4643e36f2 100644 --- a/gnomad/utils/intervals.py +++ b/gnomad/utils/intervals.py @@ -126,7 +126,7 @@ def explode_intervals_to_loci( :param obj: Hail Table or Interval Expression. :param interval_field: Name of the interval field. Only required if input is a Hail Table. Default is None. - :param keep_intervals: If True, keep the original intervals as a column in output, if `obj` is a Hail Table. Default is False. + :param keep_intervals: If True, keep the original intervals as a column in output. Only applies if input is a Hail Table. Default is False. :return: If input is a Hail Table, returns exploded Table keyed by locus. If input is an IntervalExpression, returns position array expression. """ if isinstance(obj, hl.expr.IntervalExpression): From 078ac9133b46438d98785f37faeabf14bef4da25 Mon Sep 17 00:00:00 2001 From: Ruchit Panchal Date: Tue, 15 Jul 2025 11:14:32 -0400 Subject: [PATCH 22/27] Update gnomad/utils/intervals.py Co-authored-by: Katherine Chao --- gnomad/utils/intervals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gnomad/utils/intervals.py b/gnomad/utils/intervals.py index 4643e36f2..03abd8f88 100644 --- a/gnomad/utils/intervals.py +++ b/gnomad/utils/intervals.py @@ -117,7 +117,7 @@ def _add_padding( def explode_intervals_to_loci( - obj: Union[hl.Table, hl.expr.IntervalExpression], + intervals: Union[hl.Table, hl.expr.IntervalExpression], interval_field: Optional[str] = None, keep_intervals: Optional[bool] = None, ) -> Union[hl.Table, hl.expr.ArrayExpression]: From 020b493a87817f70cf4d913ea407a9c2e0a037b1 Mon Sep 17 00:00:00 2001 From: Ruchit Panchal Date: Tue, 15 Jul 2025 11:15:50 -0400 Subject: [PATCH 23/27] Update gnomad/utils/intervals.py Co-authored-by: Katherine Chao --- gnomad/utils/intervals.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/gnomad/utils/intervals.py b/gnomad/utils/intervals.py index 03abd8f88..153771073 100644 --- a/gnomad/utils/intervals.py +++ b/gnomad/utils/intervals.py @@ -129,7 +129,11 @@ def explode_intervals_to_loci( :param keep_intervals: If True, keep the original intervals as a column in output. Only applies if input is a Hail Table. Default is False. :return: If input is a Hail Table, returns exploded Table keyed by locus. If input is an IntervalExpression, returns position array expression. """ - if isinstance(obj, hl.expr.IntervalExpression): + assert isintance(intervals, hl.Table) or isinstance(intervals, hl.expr.IntervalExpression), "Input must be a Table or IntervalExpression!" + + if isinstance(intervals, hl.Table) and (not interval_field or keep_intervals is None): + raise ValueError("`interval_field` and `keep_intervals` must be defined if input is a Table!") + assert interval_field in intervals.row, "`interval_field` must be an annotation present on input Table!" interval = obj interval_start_expr = hl.if_else( interval.includes_start, From 04edebc0da331f437b24a403c70f63ad9629b0ce Mon Sep 17 00:00:00 2001 From: Ruchit Panchal Date: Tue, 15 Jul 2025 11:17:19 -0400 Subject: [PATCH 24/27] Update gnomad/utils/intervals.py Co-authored-by: Katherine Chao --- gnomad/utils/intervals.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/gnomad/utils/intervals.py b/gnomad/utils/intervals.py index 153771073..dca931957 100644 --- a/gnomad/utils/intervals.py +++ b/gnomad/utils/intervals.py @@ -134,15 +134,17 @@ def explode_intervals_to_loci( if isinstance(intervals, hl.Table) and (not interval_field or keep_intervals is None): raise ValueError("`interval_field` and `keep_intervals` must be defined if input is a Table!") assert interval_field in intervals.row, "`interval_field` must be an annotation present on input Table!" - interval = obj - interval_start_expr = hl.if_else( - interval.includes_start, - interval.start.position, - interval.start.position + 1, - ) - interval_end_expr = hl.if_else( - interval.includes_end, interval.end.position + 1, interval.end.position - ) + intervals_expr = intervals if isinstance(intervals, hl.expr.IntervalExpression) else intervals[interval_field] + intervals_start_expr = hl.if_else( + intervals_expr.includes_start, + intervals_expr.start.position, + intervals_expr.start.position + 1, + ) + intervals_end_expr = hl.if_else( + intervals_expr.includes_end, + intervals_expr.end.position + 1, + intervals_expr.end.position + ) return hl.range(interval_start_expr, interval_end_expr) elif isinstance(obj, hl.Table): From ec2cb971b395c0936a7f440629ba8bec20d27b5b Mon Sep 17 00:00:00 2001 From: Ruchit Panchal Date: Tue, 15 Jul 2025 11:18:53 -0400 Subject: [PATCH 25/27] Update gnomad/utils/intervals.py Co-authored-by: Katherine Chao --- gnomad/utils/intervals.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/gnomad/utils/intervals.py b/gnomad/utils/intervals.py index dca931957..fc6a1325b 100644 --- a/gnomad/utils/intervals.py +++ b/gnomad/utils/intervals.py @@ -145,7 +145,26 @@ def explode_intervals_to_loci( intervals_expr.end.position + 1, intervals_expr.end.position ) - return hl.range(interval_start_expr, interval_end_expr) + if isinstance(intervals, hl.Table): + intervals = intervals.annotate(pos=hl.range(intervals_start_expr, intervals_end_expr)).explode( + "pos" + ) + intervals = intervals.key_by( + locus=hl.locus( + intervals[interval_field].start.contig, + intervals.pos, + reference_genome=get_reference_genome(intervals[interval_field]), + ) + ) + + fields_to_drop = ["pos"] + if not keep_intervals: + fields_to_drop.append(interval_field) + + return intervals.drop(*fields_to_drop) + + logger.warning("Input is an IntervalExpression, so function will return ArrayExpression of positions within input intervals. To fully explode intervals to loci, we recommend annotating your dataset with the returned ArrayExpression, exploding the array, and converting the positions to loci!") + return hl.range(intervals_start_expr, intervals_end_expr) elif isinstance(obj, hl.Table): ht = obj From f8864f7852c429501d0c97c81e223227b18be421 Mon Sep 17 00:00:00 2001 From: Ruchit Panchal Date: Tue, 15 Jul 2025 11:19:38 -0400 Subject: [PATCH 26/27] Update gnomad/utils/intervals.py Co-authored-by: Katherine Chao --- gnomad/utils/intervals.py | 33 --------------------------------- 1 file changed, 33 deletions(-) diff --git a/gnomad/utils/intervals.py b/gnomad/utils/intervals.py index fc6a1325b..b5c1c97fd 100644 --- a/gnomad/utils/intervals.py +++ b/gnomad/utils/intervals.py @@ -166,36 +166,3 @@ def explode_intervals_to_loci( logger.warning("Input is an IntervalExpression, so function will return ArrayExpression of positions within input intervals. To fully explode intervals to loci, we recommend annotating your dataset with the returned ArrayExpression, exploding the array, and converting the positions to loci!") return hl.range(intervals_start_expr, intervals_end_expr) - elif isinstance(obj, hl.Table): - ht = obj - interval_expr = ht[interval_field] - interval_start_expr = hl.if_else( - interval_expr.includes_start, - interval_expr.start.position, - interval_expr.start.position + 1, - ) - interval_end_expr = hl.if_else( - interval_expr.includes_end, - interval_expr.end.position + 1, - interval_expr.end.position, - ) - - ht = ht.annotate(pos=hl.range(interval_start_expr, interval_end_expr)).explode( - "pos" - ) - ht = ht.key_by( - locus=hl.locus( - ht[interval_field].start.contig, - ht.pos, - reference_genome=get_reference_genome(ht[interval_field]), - ) - ) - - fields_to_drop = ["pos"] - if not keep_intervals: - fields_to_drop.append(interval_field) - - return ht.drop(*fields_to_drop) - - else: - raise TypeError("Input must be a Hail Table or a Hail Interval Expression.") From 6216cee7467a84ce974ae0022ebce9af13865d05 Mon Sep 17 00:00:00 2001 From: Ruchit10 Date: Tue, 15 Jul 2025 11:41:48 -0400 Subject: [PATCH 27/27] update docstrings, add logging and typing imports, fix typos and indentation --- gnomad/utils/intervals.py | 74 ++++++++++++++++++++++++++------------- 1 file changed, 49 insertions(+), 25 deletions(-) diff --git a/gnomad/utils/intervals.py b/gnomad/utils/intervals.py index b5c1c97fd..ff8039742 100644 --- a/gnomad/utils/intervals.py +++ b/gnomad/utils/intervals.py @@ -1,11 +1,19 @@ # noqa: D100 -from typing import List, Union +import logging +from typing import List, Optional, Union import hail as hl from gnomad.utils.reference_genome import get_reference_genome +logging.basicConfig( + format="%(asctime)s (%(name)s %(lineno)s): %(message)s", + datefmt="%m/%d/%Y %I:%M:%S %p", +) +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + def sort_intervals(intervals: List[hl.Interval]): """ @@ -119,36 +127,48 @@ def _add_padding( def explode_intervals_to_loci( intervals: Union[hl.Table, hl.expr.IntervalExpression], interval_field: Optional[str] = None, - keep_intervals: Optional[bool] = None, + keep_intervals: Optional[bool] = False, ) -> Union[hl.Table, hl.expr.ArrayExpression]: """ Expand intervals to loci and key by loci, or return loci range expression. - :param obj: Hail Table or Interval Expression. + :param intervals: Hail Table or Interval Expression. :param interval_field: Name of the interval field. Only required if input is a Hail Table. Default is None. :param keep_intervals: If True, keep the original intervals as a column in output. Only applies if input is a Hail Table. Default is False. :return: If input is a Hail Table, returns exploded Table keyed by locus. If input is an IntervalExpression, returns position array expression. """ - assert isintance(intervals, hl.Table) or isinstance(intervals, hl.expr.IntervalExpression), "Input must be a Table or IntervalExpression!" - - if isinstance(intervals, hl.Table) and (not interval_field or keep_intervals is None): - raise ValueError("`interval_field` and `keep_intervals` must be defined if input is a Table!") - assert interval_field in intervals.row, "`interval_field` must be an annotation present on input Table!" - intervals_expr = intervals if isinstance(intervals, hl.expr.IntervalExpression) else intervals[interval_field] - intervals_start_expr = hl.if_else( - intervals_expr.includes_start, - intervals_expr.start.position, - intervals_expr.start.position + 1, - ) - intervals_end_expr = hl.if_else( - intervals_expr.includes_end, - intervals_expr.end.position + 1, - intervals_expr.end.position - ) - if isinstance(intervals, hl.Table): - intervals = intervals.annotate(pos=hl.range(intervals_start_expr, intervals_end_expr)).explode( - "pos" + assert isinstance(intervals, hl.Table) or isinstance( + intervals, hl.expr.IntervalExpression + ), "Input must be a Table or IntervalExpression!" + + if isinstance(intervals, hl.Table) and ( + not interval_field or keep_intervals is None + ): + raise ValueError( + "`interval_field` and `keep_intervals` must be defined if input is a Table!" ) + assert ( + interval_field in intervals.row + ), "`interval_field` must be an annotation present on input Table!" + intervals_expr = ( + intervals + if isinstance(intervals, hl.expr.IntervalExpression) + else intervals[interval_field] + ) + intervals_start_expr = hl.if_else( + intervals_expr.includes_start, + intervals_expr.start.position, + intervals_expr.start.position + 1, + ) + intervals_end_expr = hl.if_else( + intervals_expr.includes_end, + intervals_expr.end.position + 1, + intervals_expr.end.position, + ) + if isinstance(intervals, hl.Table): + intervals = intervals.annotate( + pos=hl.range(intervals_start_expr, intervals_end_expr) + ).explode("pos") intervals = intervals.key_by( locus=hl.locus( intervals[interval_field].start.contig, @@ -162,7 +182,11 @@ def explode_intervals_to_loci( fields_to_drop.append(interval_field) return intervals.drop(*fields_to_drop) - - logger.warning("Input is an IntervalExpression, so function will return ArrayExpression of positions within input intervals. To fully explode intervals to loci, we recommend annotating your dataset with the returned ArrayExpression, exploding the array, and converting the positions to loci!") - return hl.range(intervals_start_expr, intervals_end_expr) + logger.warning( + "Input is an IntervalExpression, so function will return ArrayExpression of" + " positions within input intervals. To fully explode intervals to loci, we" + " recommend annotating your dataset with the returned ArrayExpression," + " exploding the array, and converting the positions to loci!" + ) + return hl.range(intervals_start_expr, intervals_end_expr)