@@ -226,6 +226,8 @@ def annotate_sex(
226226 gt_expr : str = "GT" ,
227227 f_stat_cutoff : float = 0.5 ,
228228 aaf_threshold : float = 0.001 ,
229+ variants_only_x_ploidy : bool = False ,
230+ variants_only_y_ploidy : bool = False ,
229231) -> hl .Table :
230232 """
231233 Impute sample sex based on X-chromosome heterozygosity and sex chromosome ploidy.
@@ -256,6 +258,8 @@ def annotate_sex(
256258 :param gt_expr: Name of entry field storing the genotype. Default: 'GT'
257259 :param f_stat_cutoff: f-stat to roughly divide 'XX' from 'XY' samples. Assumes XX samples are below cutoff and XY are above cutoff.
258260 :param float aaf_threshold: Minimum alternate allele frequency to be used in f-stat calculations.
261+ :param variants_only_x_ploidy: Whether to use depth of only variant data for the x ploidy estimation.
262+ :param variants_only_y_ploidy: Whether to use depth of only variant data for the y ploidy estimation.
259263 :return: Table of samples and their imputed sex karyotypes.
260264 """
261265 logger .info ("Imputing sex chromosome ploidies..." )
@@ -266,27 +270,94 @@ def annotate_sex(
266270 raise NotImplementedError (
267271 "The use of the parameter 'excluded_intervals' is currently not implemented for imputing sex chromosome ploidy on a VDS!"
268272 )
273+ # Begin by creating a ploidy estimate HT using the method defined by 'variants_only_x_ploidy'
269274 ploidy_ht = hl .vds .impute_sex_chromosome_ploidy (
270275 mtds ,
271276 calling_intervals = included_intervals ,
272277 normalization_contig = normalization_contig ,
278+ use_variant_dataset = variants_only_x_ploidy ,
273279 )
274280 ploidy_ht = ploidy_ht .rename (
275281 {
276282 "x_ploidy" : "chrX_ploidy" ,
277283 "y_ploidy" : "chrY_ploidy" ,
278284 "x_mean_dp" : "chrX_mean_dp" ,
279285 "y_mean_dp" : "chrY_mean_dp" ,
280- "autosomal_mean_dp" : f"{ normalization_contig } _mean_dp" ,
286+ "autosomal_mean_dp" : f"var_data_{ normalization_contig } _mean_dp"
287+ if variants_only_x_ploidy
288+ else f"{ normalization_contig } _mean_dp" ,
281289 }
282290 )
291+ # If 'variants_only_y_ploidy' is different from 'variants_only_x_ploidy' then re-run the ploidy estimation using
292+ # the method defined by 'variants_only_y_ploidy' and re-annotate with the modified ploidy estimates.
293+ if variants_only_y_ploidy != variants_only_x_ploidy :
294+ y_ploidy_ht = hl .vds .impute_sex_chromosome_ploidy (
295+ mtds ,
296+ calling_intervals = included_intervals ,
297+ normalization_contig = normalization_contig ,
298+ use_variant_dataset = variants_only_y_ploidy ,
299+ )
300+ y_ploidy_idx = y_ploidy_ht [ploidy_ht .key ]
301+ ploidy_ht = ploidy_ht .annotate (
302+ chrY_ploidy = y_ploidy_idx .y_ploidy ,
303+ chrY_mean_dp = y_ploidy_idx .y_mean_dp ,
304+ )
305+
306+ # If 'variants_only_y_ploidy' is True, modify the name of the normalization contig mean DP to indicate
307+ # that this is the variant dataset only mean DP (this will have already been added if
308+ # 'variants_only_x_ploidy' was also True).
309+ if variants_only_y_ploidy :
310+ ploidy_ht = ploidy_ht .annotate (
311+ ** {
312+ f"var_data_{ normalization_contig } _mean_dp" : y_ploidy_idx .autosomal_mean_dp
313+ }
314+ )
315+
283316 mt = mtds .variant_data
284317 else :
285318 mt = mtds
286319 if is_sparse :
287320 ploidy_ht = impute_sex_ploidy (
288- mt , excluded_intervals , included_intervals , normalization_contig
321+ mt ,
322+ excluded_intervals ,
323+ included_intervals ,
324+ normalization_contig ,
325+ use_only_variants = variants_only_x_ploidy ,
289326 )
327+ ploidy_ht = ploidy_ht .rename (
328+ {
329+ "autosomal_mean_dp" : f"var_data_{ normalization_contig } _mean_dp"
330+ if variants_only_x_ploidy
331+ else f"{ normalization_contig } _mean_dp" ,
332+ }
333+ )
334+ # If 'variants_only_y_ploidy' is different from 'variants_only_x_ploidy' then re-run the ploidy estimation
335+ # using the method defined by 'variants_only_y_ploidy' and re-annotate with the modified ploidy estimates.
336+ if variants_only_y_ploidy != variants_only_x_ploidy :
337+ y_ploidy_ht = impute_sex_ploidy (
338+ mt ,
339+ excluded_intervals ,
340+ included_intervals ,
341+ normalization_contig ,
342+ use_only_variants = variants_only_y_ploidy ,
343+ )
344+ y_ploidy_ht .select (
345+ "chrY_ploidy" ,
346+ "chrY_mean_dp" ,
347+ f"{ normalization_contig } _mean_dp" ,
348+ )
349+ # If 'variants_only_y_ploidy' is True, modify the name of the normalization contig mean DP to indicate
350+ # that this is the variant dataset only mean DP (this will have already been added if
351+ # 'variants_only_x_ploidy' was also True).
352+ if variants_only_y_ploidy :
353+ ploidy_ht = ploidy_ht .rename (
354+ {
355+ f"{ normalization_contig } _mean_dp" : f"var_data_{ normalization_contig } _mean_dp"
356+ }
357+ )
358+ # Re-annotate the ploidy HT with modified Y ploidy annotations
359+ ploidy_ht = ploidy_ht .annotate (** y_ploidy_ht [ploidy_ht .key ])
360+
290361 else :
291362 raise NotImplementedError (
292363 "Imputing sex ploidy does not exist yet for dense data."
@@ -348,6 +419,8 @@ def annotate_sex(
348419 lower_cutoff_YY = y_ploidy_cutoffs [1 ],
349420 ),
350421 f_stat_cutoff = f_stat_cutoff ,
422+ variants_only_x_ploidy = variants_only_x_ploidy ,
423+ variants_only_y_ploidy = variants_only_y_ploidy ,
351424 )
352425 return sex_ht .annotate (
353426 ** get_sex_expr (
0 commit comments