From db8e849befbec8f91d047f0f7dcfa2d8d9ec5243 Mon Sep 17 00:00:00 2001 From: "Xu, Beisi" Date: Tue, 29 Jun 2021 19:06:41 -0500 Subject: [PATCH 01/18] move legend to up and fix y-axis inward if using different ymax --- deeptools/plotHeatmap.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/deeptools/plotHeatmap.py b/deeptools/plotHeatmap.py index bc4bbcc2a4..cbf2ddb62d 100755 --- a/deeptools/plotHeatmap.py +++ b/deeptools/plotHeatmap.py @@ -161,6 +161,7 @@ def addProfilePlot(hm, plt, fig, grids, iterNum, iterNum2, perGroup, averageType if sample_id > 0 and len(yMin) == 1 and len(yMax) == 1: plt.setp(ax_profile.get_yticklabels(), visible=False) + ax_profile.get_yaxis().set_tick_params(direction='in',pad=-22) # beisi if sample_id == 0 and yAxisLabel != '': ax_profile.set_ylabel(yAxisLabel) xticks, xtickslabel = hm.getTicks(tickIdx) @@ -593,8 +594,13 @@ def plotMatrix(hm, outFileName, ax_list = addProfilePlot(hm, plt, fig, grids, iterNum, iterNum2, perGroup, averageType, plot_type, yAxisLabel, color_list, yMin, yMax, kwargs['wspace'], kwargs['hspace'], colorbar_position, label_rotation) if legend_location != 'none': - ax_list[-1].legend(loc=legend_location.replace('-', ' '), ncol=1, prop=fontP, - frameon=False, markerscale=0.5) + ax = ax_list[-1] # beisi + box = ax.get_position() + ax.set_position([box.x0, box.y0 - box.height * 0.1, box.width, box.height * 0.9]) + legend = ax.legend(loc='lower right', shadow=False, fontsize='x-large', bbox_to_anchor=(0, 1.3, 1, .22), ncol=10, frameon=False, prop=fontP) # beisi, legend line + ax.add_artist(legend) + # ax_list[-1].legend(loc=legend_location.replace('-', ' '), ncol=1, prop=fontP, + # frameon=False, markerscale=0.5) first_group = 0 # helper variable to place the title per sample/group for sample in range(hm.matrix.get_num_samples()): From 30f42e99b8fa073e14047b4de3f83aa975142c61 Mon Sep 17 00:00:00 2001 From: "Xu, Beisi" Date: Tue, 11 Jul 2023 18:43:25 -0500 Subject: [PATCH 02/18] autobreakline for title and rotation y label of heatmap --- deeptools/heatmapper.py | 4 ++-- deeptools/plotHeatmap.py | 19 ++++++++++++++----- deeptools/plotProfile.py | 15 ++++++++++++--- 3 files changed, 28 insertions(+), 10 deletions(-) diff --git a/deeptools/heatmapper.py b/deeptools/heatmapper.py index 6a95ba0052..d0eebaad22 100644 --- a/deeptools/heatmapper.py +++ b/deeptools/heatmapper.py @@ -1308,9 +1308,9 @@ def hmcluster(self, k, evaluate_silhouette=True, method='kmeans', clustering_sam _clustered_matrix = [] cluster_number = 1 for cluster in cluster_order: - self.group_labels.append("cluster_{}".format(cluster_number)) - cluster_number += 1 cluster_ids = _cluster_ids_list[cluster] + self.group_labels.append("cluster_{}_n{}".format(cluster_number,len(cluster_ids))) + cluster_number += 1 self.group_boundaries.append(self.group_boundaries[-1] + len(cluster_ids)) _clustered_matrix.append(self.matrix[cluster_ids, :]) diff --git a/deeptools/plotHeatmap.py b/deeptools/plotHeatmap.py index cbf2ddb62d..521ebc388e 100755 --- a/deeptools/plotHeatmap.py +++ b/deeptools/plotHeatmap.py @@ -13,7 +13,7 @@ from matplotlib.font_manager import FontProperties import matplotlib.gridspec as gridspec from matplotlib import ticker -import copy +import copy, re import sys import plotly.offline as py import plotly.graph_objs as go @@ -115,6 +115,15 @@ def prepare_layout(hm_matrix, heatmapsize, showSummaryPlot, showColorbar, perGro return grids +def autobreaklinetitle(title,sep="[-_,.]",lmax=15): + sss = [ rr for rr in re.split(sep,title) if len(rr) ] + newtitle, tmp = "", "" + for ss in sss: + tmp += ss + if len(tmp) > lmax: + newtitle += tmp + "\n" + tmp = "" + return newtitle def addProfilePlot(hm, plt, fig, grids, iterNum, iterNum2, perGroup, averageType, plot_type, yAxisLabel, color_list, yMin, yMax, wspace, hspace, colorbar_position, label_rotation=0.0): """ @@ -144,7 +153,7 @@ def addProfilePlot(hm, plt, fig, grids, iterNum, iterNum2, perGroup, averageType else: ax_profile = fig.add_subplot(grids[0, sample_id]) - ax_profile.set_title(title) + ax_profile.set_title(autobreaklinetitle(title)) for group in range(iterNum2): if perGroup: sub_matrix = hm.matrix.get_matrix(sample_id, group) @@ -636,7 +645,7 @@ def plotMatrix(hm, outFileName, if group == first_group and not showSummaryPlot and not perGroup: title = hm.matrix.sample_labels[sample] - ax.set_title(title) + ax_profile.set_title(autobreaklinetitle(title)) if box_around_heatmaps is False: # Turn off the boxes around the individual heatmaps @@ -689,9 +698,9 @@ def plotMatrix(hm, outFileName, ax.axes.set_xlabel(xAxisLabel) ax.axes.set_yticks([]) if perGroup and group == 0: - ax.axes.set_ylabel(sub_matrix['sample']) + ax.axes.set_ylabel(sub_matrix['sample'],rotation=75,labelpad=0,fontsize=15) elif not perGroup and sample == 0: - ax.axes.set_ylabel(sub_matrix['group']) + ax.axes.set_ylabel(sub_matrix['group'],rotation=75,labelpad=0,horizontalalignment='right',fontsize=15) # Plot vertical lines at tick marks if desired if linesAtTickMarks: diff --git a/deeptools/plotProfile.py b/deeptools/plotProfile.py index b46be32bb9..aa8afc30a8 100755 --- a/deeptools/plotProfile.py +++ b/deeptools/plotProfile.py @@ -49,6 +49,15 @@ def parse_arguments(args=None): return parser +def autobreaklinetitle(title,sep="[-_,.]",lmax=15): + sss = [ rr for rr in re.split(sep,title) if len(rr) ] + newtitle, tmp = "", "" + for ss in sss: + tmp += ss + if len(tmp) > lmax: + newtitle += tmp + "\n" + tmp = "" + return newtitle def process_args(args=None): args = parse_arguments().parse_args(args) @@ -269,7 +278,7 @@ def plot_hexbin(self): for data_idx in range(self.numlines)[::-1]: ax = self.fig.add_subplot(sub_grid[data_idx, 0]) if data_idx == 0: - ax.set_title(title) + ax.set_title(autobreaklinetitle(title)) if data_idx != self.numlines - 1: plt.setp(ax.get_xticklabels(), visible=False) @@ -514,7 +523,7 @@ def plot_heatmap(self): else: title = self.hm.matrix.sample_labels[plot] tickIdx = plot - ax.set_title(title) + ax.set_title(autobreaklinetitle(title)) mat = [] # when drawing a heatmap (in contrast to drawing lines) for data_idx in range(self.numlines): if self.per_group: @@ -723,7 +732,7 @@ def plot_profile(self): plt.setp(ax.get_yticklabels(), visible=False) tickIdx = plot - ax.set_title(title) + ax.set_title(autobreaklinetitle(title)) for data_idx in range(self.numlines): if self.per_group: _row, _col = plot, data_idx From f1481a4cbe09404f02e44bb2d7d725502bc50493 Mon Sep 17 00:00:00 2001 From: "Xu, Beisi" Date: Tue, 11 Jul 2023 18:44:27 -0500 Subject: [PATCH 03/18] autobreakline for title and rotation y label of heatmap --- deeptools/plotHeatmap.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deeptools/plotHeatmap.py b/deeptools/plotHeatmap.py index 521ebc388e..7285ea816a 100755 --- a/deeptools/plotHeatmap.py +++ b/deeptools/plotHeatmap.py @@ -645,7 +645,7 @@ def plotMatrix(hm, outFileName, if group == first_group and not showSummaryPlot and not perGroup: title = hm.matrix.sample_labels[sample] - ax_profile.set_title(autobreaklinetitle(title)) + ax.set_title(autobreaklinetitle(title)) if box_around_heatmaps is False: # Turn off the boxes around the individual heatmaps From 7cacf3cb5243f9a46d085367c2629e9397eeaebb Mon Sep 17 00:00:00 2001 From: "Xu, Beisi" Date: Wed, 12 Jul 2023 10:53:08 -0500 Subject: [PATCH 04/18] debug title --- deeptools/plotHeatmap.py | 2 ++ deeptools/plotProfile.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/deeptools/plotHeatmap.py b/deeptools/plotHeatmap.py index 7285ea816a..e7657172a2 100755 --- a/deeptools/plotHeatmap.py +++ b/deeptools/plotHeatmap.py @@ -123,6 +123,8 @@ def autobreaklinetitle(title,sep="[-_,.]",lmax=15): if len(tmp) > lmax: newtitle += tmp + "\n" tmp = "" + else: + tmp += "-" return newtitle def addProfilePlot(hm, plt, fig, grids, iterNum, iterNum2, perGroup, averageType, plot_type, yAxisLabel, color_list, yMin, yMax, wspace, hspace, colorbar_position, label_rotation=0.0): diff --git a/deeptools/plotProfile.py b/deeptools/plotProfile.py index aa8afc30a8..5011234c1a 100755 --- a/deeptools/plotProfile.py +++ b/deeptools/plotProfile.py @@ -57,6 +57,8 @@ def autobreaklinetitle(title,sep="[-_,.]",lmax=15): if len(tmp) > lmax: newtitle += tmp + "\n" tmp = "" + else: + tmp += "-" return newtitle def process_args(args=None): From a7059c1b3916482db341e41161e869d610a67bef Mon Sep 17 00:00:00 2001 From: "Xu, Beisi" Date: Wed, 12 Jul 2023 15:07:46 -0500 Subject: [PATCH 05/18] debug title --- deeptools/plotHeatmap.py | 1 + deeptools/plotProfile.py | 1 + 2 files changed, 2 insertions(+) diff --git a/deeptools/plotHeatmap.py b/deeptools/plotHeatmap.py index e7657172a2..0358390984 100755 --- a/deeptools/plotHeatmap.py +++ b/deeptools/plotHeatmap.py @@ -125,6 +125,7 @@ def autobreaklinetitle(title,sep="[-_,.]",lmax=15): tmp = "" else: tmp += "-" + newtitle += tmp.strip("-") + "\n" return newtitle def addProfilePlot(hm, plt, fig, grids, iterNum, iterNum2, perGroup, averageType, plot_type, yAxisLabel, color_list, yMin, yMax, wspace, hspace, colorbar_position, label_rotation=0.0): diff --git a/deeptools/plotProfile.py b/deeptools/plotProfile.py index 5011234c1a..ce49e82ab5 100755 --- a/deeptools/plotProfile.py +++ b/deeptools/plotProfile.py @@ -59,6 +59,7 @@ def autobreaklinetitle(title,sep="[-_,.]",lmax=15): tmp = "" else: tmp += "-" + newtitle += tmp.strip("-") + "\n" return newtitle def process_args(args=None): From 23c5e1601c909cdab9b744af2b7d225818a3f9c5 Mon Sep 17 00:00:00 2001 From: "Xu, Beisi" Date: Wed, 12 Jul 2023 16:20:58 -0500 Subject: [PATCH 06/18] small tweak --- deeptools/plotHeatmap.py | 3 ++- deeptools/plotProfile.py | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/deeptools/plotHeatmap.py b/deeptools/plotHeatmap.py index 0358390984..688d41df1d 100755 --- a/deeptools/plotHeatmap.py +++ b/deeptools/plotHeatmap.py @@ -125,7 +125,8 @@ def autobreaklinetitle(title,sep="[-_,.]",lmax=15): tmp = "" else: tmp += "-" - newtitle += tmp.strip("-") + "\n" + newtitle += tmp.strip("-") + newtitle = "\n" + newtitle return newtitle def addProfilePlot(hm, plt, fig, grids, iterNum, iterNum2, perGroup, averageType, plot_type, yAxisLabel, color_list, yMin, yMax, wspace, hspace, colorbar_position, label_rotation=0.0): diff --git a/deeptools/plotProfile.py b/deeptools/plotProfile.py index ce49e82ab5..f100a9eac8 100755 --- a/deeptools/plotProfile.py +++ b/deeptools/plotProfile.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- -import sys +import sys, re import argparse import numpy as np @@ -59,7 +59,8 @@ def autobreaklinetitle(title,sep="[-_,.]",lmax=15): tmp = "" else: tmp += "-" - newtitle += tmp.strip("-") + "\n" + newtitle += tmp.strip("-") + newtitle = "\n" + newtitle return newtitle def process_args(args=None): From 1b85c00c8c0efa1b10f724ff4ae338aec651e5d1 Mon Sep 17 00:00:00 2001 From: "Xu, Beisi" Date: Mon, 2 Oct 2023 18:09:32 -0500 Subject: [PATCH 07/18] hard limit to 15 chars --- deeptools/plotHeatmap.py | 16 +++++++++------- deeptools/plotProfile.py | 14 ++++++++------ 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/deeptools/plotHeatmap.py b/deeptools/plotHeatmap.py index 688d41df1d..d2f8b5a18c 100755 --- a/deeptools/plotHeatmap.py +++ b/deeptools/plotHeatmap.py @@ -13,8 +13,8 @@ from matplotlib.font_manager import FontProperties import matplotlib.gridspec as gridspec from matplotlib import ticker -import copy, re -import sys + +import sys, re, os, copy import plotly.offline as py import plotly.graph_objs as go @@ -115,17 +115,19 @@ def prepare_layout(hm_matrix, heatmapsize, showSummaryPlot, showColorbar, perGro return grids -def autobreaklinetitle(title,sep="[-_,.]",lmax=15): +def autobreaklinetitle(title,sep="[-_,.:]",lmax=15): + outsep = "-" sss = [ rr for rr in re.split(sep,title) if len(rr) ] newtitle, tmp = "", "" for ss in sss: + tmp0 = tmp tmp += ss if len(tmp) > lmax: - newtitle += tmp + "\n" - tmp = "" + newtitle += tmp0.strip(outsep) + "\n" + tmp = ss else: - tmp += "-" - newtitle += tmp.strip("-") + tmp += outsep + newtitle += tmp.strip(outsep) newtitle = "\n" + newtitle return newtitle diff --git a/deeptools/plotProfile.py b/deeptools/plotProfile.py index f100a9eac8..6655d5940d 100755 --- a/deeptools/plotProfile.py +++ b/deeptools/plotProfile.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- -import sys, re +import sys, os, re import argparse import numpy as np @@ -49,17 +49,19 @@ def parse_arguments(args=None): return parser -def autobreaklinetitle(title,sep="[-_,.]",lmax=15): +def autobreaklinetitle(title,sep="[-_,.:]",lmax=15): + outsep = "-" sss = [ rr for rr in re.split(sep,title) if len(rr) ] newtitle, tmp = "", "" for ss in sss: + tmp0 = tmp tmp += ss if len(tmp) > lmax: - newtitle += tmp + "\n" - tmp = "" + newtitle += tmp0.strip(outsep) + "\n" + tmp = ss else: - tmp += "-" - newtitle += tmp.strip("-") + tmp += outsep + newtitle += tmp.strip(outsep) newtitle = "\n" + newtitle return newtitle From e793e4840e500c811cf4258e407bd60e76d5d359 Mon Sep 17 00:00:00 2001 From: "Xu, Beisi" Date: Wed, 22 Nov 2023 13:01:56 -0600 Subject: [PATCH 08/18] add --repgrplist option to combine replicates for plotProfile --- deeptools/heatmapper_utilities.py | 21 +++++- deeptools/parserCommon.py | 8 +++ deeptools/plotProfile.py | 109 +++++++++++++++++++++--------- 3 files changed, 103 insertions(+), 35 deletions(-) diff --git a/deeptools/heatmapper_utilities.py b/deeptools/heatmapper_utilities.py index e63dfb0226..aa3559096f 100644 --- a/deeptools/heatmapper_utilities.py +++ b/deeptools/heatmapper_utilities.py @@ -9,6 +9,9 @@ old_settings = np.seterr(all='ignore') +debug = 0 +if debug: + from ipdb import set_trace def plot_single(ax, ma, average_type, color, label, plot_type='lines'): """ @@ -18,7 +21,7 @@ def plot_single(ax, ma, average_type, color, label, plot_type='lines'): ---------- ax : matplotlib axis matplotlib axis - ma : numpy array + ma : numpy array or list of numpy array(for plot with --repgrplist, take average between replicates ) numpy array The data on this matrix is summarized according to the `average_type` argument. average_type : str @@ -32,7 +35,9 @@ def plot_single(ax, ma, average_type, color, label, plot_type='lines'): type of plot. Either 'se' for standard error, 'std' for standard deviation, 'overlapped_lines' to plot each line of the matrix, fill to plot the area between the x axis and the value or any other string to - just plot the average line. + just plot the average line. When assign samples to replicates group such as + '--repgrplist WT WT KO KO' : 'std' would be the standard deviation between replicates groups. + 'se' for standard error between replicates groups. Returns ------- @@ -63,7 +68,15 @@ def plot_single(ax, ma, average_type, color, label, plot_type='lines'): """ - summary = np.ma.__getattribute__(average_type)(ma, axis=0) + if isinstance(ma,list): + summary_list = [] + for ma_each in ma: + tmp = np.ma.__getattribute__(average_type)(ma_each, axis=0) + summary_list.append(tmp) + ma = np.array(summary_list) + summary = np.ma.__getattribute__("average")(ma, axis=0) + else: + summary = np.ma.__getattribute__(average_type)(ma, axis=0) # only plot the average profiles without error regions x = np.arange(len(summary)) if isinstance(color, np.ndarray): @@ -72,6 +85,8 @@ def plot_single(ax, ma, average_type, color, label, plot_type='lines'): if plot_type == 'fill': ax.fill_between(x, summary, facecolor=color, alpha=0.6, edgecolor='none') + if debug: + set_trace() if plot_type in ['se', 'std']: if plot_type == 'se': # standard error std = np.std(ma, axis=0) / np.sqrt(ma.shape[0]) diff --git a/deeptools/parserCommon.py b/deeptools/parserCommon.py index ef4f4d0748..23de9e6472 100755 --- a/deeptools/parserCommon.py +++ b/deeptools/parserCommon.py @@ -846,6 +846,14 @@ def heatmapperOptionalArgs(mode=['heatmap', 'profile'][0]): 'group.', action='store_true') + optional.add_argument('--repgrplist', + default=None, + nargs='+', + help='Group profiles by genotype' + 'assign each profile to a group(as replicates) to plot average +/- se/std.' + 'If the number of group values is smaller than' + 'the number of plots, the values are recycled.') + optional.add_argument('--plotFileFormat', metavar='', help='Image format type. If given, this ' diff --git a/deeptools/plotProfile.py b/deeptools/plotProfile.py index 6655d5940d..8df0bf679d 100755 --- a/deeptools/plotProfile.py +++ b/deeptools/plotProfile.py @@ -26,12 +26,12 @@ from deeptools.heatmapper_utilities import plot_single, plotly_single, getProfileTicks from deeptools.computeMatrixOperations import filterHeatmapValues - debug = 0 +if debug: + from ipdb import set_trace old_settings = np.seterr(all='ignore') plt.ioff() - def parse_arguments(args=None): parser = argparse.ArgumentParser( parents=[parserCommon.heatmapperMatrixArgs(), @@ -111,6 +111,7 @@ def __init__(self, hm, out_file_name, plot_height=7, plot_width=11, per_group=False, + repgrplist=None, plot_type='lines', image_format=None, color_list=None, @@ -137,6 +138,7 @@ def __init__(self, hm, out_file_name, plot_height: in cm plot_width: in cm per_group: bool + repgrplist: list plot_type: string image_format: string color_list: list @@ -160,6 +162,7 @@ def __init__(self, hm, out_file_name, self.plot_height = plot_height self.plot_width = plot_width self.per_group = per_group + self.repgrplist = repgrplist self.plot_type = plot_type self.image_format = image_format self.color_list = color_list @@ -171,11 +174,13 @@ def __init__(self, hm, out_file_name, # Honor reference point labels from computeMatrix if reference_point_label is None: self.reference_point_label = hm.parameters['ref point'] - # decide how many plots are needed if self.per_group: self.numplots = self.hm.matrix.get_num_groups() - self.numlines = self.hm.matrix.get_num_samples() + if self.repgrplist: + self.numlines = len(set(self.repgrplist)) + else: + self.numlines = self.hm.matrix.get_num_samples() else: self.numplots = self.hm.matrix.get_num_samples() self.numlines = self.hm.matrix.get_num_groups() @@ -525,7 +530,7 @@ def plot_heatmap(self): if self.per_group: title = self.hm.matrix.group_labels[plot] - tickIdx = plot % self.hm.matrix.get_num_samples() + tickIdx = plot % self.numlines else: title = self.hm.matrix.sample_labels[plot] tickIdx = plot @@ -731,7 +736,7 @@ def plot_profile(self): title = self.hm.matrix.group_labels[plot] if row != 0 and len(self.y_min) == 1 and len(self.y_max) == 1: plt.setp(ax.get_yticklabels(), visible=False) - tickIdx = plot % self.hm.matrix.get_num_samples() + tickIdx = plot % self.numlines else: title = self.hm.matrix.sample_labels[plot] if col != 0 and len(self.y_min) == 1 and len(self.y_max) == 1: @@ -739,32 +744,71 @@ def plot_profile(self): tickIdx = plot ax.set_title(autobreaklinetitle(title)) - for data_idx in range(self.numlines): - if self.per_group: - _row, _col = plot, data_idx - else: - _row, _col = data_idx, plot - if localYMin is None or self.y_min[col % len(self.y_min)] < localYMin: - localYMin = self.y_min[col % len(self.y_min)] - if localYMax is None or self.y_max[col % len(self.y_max)] > localYMax: - localYMax = self.y_max[col % len(self.y_max)] - - sub_matrix = self.hm.matrix.get_matrix(_row, _col) - - if self.per_group: - label = sub_matrix['sample'] - else: - label = sub_matrix['group'] - - if self.numlines > 1: - coloridx = data_idx - else: - coloridx = plot - plot_single(ax, sub_matrix['matrix'], - self.averagetype, - self.color_list[coloridx], - label, - plot_type=self.plot_type) + if localYMin is None or self.y_min[col % len(self.y_min)] < localYMin: + localYMin = self.y_min[col % len(self.y_min)] + if localYMax is None or self.y_max[col % len(self.y_max)] > localYMax: + localYMax = self.y_max[col % len(self.y_max)] + if self.per_group and self.repgrplist: + nsamptmp = self.hm.matrix.get_num_samples() + repgrplistuniq = [] + repgrp_samp_dict = {} + for tmp in self.repgrplist: + if not tmp in repgrplistuniq: + repgrplistuniq.append(tmp) + for data_idx in range(nsamptmp): + thisrepgrp = repgrplistuniq[int(data_idx / (nsamptmp/self.numlines))] + try: + repgrp_samp_dict[thisrepgrp].append(data_idx) + except: + repgrp_samp_dict[thisrepgrp] = [ data_idx ] + + for irepgrp, repgrp in enumerate(repgrplistuniq): + sub_matrix_list = [] + for data_idx in repgrp_samp_dict[repgrp]: + _row, _col = plot, data_idx + sub_matrix = self.hm.matrix.get_matrix(_row, _col) + sub_matrix_list.append(sub_matrix['matrix']) + if debug: + set_trace() + + label = f"{repgrp}(n={len(repgrp_samp_dict[repgrp])})" + + if self.numlines > 1: + coloridx = irepgrp + else: + coloridx = plot + plot_single(ax, sub_matrix_list, + self.averagetype, + self.color_list[coloridx], + label, + plot_type=self.plot_type) + else: + for data_idx in range(self.numlines): + if self.per_group: + _row, _col = plot, data_idx + else: + _row, _col = data_idx, plot + if localYMin is None or self.y_min[col % len(self.y_min)] < localYMin: + localYMin = self.y_min[col % len(self.y_min)] + if localYMax is None or self.y_max[col % len(self.y_max)] > localYMax: + localYMax = self.y_max[col % len(self.y_max)] + + sub_matrix = self.hm.matrix.get_matrix(_row, _col) + + if self.per_group: + label = sub_matrix['sample'] + else: + label = sub_matrix['group'] + + if self.numlines > 1: + coloridx = data_idx + else: + coloridx = plot + plot_single(ax, sub_matrix['matrix'], + self.averagetype, + self.color_list[coloridx], + label, + plot_type=self.plot_type) globalYmin = min(np.float64(globalYmin), ax.get_ylim()[0]) globalYmax = max(globalYmax, ax.get_ylim()[1]) @@ -970,6 +1014,7 @@ def main(args=None): plot_height=args.plotHeight, plot_width=args.plotWidth, per_group=args.perGroup, + repgrplist=args.repgrplist, plot_type=args.plotType, image_format=args.plotFileFormat, color_list=args.colors, From f05c5faf8eaebf1f45d9cf1517d133f2150e04c7 Mon Sep 17 00:00:00 2001 From: "Xu, Beisi" Date: Wed, 22 Nov 2023 14:39:06 -0600 Subject: [PATCH 09/18] debug --repgrplist if number matching number of samples --- deeptools/parserCommon.py | 2 +- deeptools/plotProfile.py | 28 ++++++++++++++++++---------- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/deeptools/parserCommon.py b/deeptools/parserCommon.py index 23de9e6472..ce11b399f5 100755 --- a/deeptools/parserCommon.py +++ b/deeptools/parserCommon.py @@ -852,7 +852,7 @@ def heatmapperOptionalArgs(mode=['heatmap', 'profile'][0]): help='Group profiles by genotype' 'assign each profile to a group(as replicates) to plot average +/- se/std.' 'If the number of group values is smaller than' - 'the number of plots, the values are recycled.') + 'the number of samples, the values will be equally divide into groups.') optional.add_argument('--plotFileFormat', metavar='', diff --git a/deeptools/plotProfile.py b/deeptools/plotProfile.py index 8df0bf679d..69b536b37a 100755 --- a/deeptools/plotProfile.py +++ b/deeptools/plotProfile.py @@ -750,17 +750,25 @@ def plot_profile(self): localYMax = self.y_max[col % len(self.y_max)] if self.per_group and self.repgrplist: nsamptmp = self.hm.matrix.get_num_samples() - repgrplistuniq = [] repgrp_samp_dict = {} - for tmp in self.repgrplist: - if not tmp in repgrplistuniq: - repgrplistuniq.append(tmp) - for data_idx in range(nsamptmp): - thisrepgrp = repgrplistuniq[int(data_idx / (nsamptmp/self.numlines))] - try: - repgrp_samp_dict[thisrepgrp].append(data_idx) - except: - repgrp_samp_dict[thisrepgrp] = [ data_idx ] + if len(self.repgrplist) == nsamptmp: + for data_idx in range(nsamptmp): + thisrepgrp = self.repgrplist[data_idx] + try: + repgrp_samp_dict[thisrepgrp].append(data_idx) + except: + repgrp_samp_dict[thisrepgrp] = [ data_idx ] + else: + repgrplistuniq = [] + for tmp in self.repgrplist: + if not tmp in repgrplistuniq: + repgrplistuniq.append(tmp) + for data_idx in range(nsamptmp): + thisrepgrp = repgrplistuniq[int(data_idx / (nsamptmp/self.numlines))] + try: + repgrp_samp_dict[thisrepgrp].append(data_idx) + except: + repgrp_samp_dict[thisrepgrp] = [ data_idx ] for irepgrp, repgrp in enumerate(repgrplistuniq): sub_matrix_list = [] From 01caa598f8125541d3a4d06b48a9f08387414553 Mon Sep 17 00:00:00 2001 From: "Xu, Beisi" Date: Wed, 22 Nov 2023 14:45:20 -0600 Subject: [PATCH 10/18] debug --repgrplist if number matching number of samples --- deeptools/plotProfile.py | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/deeptools/plotProfile.py b/deeptools/plotProfile.py index 69b536b37a..7bf4c61b78 100755 --- a/deeptools/plotProfile.py +++ b/deeptools/plotProfile.py @@ -751,24 +751,20 @@ def plot_profile(self): if self.per_group and self.repgrplist: nsamptmp = self.hm.matrix.get_num_samples() repgrp_samp_dict = {} - if len(self.repgrplist) == nsamptmp: - for data_idx in range(nsamptmp): + repgrplistuniq = [] + for tmp in self.repgrplist: + if not tmp in repgrplistuniq: + repgrplistuniq.append(tmp) + + for data_idx in range(nsamptmp): + if len(self.repgrplist) >= nsamptmp: thisrepgrp = self.repgrplist[data_idx] - try: - repgrp_samp_dict[thisrepgrp].append(data_idx) - except: - repgrp_samp_dict[thisrepgrp] = [ data_idx ] - else: - repgrplistuniq = [] - for tmp in self.repgrplist: - if not tmp in repgrplistuniq: - repgrplistuniq.append(tmp) - for data_idx in range(nsamptmp): + else: thisrepgrp = repgrplistuniq[int(data_idx / (nsamptmp/self.numlines))] - try: - repgrp_samp_dict[thisrepgrp].append(data_idx) - except: - repgrp_samp_dict[thisrepgrp] = [ data_idx ] + try: + repgrp_samp_dict[thisrepgrp].append(data_idx) + except: + repgrp_samp_dict[thisrepgrp] = [ data_idx ] for irepgrp, repgrp in enumerate(repgrplistuniq): sub_matrix_list = [] From 4e19dcbb4c79be497c306be928280165c74101f3 Mon Sep 17 00:00:00 2001 From: "Xu, Beisi" Date: Wed, 22 Nov 2023 14:52:38 -0600 Subject: [PATCH 11/18] debug --repgrplist if number matching number of samples --- deeptools/plotProfile.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deeptools/plotProfile.py b/deeptools/plotProfile.py index 7bf4c61b78..dddea44482 100755 --- a/deeptools/plotProfile.py +++ b/deeptools/plotProfile.py @@ -766,14 +766,14 @@ def plot_profile(self): except: repgrp_samp_dict[thisrepgrp] = [ data_idx ] + if debug: + set_trace() for irepgrp, repgrp in enumerate(repgrplistuniq): sub_matrix_list = [] for data_idx in repgrp_samp_dict[repgrp]: _row, _col = plot, data_idx sub_matrix = self.hm.matrix.get_matrix(_row, _col) sub_matrix_list.append(sub_matrix['matrix']) - if debug: - set_trace() label = f"{repgrp}(n={len(repgrp_samp_dict[repgrp])})" From eb8fa8dfee09dfed8d4fd4a4d8ea73176e7e17e2 Mon Sep 17 00:00:00 2001 From: "Xu, Beisi" Date: Wed, 24 Sep 2025 14:11:25 -0500 Subject: [PATCH 12/18] new matplotlib --- deeptools/cm.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/deeptools/cm.py b/deeptools/cm.py index 47bcf16285..2d4d8ae91b 100644 --- a/deeptools/cm.py +++ b/deeptools/cm.py @@ -30,8 +30,7 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -from matplotlib import colors, colormaps as mpl_cm - +from matplotlib import colors _rocket_lut = [ [0.01060815, 0.01808215, 0.10018654], @@ -1076,6 +1075,15 @@ _luts = [_rocket_lut, _mako_lut, _vlag_lut, _icefire_lut] _names = ["rocket", "mako", "vlag", "icefire"] +def mycmregister(name,cmap): + import matplotlib + if hasattr(matplotlib.cm, 'register_cmap'): + matplotlib.cm.register_cmap(name, cmap) + elif hasattr(matplotlib.colors, 'register_cmap'): + matplotlib.colors.register_cmap(name, cmap) + else: + matplotlib.colormaps.register(cmap, name=name) + for _lut, _name in zip(_luts, _names): _cmap = colors.ListedColormap(_lut, _name) @@ -1084,5 +1092,5 @@ _cmap_r = colors.ListedColormap(_lut[::-1], _name + "_r") locals()[_name + "_r"] = _cmap_r - mpl_cm.register(_cmap, name=_name) - mpl_cm.register(_cmap_r, name=_name + "_r") + mycmregister(_name, _cmap) + mycmregister(_name + "_r", _cmap_r) From 6fc9ee0ada40fddc957c4d7ceb6a7e62949f0df9 Mon Sep 17 00:00:00 2001 From: "Xu, Beisi" Date: Wed, 24 Sep 2025 14:42:58 -0500 Subject: [PATCH 13/18] linter by black --- deeptools/cm.py | 17 +- deeptools/computeGCBias.py | 725 ++++++++++++++++----------- deeptools/correctGCBias.py | 430 +++++++++------- deeptools/heatmapper_utilities.py | 92 ++-- deeptools/plotHeatmap.py | 782 ++++++++++++++++++++---------- deeptools/plotProfile.py | 647 ++++++++++++++++-------- 6 files changed, 1698 insertions(+), 995 deletions(-) diff --git a/deeptools/cm.py b/deeptools/cm.py index 2d4d8ae91b..14da876c29 100644 --- a/deeptools/cm.py +++ b/deeptools/cm.py @@ -288,7 +288,7 @@ [0.97912374, 0.90313207, 0.83979337], [0.979891, 0.90894778, 0.84827858], [0.98067764, 0.91476465, 0.85676611], - [0.98137749, 0.92061729, 0.86536915] + [0.98137749, 0.92061729, 0.86536915], ] @@ -548,7 +548,7 @@ [0.84857662, 0.9498573, 0.8776059], [0.8564431, 0.95309792, 0.88414253], [0.86429066, 0.95635719, 0.89067759], - [0.87218969, 0.95960708, 0.89725384] + [0.87218969, 0.95960708, 0.89725384], ] @@ -808,7 +808,7 @@ [0.67000176, 0.23511902, 0.24650278], [0.66693423, 0.22859879, 0.24124404], [0.6638441, 0.22201742, 0.2359961], - [0.66080672, 0.21526712, 0.23069468] + [0.66080672, 0.21526712, 0.23069468], ] @@ -1068,22 +1068,25 @@ [0.9932672, 0.79848979, 0.63231691], [0.99535958, 0.80926704, 0.64687278], [0.99740544, 0.82008078, 0.66150571], - [0.9992197, 0.83100723, 0.6764127] + [0.9992197, 0.83100723, 0.6764127], ] _luts = [_rocket_lut, _mako_lut, _vlag_lut, _icefire_lut] _names = ["rocket", "mako", "vlag", "icefire"] -def mycmregister(name,cmap): + +def mycmregister(name, cmap): import matplotlib - if hasattr(matplotlib.cm, 'register_cmap'): + + if hasattr(matplotlib.cm, "register_cmap"): matplotlib.cm.register_cmap(name, cmap) - elif hasattr(matplotlib.colors, 'register_cmap'): + elif hasattr(matplotlib.colors, "register_cmap"): matplotlib.colors.register_cmap(name, cmap) else: matplotlib.colormaps.register(cmap, name=name) + for _lut, _name in zip(_luts, _names): _cmap = colors.ListedColormap(_lut, _name) diff --git a/deeptools/computeGCBias.py b/deeptools/computeGCBias.py index f261a9fc14..3732e1bc58 100755 --- a/deeptools/computeGCBias.py +++ b/deeptools/computeGCBias.py @@ -17,7 +17,7 @@ from deeptools import bamHandler debug = 0 -old_settings = np.seterr(all='ignore') +old_settings = np.seterr(all="ignore") def parse_arguments(args=None): @@ -26,15 +26,16 @@ def parse_arguments(args=None): parser = argparse.ArgumentParser( parents=[requiredArgs, parentParser], formatter_class=argparse.ArgumentDefaultsHelpFormatter, - description='Computes the GC-bias using Benjamini\'s method ' - '[Benjamini & Speed (2012). Nucleic Acids Research, 40(10). doi: 10.1093/nar/gks001]. ' - 'The GC-bias is visualized and the resulting table can be used to' - 'correct the bias with `correctGCBias`.', - usage='computeGCBias ' - '-b file.bam --effectiveGenomeSize 2150570000 -g mm9.2bit -l 200 --GCbiasFrequenciesFile freq.txt\n' - 'help: computeGCBias -h / computeGCBias --help', - conflict_handler='resolve', - add_help=False) + description="Computes the GC-bias using Benjamini's method " + "[Benjamini & Speed (2012). Nucleic Acids Research, 40(10). doi: 10.1093/nar/gks001]. " + "The GC-bias is visualized and the resulting table can be used to" + "correct the bias with `correctGCBias`.", + usage="computeGCBias " + "-b file.bam --effectiveGenomeSize 2150570000 -g mm9.2bit -l 200 --GCbiasFrequenciesFile freq.txt\n" + "help: computeGCBias -h / computeGCBias --help", + conflict_handler="resolve", + add_help=False, + ) return parser @@ -42,96 +43,117 @@ def parse_arguments(args=None): def getRequiredArgs(): parser = argparse.ArgumentParser(add_help=False) - required = parser.add_argument_group('Required arguments') - - required.add_argument('--bamfile', '-b', - metavar='bam file', - help='Sorted BAM file. ', - required=True) - - required.add_argument('--effectiveGenomeSize', - help='The effective genome size is the portion ' - 'of the genome that is mappable. Large fractions of ' - 'the genome are stretches of NNNN that should be ' - 'discarded. Also, if repetitive regions were not ' - 'included in the mapping of reads, the effective ' - 'genome size needs to be adjusted accordingly. ' - 'A table of values is available here: ' - 'http://deeptools.readthedocs.io/en/latest/content/feature/effectiveGenomeSize.html .', - default=None, - type=int, - required=True) - - required.add_argument('--genome', '-g', - help='Genome in two bit format. Most genomes can be ' - 'found here: http://hgdownload.cse.ucsc.edu/gbdb/ ' - 'Search for the .2bit ending. Otherwise, fasta ' - 'files can be converted to 2bit using the UCSC ' - 'programm called faToTwoBit available for different ' - 'plattforms at ' - 'http://hgdownload.cse.ucsc.edu/admin/exe/', - metavar='2bit FILE', - required=True) - - required.add_argument('--GCbiasFrequenciesFile', '-freq', '-o', - help='Path to save the file containing ' - 'the observed and expected read frequencies per %%GC-' - 'content. This file is needed to run the ' - 'correctGCBias tool. This is a text file.', - type=argparse.FileType('w'), - metavar='FILE', - required=True) + required = parser.add_argument_group("Required arguments") + + required.add_argument( + "--bamfile", "-b", metavar="bam file", help="Sorted BAM file. ", required=True + ) + + required.add_argument( + "--effectiveGenomeSize", + help="The effective genome size is the portion " + "of the genome that is mappable. Large fractions of " + "the genome are stretches of NNNN that should be " + "discarded. Also, if repetitive regions were not " + "included in the mapping of reads, the effective " + "genome size needs to be adjusted accordingly. " + "A table of values is available here: " + "http://deeptools.readthedocs.io/en/latest/content/feature/effectiveGenomeSize.html .", + default=None, + type=int, + required=True, + ) + + required.add_argument( + "--genome", + "-g", + help="Genome in two bit format. Most genomes can be " + "found here: http://hgdownload.cse.ucsc.edu/gbdb/ " + "Search for the .2bit ending. Otherwise, fasta " + "files can be converted to 2bit using the UCSC " + "programm called faToTwoBit available for different " + "plattforms at " + "http://hgdownload.cse.ucsc.edu/admin/exe/", + metavar="2bit FILE", + required=True, + ) + + required.add_argument( + "--GCbiasFrequenciesFile", + "-freq", + "-o", + help="Path to save the file containing " + "the observed and expected read frequencies per %%GC-" + "content. This file is needed to run the " + "correctGCBias tool. This is a text file.", + type=argparse.FileType("w"), + metavar="FILE", + required=True, + ) # define the optional arguments - optional = parser.add_argument_group('Optional arguments') - optional.add_argument('--fragmentLength', '-l', - help='Fragment length used for the sequencing. If ' - 'paired-end reads are used, the fragment length is ' - 'computed based from the bam file', - type=int) - - optional.add_argument("--help", "-h", action="help", - help="show this help message and exit") - - optional.add_argument('--sampleSize', - default=5e7, - help='Number of sampling points to be considered. (Default: %(default)s)', - type=int) - - optional.add_argument('--extraSampling', - help='BED file containing genomic regions for which ' - 'extra sampling is required because they are ' - 'underrepresented in the genome.', - type=argparse.FileType('r'), - metavar='BED file') - - plot = parser.add_argument_group('Diagnostic plot options') - - plot.add_argument('--biasPlot', - metavar='FILE NAME', - help='If given, a diagnostic image summarizing ' - 'the GC-bias will be saved.') - - plot.add_argument('--plotFileFormat', - metavar='', - help='image format type. If given, this ' - 'option overrides the ' - 'image format based on the plotFile ending. ' - 'The available options are: "png", ' - '"eps", "pdf", "plotly" and "svg"', - choices=['png', 'pdf', 'svg', 'eps', 'plotly']) - - plot.add_argument('--regionSize', - metavar='INT', - type=int, - default=300, - help='To plot the reads per %%GC over a region' - 'the size of the region is required. By default, ' - 'the bin size is set to 300 bases, which is close to the ' - 'standard fragment size for Illumina machines. However, ' - 'if the depth of sequencing is low, a larger bin size ' - 'will be required, otherwise many bins will not ' - 'overlap with any read (Default: %(default)s)') + optional = parser.add_argument_group("Optional arguments") + optional.add_argument( + "--fragmentLength", + "-l", + help="Fragment length used for the sequencing. If " + "paired-end reads are used, the fragment length is " + "computed based from the bam file", + type=int, + ) + + optional.add_argument( + "--help", "-h", action="help", help="show this help message and exit" + ) + + optional.add_argument( + "--sampleSize", + default=5e7, + help="Number of sampling points to be considered. (Default: %(default)s)", + type=int, + ) + + optional.add_argument( + "--extraSampling", + help="BED file containing genomic regions for which " + "extra sampling is required because they are " + "underrepresented in the genome.", + type=argparse.FileType("r"), + metavar="BED file", + ) + + plot = parser.add_argument_group("Diagnostic plot options") + + plot.add_argument( + "--biasPlot", + metavar="FILE NAME", + help="If given, a diagnostic image summarizing " "the GC-bias will be saved.", + ) + + plot.add_argument( + "--plotFileFormat", + metavar="", + help="image format type. If given, this " + "option overrides the " + "image format based on the plotFile ending. " + 'The available options are: "png", ' + '"eps", "pdf", "plotly" and "svg"', + choices=["png", "pdf", "svg", "eps", "plotly"], + ) + + plot.add_argument( + "--regionSize", + metavar="INT", + type=int, + default=300, + help="To plot the reads per %%GC over a region" + "the size of the region is required. By default, " + "the bin size is set to 300 bases, which is close to the " + "standard fragment size for Illumina machines. However, " + "if the depth of sequencing is low, a larger bin size " + "will be required, otherwise many bins will not " + "overlap with any read (Default: %(default)s)", + ) return parser @@ -149,13 +171,13 @@ def getPositionsToSample(chrom, start, end, stepSize): """ positions_to_sample = np.arange(start, end, stepSize) - if global_vars['filter_out']: - filter_out_tree = GTF(global_vars['filter_out']) + if global_vars["filter_out"]: + filter_out_tree = GTF(global_vars["filter_out"]) else: filter_out_tree = None - if global_vars['extra_sampling_file']: - extra_tree = GTF(global_vars['extra_sampling_file']) + if global_vars["extra_sampling_file"]: + extra_tree = GTF(global_vars["extra_sampling_file"]) else: extra_tree = None @@ -168,14 +190,17 @@ def getPositionsToSample(chrom, start, end, stepSize): if len(extra_match) > 0: for intval in extra_match: - positions_to_sample = np.append(positions_to_sample, - list(range(intval[0], intval[1], stepSize))) + positions_to_sample = np.append( + positions_to_sample, list(range(intval[0], intval[1], stepSize)) + ) # remove duplicates positions_to_sample = np.unique(np.sort(positions_to_sample)) if debug: - print("sampling increased to {} from {}".format( - len(positions_to_sample), - orig_len)) + print( + "sampling increased to {} from {}".format( + len(positions_to_sample), orig_len + ) + ) # skip regions that are filtered out if filter_out_tree: @@ -186,8 +211,10 @@ def getPositionsToSample(chrom, start, end, stepSize): if len(out_match) > 0: for intval in out_match: - positions_to_sample = \ - positions_to_sample[(positions_to_sample < intval[0]) | (positions_to_sample >= intval[1])] + positions_to_sample = positions_to_sample[ + (positions_to_sample < intval[0]) + | (positions_to_sample >= intval[1]) + ] return positions_to_sample @@ -195,21 +222,20 @@ def countReadsPerGC_wrapper(args): return countReadsPerGC_worker(*args) -def countReadsPerGC_worker(chromNameBam, - start, end, stepSize, regionSize, - chrNameBamToBit, verbose=False): +def countReadsPerGC_worker( + chromNameBam, start, end, stepSize, regionSize, chrNameBamToBit, verbose=False +): """given a genome region defined by (start, end), the GC content is quantified for regions of size regionSize that are contiguous """ chromNameBit = chrNameBamToBit[chromNameBam] - tbit = py2bit.open(global_vars['2bit']) - bam = bamHandler.openBam(global_vars['bam']) + tbit = py2bit.open(global_vars["2bit"]) + bam = bamHandler.openBam(global_vars["bam"]) c = 1 sub_reads_per_gc = [] - positions_to_sample = getPositionsToSample(chromNameBit, - start, end, stepSize) + positions_to_sample = getPositionsToSample(chromNameBit, start, end, stepSize) for index in range(len(positions_to_sample)): i = positions_to_sample[index] @@ -235,10 +261,10 @@ def tabulateGCcontent_wrapper(args): return tabulateGCcontent_worker(*args) -def tabulateGCcontent_worker(chromNameBam, start, end, stepSize, - fragmentLength, - chrNameBamToBit, verbose=False): - r""" given genome regions, the GC content of the genome is tabulated for +def tabulateGCcontent_worker( + chromNameBam, start, end, stepSize, fragmentLength, chrNameBamToBit, verbose=False +): + r"""given genome regions, the GC content of the genome is tabulated for fragments of length 'fragmentLength' each 'stepSize' positions. >>> test = Tester() @@ -300,20 +326,20 @@ def tabulateGCcontent_worker(chromNameBam, start, end, stepSize, # indicate the gc content. The values inside the # array are counts. Thus, if N_gc[10] = 3, that means # that 3 regions have a gc_content of 10. - subN_gc = np.zeros(fragmentLength['median'] + 1, dtype='int') - subF_gc = np.zeros(fragmentLength['median'] + 1, dtype='int') + subN_gc = np.zeros(fragmentLength["median"] + 1, dtype="int") + subF_gc = np.zeros(fragmentLength["median"] + 1, dtype="int") - tbit = py2bit.open(global_vars['2bit']) - bam = bamHandler.openBam(global_vars['bam']) + tbit = py2bit.open(global_vars["2bit"]) + bam = bamHandler.openBam(global_vars["bam"]) peak = 0 startTime = time.time() if verbose: - print("[{:.3f}] computing positions to " - "sample".format(time.time() - startTime)) + print( + "[{:.3f}] computing positions to " "sample".format(time.time() - startTime) + ) - positions_to_sample = getPositionsToSample(chromNameBit, - start, end, stepSize) + positions_to_sample = getPositionsToSample(chromNameBit, start, end, stepSize) read_counts = [] # Optimize IO. @@ -332,16 +358,18 @@ def tabulateGCcontent_worker(chromNameBam, start, end, stepSize, if verbose: print("[{:.3f}] caching reads".format(time.time() - startTime)) - counts = np.bincount([r.pos - start_pos - for r in bam.fetch(chromNameBam, start_pos, - end_pos + 1) - if not r.is_reverse and not r.is_unmapped and r.pos >= start_pos], - minlength=end_pos - start_pos + 2) + counts = np.bincount( + [ + r.pos - start_pos + for r in bam.fetch(chromNameBam, start_pos, end_pos + 1) + if not r.is_reverse and not r.is_unmapped and r.pos >= start_pos + ], + minlength=end_pos - start_pos + 2, + ) read_counts = counts[positions_to_sample - min(positions_to_sample)] if verbose: - print("[{:.3f}] finish caching reads.".format( - time.time() - startTime)) + print("[{:.3f}] finish caching reads.".format(time.time() - startTime)) countTime = time.time() @@ -349,11 +377,17 @@ def tabulateGCcontent_worker(chromNameBam, start, end, stepSize, for index in range(len(positions_to_sample)): i = positions_to_sample[index] # stop if the end of the chromosome is reached - if i + fragmentLength['median'] > tbit.chroms(chromNameBit): + if i + fragmentLength["median"] > tbit.chroms(chromNameBit): break try: - gc = getGC_content(tbit, chromNameBit, int(i), int(i + fragmentLength['median']), fraction=False) + gc = getGC_content( + tbit, + chromNameBit, + int(i), + int(i + fragmentLength["median"]), + fraction=False, + ) except Exception as detail: if verbose: print(detail) @@ -363,12 +397,17 @@ def tabulateGCcontent_worker(chromNameBam, start, end, stepSize, # count all reads at position 'i' if len(read_counts) == 0: # case when no cache was done - num_reads = len([x.pos for x in bam.fetch(chromNameBam, i, i + 1) - if x.is_reverse is False and x.pos == i]) + num_reads = len( + [ + x.pos + for x in bam.fetch(chromNameBam, i, i + 1) + if x.is_reverse is False and x.pos == i + ] + ) else: num_reads = read_counts[index] - if num_reads >= global_vars['max_reads']: + if num_reads >= global_vars["max_reads"]: peak += 1 continue @@ -376,27 +415,58 @@ def tabulateGCcontent_worker(chromNameBam, start, end, stepSize, if verbose: if index % 50000 == 0: endTime = time.time() - print("%s processing %d (%.1f per sec) @ %s:%s-%s %s" % - (multiprocessing.current_process().name, - index, index / (endTime - countTime), - chromNameBit, start, end, stepSize)) + print( + "%s processing %d (%.1f per sec) @ %s:%s-%s %s" + % ( + multiprocessing.current_process().name, + index, + index / (endTime - countTime), + chromNameBit, + start, + end, + stepSize, + ) + ) c += 1 if verbose: endTime = time.time() - print("%s processing %d (%.1f per sec) @ %s:%s-%s %s" % - (multiprocessing.current_process().name, - index, index / (endTime - countTime), - chromNameBit, start, end, stepSize)) - print("%s total time %.1f @ %s:%s-%s %s" % (multiprocessing.current_process().name, - (endTime - startTime), chromNameBit, start, end, stepSize)) + print( + "%s processing %d (%.1f per sec) @ %s:%s-%s %s" + % ( + multiprocessing.current_process().name, + index, + index / (endTime - countTime), + chromNameBit, + start, + end, + stepSize, + ) + ) + print( + "%s total time %.1f @ %s:%s-%s %s" + % ( + multiprocessing.current_process().name, + (endTime - startTime), + chromNameBit, + start, + end, + stepSize, + ) + ) return subN_gc, subF_gc -def tabulateGCcontent(fragmentLength, chrNameBitToBam, stepSize, - chromSizes, numberOfProcessors=None, verbose=False, - region=None): +def tabulateGCcontent( + fragmentLength, + chrNameBitToBam, + stepSize, + chromSizes, + numberOfProcessors=None, + verbose=False, + region=None, +): r""" Subdivides the genome or the reads into chunks to be analyzed in parallel using several processors. This codes handles the creation of @@ -421,17 +491,17 @@ def tabulateGCcontent(fragmentLength, chrNameBitToBam, stepSize, global global_vars chrNameBamToBit = dict([(v, k) for k, v in chrNameBitToBam.items()]) - chunkSize = int(min(2e6, 4e5 / global_vars['reads_per_bp'])) + chunkSize = int(min(2e6, 4e5 / global_vars["reads_per_bp"])) chromSizes = [(k, v) for k, v in chromSizes if k in list(chrNameBamToBit.keys())] - imap_res = mapReduce.mapReduce((stepSize, - fragmentLength, chrNameBamToBit, - verbose), - tabulateGCcontent_wrapper, - chromSizes, - genomeChunkLength=chunkSize, - numberOfProcessors=numberOfProcessors, - region=region) + imap_res = mapReduce.mapReduce( + (stepSize, fragmentLength, chrNameBamToBit, verbose), + tabulateGCcontent_wrapper, + chromSizes, + genomeChunkLength=chunkSize, + numberOfProcessors=numberOfProcessors, + region=region, + ) for subN_gc, subF_gc in imap_res: try: @@ -442,20 +512,31 @@ def tabulateGCcontent(fragmentLength, chrNameBitToBam, stepSize, N_gc = subN_gc if sum(F_gc) == 0: - sys.exit("No fragments included in the sampling! Consider decreasing (or maybe increasing) the --sampleSize parameter") + sys.exit( + "No fragments included in the sampling! Consider decreasing (or maybe increasing) the --sampleSize parameter" + ) scaling = float(sum(N_gc)) / float(sum(F_gc)) - R_gc = np.array([float(F_gc[x]) / N_gc[x] * scaling - if N_gc[x] and F_gc[x] > 0 else 1 - for x in range(len(F_gc))]) + R_gc = np.array( + [ + float(F_gc[x]) / N_gc[x] * scaling if N_gc[x] and F_gc[x] > 0 else 1 + for x in range(len(F_gc)) + ] + ) data = np.transpose(np.vstack((F_gc, N_gc, R_gc))) return data -def countReadsPerGC(regionSize, chrNameBitToBam, stepSize, - chromSizes, numberOfProcessors=None, verbose=False, - region=None): +def countReadsPerGC( + regionSize, + chrNameBitToBam, + stepSize, + chromSizes, + numberOfProcessors=None, + verbose=False, + region=None, +): r""" Computes for a region of size regionSize, the GC of the region and the number of reads that overlap it. @@ -472,16 +553,16 @@ def countReadsPerGC(regionSize, chrNameBitToBam, stepSize, global global_vars chrNameBamToBit = dict([(v, k) for k, v in chrNameBitToBam.items()]) - chunkSize = int(min(2e6, 4e5 / global_vars['reads_per_bp'])) + chunkSize = int(min(2e6, 4e5 / global_vars["reads_per_bp"])) - imap_res = mapReduce.mapReduce((stepSize, - regionSize, chrNameBamToBit, - verbose), - countReadsPerGC_wrapper, - chromSizes, - genomeChunkLength=chunkSize, - numberOfProcessors=numberOfProcessors, - region=region) + imap_res = mapReduce.mapReduce( + (stepSize, regionSize, chrNameBamToBit, verbose), + countReadsPerGC_wrapper, + chromSizes, + genomeChunkLength=chunkSize, + numberOfProcessors=numberOfProcessors, + region=region, + ) reads_per_gc = [] for sub_reads_per_gc in imap_res: @@ -507,7 +588,7 @@ def smooth(x, window_len=3): if i < half_width or i + half_width + 1 > len(x): continue else: - y[i] = np.mean(x[i - half_width:i + half_width + 1]) + y[i] = np.mean(x[i - half_width : i + half_width + 1]) # clip low values, this avoid problems with zeros return y @@ -539,14 +620,44 @@ def plotlyGCbias(file_name, frequencies, reads_per_gc, region_size): import matplotlib.cbook as cbook fig = go.Figure() - fig['layout']['xaxis1'] = dict(domain=[0.0, 1.0], anchor="y1", title="GC fraction") - fig['layout']['yaxis1'] = dict(domain=[0.55, 1.0], anchor="x1", title="Number of reads") - fig['layout']['xaxis2'] = dict(domain=[0.0, 1.0], anchor="y2", title="GC fraction", range=[0.2, 0.7]) - fig['layout']['yaxis2'] = dict(domain=[0.0, 0.45], anchor="x2", title="log2(observed/expected)") + fig["layout"]["xaxis1"] = dict(domain=[0.0, 1.0], anchor="y1", title="GC fraction") + fig["layout"]["yaxis1"] = dict( + domain=[0.55, 1.0], anchor="x1", title="Number of reads" + ) + fig["layout"]["xaxis2"] = dict( + domain=[0.0, 1.0], anchor="y2", title="GC fraction", range=[0.2, 0.7] + ) + fig["layout"]["yaxis2"] = dict( + domain=[0.0, 0.45], anchor="x2", title="log2(observed/expected)" + ) text = "reads per {} base region".format(region_size) - annos = [{'yanchor': 'bottom', 'xref': 'paper', 'xanchor': 'center', 'yref': 'paper', 'text': text, 'y': 1.0, 'x': 0.5, 'font': {'size': 16}, 'showarrow': False}] + annos = [ + { + "yanchor": "bottom", + "xref": "paper", + "xanchor": "center", + "yref": "paper", + "text": text, + "y": 1.0, + "x": 0.5, + "font": {"size": 16}, + "showarrow": False, + } + ] text = "normalized observed/expected read counts" - annos.append({'yanchor': 'bottom', 'xref': 'paper', 'xanchor': 'center', 'yref': 'paper', 'text': text, 'y': 0.5, 'x': 0.5, 'font': {'size': 16}, 'showarrow': False}) + annos.append( + { + "yanchor": "bottom", + "xref": "paper", + "xanchor": "center", + "yref": "paper", + "text": text, + "y": 0.5, + "x": 0.5, + "font": {"size": 16}, + "showarrow": False, + } + ) # prepare data for boxplot reads, GC = reads_per_gc.T @@ -559,29 +670,58 @@ def plotlyGCbias(file_name, frequencies, reads_per_gc, region_size): bins = [] for b in reads_per_gc: s = cbook.boxplot_stats(b)[0] - bins.append([s['whislo'], s['q1'], s['q1'], s['med'], s['med'], s['med'], s['q3'], s['q3'], s['whishi']]) + bins.append( + [ + s["whislo"], + s["q1"], + s["q1"], + s["med"], + s["med"], + s["med"], + s["q3"], + s["q3"], + s["whishi"], + ] + ) data = [] # top plot for x, y in zip(bin_labels, bins): - trace = go.Box(x=x, y=y, xaxis='x1', yaxis='y1', boxpoints='outliers', showlegend=False, name="{}".format(x), line=dict(color='rgb(107,174,214)')) + trace = go.Box( + x=x, + y=y, + xaxis="x1", + yaxis="y1", + boxpoints="outliers", + showlegend=False, + name="{}".format(x), + line=dict(color="rgb(107,174,214)"), + ) data.append(trace) # bottom plot x = np.linspace(0, 1, frequencies.shape[0]) - trace = go.Scatter(x=x, y=np.log2(frequencies[:, 2]), xaxis='x2', yaxis='y2', showlegend=False, line=dict(color='rgb(107,174,214)')) + trace = go.Scatter( + x=x, + y=np.log2(frequencies[:, 2]), + xaxis="x2", + yaxis="y2", + showlegend=False, + line=dict(color="rgb(107,174,214)"), + ) data.append(trace) fig.add_traces(data) - fig['layout']['annotations'] = annos + fig["layout"]["annotations"] = annos py.plot(fig, filename=file_name, auto_open=False) def plotGCbias(file_name, frequencies, reads_per_gc, region_size, image_format=None): import matplotlib - matplotlib.use('Agg') - matplotlib.rcParams['pdf.fonttype'] = 42 - matplotlib.rcParams['svg.fonttype'] = 'none' + + matplotlib.use("Agg") + matplotlib.rcParams["pdf.fonttype"] = 42 + matplotlib.rcParams["svg.fonttype"] = "none" import matplotlib.pyplot as plt # prepare data for boxplot @@ -594,21 +734,20 @@ def plotGCbias(file_name, frequencies, reads_per_gc, region_size, image_format=N title = "reads per regions of {} bp".format(region_size) fig = plt.figure(figsize=(6, 8)) ax1 = fig.add_subplot(211, title=title) - ax2 = fig.add_subplot(212, - title='normalized observed/expected read counts') + ax2 = fig.add_subplot(212, title="normalized observed/expected read counts") # make boxplot bp = ax1.boxplot(reads_per_gc, notch=0, patch_artist=True) - plt.setp(bp['boxes'], color='black', facecolor='LightGreen') - plt.setp(bp['medians'], color='black') - plt.setp(bp['whiskers'], color='black', linestyle='dashed') - plt.setp(bp['fliers'], marker='None') + plt.setp(bp["boxes"], color="black", facecolor="LightGreen") + plt.setp(bp["medians"], color="black") + plt.setp(bp["whiskers"], color="black", linestyle="dashed") + plt.setp(bp["fliers"], marker="None") # get the whisker that spands the most - y_max = np.nanmax([x.get_data()[1][1] for x in bp['whiskers']]) + y_max = np.nanmax([x.get_data()[1][1] for x in bp["whiskers"]]) ax1.set_ylim(0 - (y_max * 0.05), y_max * 1.05) - ax1.set_ylabel('Number of reads') - ax1.set_xlabel('GC fraction') + ax1.set_ylabel("Number of reads") + ax1.set_xlabel("GC fraction") xticks = [idx for idx, x in enumerate(bin_labels) if int(x * 100) % 10 == 0] @@ -617,12 +756,12 @@ def plotGCbias(file_name, frequencies, reads_per_gc, region_size, image_format=N x = np.linspace(0, 1, frequencies.shape[0]) y = np.log2(frequencies[:, 2]) - ax2.plot(x, y, color='#8c96f0') - ax2.set_xlabel('GC fraction') - ax2.set_ylabel('log2ratio observed/expected') + ax2.plot(x, y, color="#8c96f0") + ax2.set_xlabel("GC fraction") + ax2.set_ylabel("log2ratio observed/expected") ax2.set_xlim(0.2, 0.7) - y_max = max(y[np.where(x >= 0.2)[0][0]:np.where(x <= 0.7)[0][-1] + 1]) - y_min = min(y[np.where(x >= 0.2)[0][0]:np.where(x <= 0.7)[0][-1] + 1]) + y_max = max(y[np.where(x >= 0.2)[0][0] : np.where(x <= 0.7)[0][-1] + 1]) + y_min = min(y[np.where(x >= 0.2)[0][0] : np.where(x <= 0.7)[0][-1] + 1]) if y_max > 0: y_max *= 1.1 else: @@ -633,7 +772,7 @@ def plotGCbias(file_name, frequencies, reads_per_gc, region_size, image_format=N y_min *= 0.9 ax2.set_ylim(y_min, y_max) plt.tight_layout() - plt.savefig(file_name, bbox_inches='tight', dpi=100, format=image_format) + plt.savefig(file_name, bbox_inches="tight", dpi=100, format=image_format) plt.close() @@ -648,42 +787,49 @@ def main(args=None): global global_vars global_vars = {} - global_vars['2bit'] = args.genome - global_vars['bam'] = args.bamfile - global_vars['filter_out'] = args.blackListFileName - global_vars['extra_sampling_file'] = extra_sampling_file + global_vars["2bit"] = args.genome + global_vars["bam"] = args.bamfile + global_vars["filter_out"] = args.blackListFileName + global_vars["extra_sampling_file"] = extra_sampling_file - tbit = py2bit.open(global_vars['2bit']) - bam, mapped, unmapped, stats = bamHandler.openBam(global_vars['bam'], returnStats=True, nThreads=args.numberOfProcessors) + tbit = py2bit.open(global_vars["2bit"]) + bam, mapped, unmapped, stats = bamHandler.openBam( + global_vars["bam"], returnStats=True, nThreads=args.numberOfProcessors + ) if args.fragmentLength: - fragment_len_dict = \ - {'median': args.fragmentLength} + fragment_len_dict = {"median": args.fragmentLength} else: - fragment_len_dict, __ = \ - get_read_and_fragment_length(args.bamfile, None, - numberOfProcessors=args.numberOfProcessors, - verbose=args.verbose) + fragment_len_dict, __ = get_read_and_fragment_length( + args.bamfile, + None, + numberOfProcessors=args.numberOfProcessors, + verbose=args.verbose, + ) if not fragment_len_dict: - print("\nPlease provide the fragment length used for the " - "sample preparation.\n") + print( + "\nPlease provide the fragment length used for the " + "sample preparation.\n" + ) exit(1) - fragment_len_dict = {'median': int(fragment_len_dict['median'])} + fragment_len_dict = {"median": int(fragment_len_dict["median"])} chrNameBitToBam = tbitToBamChrName(list(tbit.chroms().keys()), bam.references) - global_vars['genome_size'] = sum(tbit.chroms().values()) - global_vars['total_reads'] = mapped - global_vars['reads_per_bp'] = \ - float(global_vars['total_reads']) / args.effectiveGenomeSize + global_vars["genome_size"] = sum(tbit.chroms().values()) + global_vars["total_reads"] = mapped + global_vars["reads_per_bp"] = ( + float(global_vars["total_reads"]) / args.effectiveGenomeSize + ) confidence_p_value = float(1) / args.sampleSize # chromSizes: list of tuples - chromSizes = [(bam.references[i], bam.lengths[i]) - for i in range(len(bam.references))] + chromSizes = [ + (bam.references[i], bam.lengths[i]) for i in range(len(bam.references)) + ] chromSizes = [x for x in chromSizes if x[0] in tbit.chroms()] # use poisson distribution to identify peaks that should be discarted. @@ -693,107 +839,126 @@ def main(args=None): # empirically, a value of at least 4 times as big as the # reads_per_bp was found. # Similarly for the min value, I divide by 4. - global_vars['max_reads'] = poisson(4 * global_vars['reads_per_bp'] * fragment_len_dict['median']).isf(confidence_p_value) + global_vars["max_reads"] = poisson( + 4 * global_vars["reads_per_bp"] * fragment_len_dict["median"] + ).isf(confidence_p_value) # this may be of not use, unless the depth of sequencing is really high # as this value is close to 0 - global_vars['min_reads'] = poisson(0.25 * global_vars['reads_per_bp'] * fragment_len_dict['median']).ppf(confidence_p_value) + global_vars["min_reads"] = poisson( + 0.25 * global_vars["reads_per_bp"] * fragment_len_dict["median"] + ).ppf(confidence_p_value) for key in global_vars: print("{}: {}".format(key, global_vars[key])) print("computing frequencies") # the GC of the genome is sampled each stepSize bp. - stepSize = max(int(global_vars['genome_size'] / args.sampleSize), 1) + stepSize = max(int(global_vars["genome_size"] / args.sampleSize), 1) print("stepSize: {}".format(stepSize)) - data = tabulateGCcontent(fragment_len_dict, - chrNameBitToBam, stepSize, - chromSizes, - numberOfProcessors=args.numberOfProcessors, - verbose=args.verbose, - region=args.region) + data = tabulateGCcontent( + fragment_len_dict, + chrNameBitToBam, + stepSize, + chromSizes, + numberOfProcessors=args.numberOfProcessors, + verbose=args.verbose, + region=args.region, + ) np.savetxt(args.GCbiasFrequenciesFile.name, data) if args.biasPlot: - reads_per_gc = countReadsPerGC(args.regionSize, - chrNameBitToBam, stepSize * 10, - chromSizes, - numberOfProcessors=args.numberOfProcessors, - verbose=args.verbose, - region=args.region) + reads_per_gc = countReadsPerGC( + args.regionSize, + chrNameBitToBam, + stepSize * 10, + chromSizes, + numberOfProcessors=args.numberOfProcessors, + verbose=args.verbose, + region=args.region, + ) if args.plotFileFormat == "plotly": plotlyGCbias(args.biasPlot, data, reads_per_gc, args.regionSize) else: - plotGCbias(args.biasPlot, data, reads_per_gc, args.regionSize, image_format=args.plotFileFormat) + plotGCbias( + args.biasPlot, + data, + reads_per_gc, + args.regionSize, + image_format=args.plotFileFormat, + ) -class Tester(): +class Tester: def __init__(self): import os + self.root = os.path.dirname(os.path.abspath(__file__)) + "/test/test_corrGC/" self.tbitFile = self.root + "sequence.2bit" self.bamFile = self.root + "test.bam" self.mappability = self.root + "mappability.bw" - self.chrNameBam = '2L' - self.chrNameBit = 'chr2L' - bam, mapped, unmapped, stats = bamHandler.openBam(self.bamFile, returnStats=True) + self.chrNameBam = "2L" + self.chrNameBit = "chr2L" + bam, mapped, unmapped, stats = bamHandler.openBam( + self.bamFile, returnStats=True + ) tbit = py2bit.open(self.tbitFile) global debug debug = 0 global global_vars - global_vars = {'2bit': self.tbitFile, - 'bam': self.bamFile, - 'filter_out': None, - 'mappability': self.mappability, - 'extra_sampling_file': None, - 'max_reads': 5, - 'min_reads': 0, - 'min_reads': 0, - 'reads_per_bp': 0.3, - 'total_reads': mapped, - 'genome_size': sum(tbit.chroms().values()) - } + global_vars = { + "2bit": self.tbitFile, + "bam": self.bamFile, + "filter_out": None, + "mappability": self.mappability, + "extra_sampling_file": None, + "max_reads": 5, + "min_reads": 0, + "min_reads": 0, + "reads_per_bp": 0.3, + "total_reads": mapped, + "genome_size": sum(tbit.chroms().values()), + } def testTabulateGCcontentWorker(self): stepSize = 2 - fragmentLength = {'min': 1, 'median': 3, 'max': 5} + fragmentLength = {"min": 1, "median": 3, "max": 5} start = 0 end = 20 - chrNameBam2bit = {'2L': 'chr2L'} - return (self.chrNameBam, - start, end, stepSize, fragmentLength, chrNameBam2bit) + chrNameBam2bit = {"2L": "chr2L"} + return (self.chrNameBam, start, end, stepSize, fragmentLength, chrNameBam2bit) def set_filter_out_file(self): global global_vars - global_vars['filter_out'] = self.root + "filter_out.bed" + global_vars["filter_out"] = self.root + "filter_out.bed" def unset_filter_out_file(self): global global_vars - global_vars['filter_out'] = None + global_vars["filter_out"] = None def set_extra_sampling_file(self): global global_vars - global_vars['extra_sampling_file'] = self.root + "extra_sampling.bed" + global_vars["extra_sampling_file"] = self.root + "extra_sampling.bed" def testTabulateGCcontent(self): - fragmentLength = {'median': 10} - chrNameBitToBam = {'chr2L': '2L'} + fragmentLength = {"median": 10} + chrNameBitToBam = {"chr2L": "2L"} stepSize = 1 - bam = bamHandler.openBam(global_vars['bam']) - chromSizes = [(bam.references[i], bam.lengths[i]) - for i in range(len(bam.references))] - return (fragmentLength, - chrNameBitToBam, stepSize, chromSizes, 1) + bam = bamHandler.openBam(global_vars["bam"]) + chromSizes = [ + (bam.references[i], bam.lengths[i]) for i in range(len(bam.references)) + ] + return (fragmentLength, chrNameBitToBam, stepSize, chromSizes, 1) def testCountReadsPerGC(self): regionSize = 300 - chrNameBitToBam = {'chr2L': '2L'} + chrNameBitToBam = {"chr2L": "2L"} stepSize = 1 - bam = bamHandler.openBam(global_vars['bam']) - chromSizes = [(bam.references[i], bam.lengths[i]) - for i in range(len(bam.references))] - return (regionSize, - chrNameBitToBam, stepSize, chromSizes, 1) + bam = bamHandler.openBam(global_vars["bam"]) + chromSizes = [ + (bam.references[i], bam.lengths[i]) for i in range(len(bam.references)) + ] + return (regionSize, chrNameBitToBam, stepSize, chromSizes, 1) if __name__ == "__main__": diff --git a/deeptools/correctGCBias.py b/deeptools/correctGCBias.py index 1154b93688..3b1e4e7247 100755 --- a/deeptools/correctGCBias.py +++ b/deeptools/correctGCBias.py @@ -20,7 +20,7 @@ from deeptools import utilities from deeptools.bamHandler import openBam -old_settings = np.seterr(all='ignore') +old_settings = np.seterr(all="ignore") def parse_arguments(args=None): @@ -29,21 +29,22 @@ def parse_arguments(args=None): parser = argparse.ArgumentParser( parents=[requiredArgs, parentParser], formatter_class=argparse.ArgumentDefaultsHelpFormatter, - description='This tool corrects the GC-bias using the' - ' method proposed by [Benjamini & Speed (2012). ' - 'Nucleic Acids Research, 40(10)]. It will remove reads' - ' from regions with too high coverage compared to the' - ' expected values (typically GC-rich regions) and will' - ' add reads to regions where too few reads are seen ' - '(typically AT-rich regions). ' - 'The tool ``computeGCBias`` needs to be run first to generate the ' - 'frequency table needed here.', - usage='correctGCBias ' - '-b file.bam --effectiveGenomeSize 2150570000 -g mm9.2bit ' - '--GCbiasFrequenciesFile freq.txt -o gc_corrected.bam\n' - 'help: correctGCBias -h / correctGCBias --help', - conflict_handler='resolve', - add_help=False) + description="This tool corrects the GC-bias using the" + " method proposed by [Benjamini & Speed (2012). " + "Nucleic Acids Research, 40(10)]. It will remove reads" + " from regions with too high coverage compared to the" + " expected values (typically GC-rich regions) and will" + " add reads to regions where too few reads are seen " + "(typically AT-rich regions). " + "The tool ``computeGCBias`` needs to be run first to generate the " + "frequency table needed here.", + usage="correctGCBias " + "-b file.bam --effectiveGenomeSize 2150570000 -g mm9.2bit " + "--GCbiasFrequenciesFile freq.txt -o gc_corrected.bam\n" + "help: correctGCBias -h / correctGCBias --help", + conflict_handler="resolve", + add_help=False, + ) return parser @@ -56,59 +57,74 @@ def process_args(args=None): def getRequiredArgs(): parser = argparse.ArgumentParser(add_help=False) - required = parser.add_argument_group('Required arguments') + required = parser.add_argument_group("Required arguments") # define the arguments - required.add_argument('--bamfile', '-b', - metavar='BAM file', - help='Sorted BAM file to correct.', - required=True) - required.add_argument('--effectiveGenomeSize', - help='The effective genome size is the portion ' - 'of the genome that is mappable. Large fractions of ' - 'the genome are stretches of NNNN that should be ' - 'discarded. Also, if repetitive regions were not ' - 'included in the mapping of reads, the effective ' - 'genome size needs to be adjusted accordingly. ' - 'A table of values is available here: ' - 'http://deeptools.readthedocs.io/en/latest/content/feature/effectiveGenomeSize.html .', - default=None, - type=int, - required=True) - - required.add_argument('--genome', '-g', - help='Genome in two bit format. Most genomes can be ' - 'found here: http://hgdownload.cse.ucsc.edu/gbdb/ ' - 'Search for the .2bit ending. Otherwise, fasta ' - 'files can be converted to 2bit using faToTwoBit ' - 'available here: ' - 'http://hgdownload.cse.ucsc.edu/admin/exe/', - metavar='two bit file', - required=True) - - required.add_argument('--GCbiasFrequenciesFile', '-freq', - help='Indicate the output file from ' - 'computeGCBias containing ' - 'the observed and expected read frequencies per GC-' - 'content.', - type=argparse.FileType('r'), - metavar='FILE', - required=True) - - output = parser.add_argument_group('Output options') - output.add_argument('--correctedFile', '-o', - help='Name of the corrected file. The ending will ' - 'be used to decide the output file format. The options ' - 'are ".bam", ".bw" for a bigWig file, ".bg" for a ' - 'bedGraph file.', - metavar='FILE', - type=argparse.FileType('w'), - required=True) + required.add_argument( + "--bamfile", + "-b", + metavar="BAM file", + help="Sorted BAM file to correct.", + required=True, + ) + required.add_argument( + "--effectiveGenomeSize", + help="The effective genome size is the portion " + "of the genome that is mappable. Large fractions of " + "the genome are stretches of NNNN that should be " + "discarded. Also, if repetitive regions were not " + "included in the mapping of reads, the effective " + "genome size needs to be adjusted accordingly. " + "A table of values is available here: " + "http://deeptools.readthedocs.io/en/latest/content/feature/effectiveGenomeSize.html .", + default=None, + type=int, + required=True, + ) + + required.add_argument( + "--genome", + "-g", + help="Genome in two bit format. Most genomes can be " + "found here: http://hgdownload.cse.ucsc.edu/gbdb/ " + "Search for the .2bit ending. Otherwise, fasta " + "files can be converted to 2bit using faToTwoBit " + "available here: " + "http://hgdownload.cse.ucsc.edu/admin/exe/", + metavar="two bit file", + required=True, + ) + + required.add_argument( + "--GCbiasFrequenciesFile", + "-freq", + help="Indicate the output file from " + "computeGCBias containing " + "the observed and expected read frequencies per GC-" + "content.", + type=argparse.FileType("r"), + metavar="FILE", + required=True, + ) + + output = parser.add_argument_group("Output options") + output.add_argument( + "--correctedFile", + "-o", + help="Name of the corrected file. The ending will " + "be used to decide the output file format. The options " + 'are ".bam", ".bw" for a bigWig file, ".bg" for a ' + "bedGraph file.", + metavar="FILE", + type=argparse.FileType("w"), + required=True, + ) # define the optional arguments - optional = parser.add_argument_group('Optional arguments') - optional.add_argument("--help", "-h", action="help", - help="show this help message and exit") + optional = parser.add_argument_group("Optional arguments") + optional.add_argument( + "--help", "-h", action="help", help="show this help message and exit" + ) return parser @@ -181,8 +197,8 @@ def writeCorrected_worker(chrNameBam, chrNameBit, start, end, step): i = 0 - tbit = py2bit.open(global_vars['2bit']) - bam = openBam(global_vars['bam']) + tbit = py2bit.open(global_vars["2bit"]) + bam = openBam(global_vars["bam"]) read_repetitions = 0 removed_duplicated_reads = 0 startTime = time.time() @@ -191,8 +207,7 @@ def writeCorrected_worker(chrNameBam, chrNameBit, start, end, step): # r.flag & 4 == 0 is to skip unmapped # reads that nevertheless are asigned # to a genomic position - reads = [r for r in bam.fetch(chrNameBam, start, end) - if r.flag & 4 == 0] + reads = [r for r in bam.fetch(chrNameBam, start, end) if r.flag & 4 == 0] bam.close() @@ -203,8 +218,7 @@ def writeCorrected_worker(chrNameBam, chrNameBit, start, end, step): r_index += 1 try: # calculate GC content of read fragment - gc = getReadGCcontent(tbit, read, fragmentLength, - chrNameBit) + gc = getReadGCcontent(tbit, read, fragmentLength, chrNameBit) except Exception as detail: print(detail) """ this exception happens when the end of a @@ -214,18 +228,23 @@ def writeCorrected_worker(chrNameBam, chrNameBit, start, end, step): continue # is this read in the same orientation and position as the previous? - if r_index > 0 and read.pos == reads[r_index - 1].pos and \ - read.is_reverse == reads[r_index - 1].is_reverse \ - and read.pnext == reads[r_index - 1].pnext: + if ( + r_index > 0 + and read.pos == reads[r_index - 1].pos + and read.is_reverse == reads[r_index - 1].is_reverse + and read.pnext == reads[r_index - 1].pnext + ): read_repetitions += 1 - if read_repetitions >= global_vars['max_dup_gc'][gc]: + if read_repetitions >= global_vars["max_dup_gc"][gc]: removed_duplicated_reads += 1 continue else: read_repetitions = 0 try: - fragmentStart, fragmentEnd = getFragmentFromRead(read, fragmentLength, extendPairedEnds=True) + fragmentStart, fragmentEnd = getFragmentFromRead( + read, fragmentLength, extendPairedEnds=True + ) vectorStart = max(fragmentStart - start, 0) vectorEnd = min(fragmentEnd - start, end - start) except TypeError: @@ -239,25 +258,33 @@ def writeCorrected_worker(chrNameBam, chrNameBit, start, end, step): try: if debug: endTime = time.time() - print("{}, processing {} ({:.1f} per sec) " - "reads @ {}:{}-{}".format(multiprocessing.current_process().name, - i, i / (endTime - startTime), - chrNameBit, start, end)) + print( + "{}, processing {} ({:.1f} per sec) " + "reads @ {}:{}-{}".format( + multiprocessing.current_process().name, + i, + i / (endTime - startTime), + chrNameBit, + start, + end, + ) + ) except NameError: pass if i == 0: return None - _file = open(utilities.getTempFileName(suffix='.bg'), 'w') + _file = open(utilities.getTempFileName(suffix=".bg"), "w") # save in bedgraph format for bin in range(0, len(cvg_corr), step): - value = np.mean(cvg_corr[bin:min(bin + step, end)]) + value = np.mean(cvg_corr[bin : min(bin + step, end)]) if value > 0: writeStart = start + bin writeEnd = min(start + bin + step, end) - _file.write("%s\t%d\t%d\t%.1f\n" % (chrNameBit, writeStart, - writeEnd, value)) + _file.write( + "%s\t%d\t%d\t%.1f\n" % (chrNameBit, writeStart, writeEnd, value) + ) tempFileName = _file.name _file.close() @@ -288,10 +315,15 @@ def writeCorrectedSam_wrapper(args): return writeCorrectedSam_worker(*args) -def writeCorrectedSam_worker(chrNameBam, chrNameBit, start, end, - step=None, - tag_but_not_change_number=False, - verbose=True): +def writeCorrectedSam_worker( + chrNameBam, + chrNameBit, + start, + end, + step=None, + tag_but_not_change_number=False, + verbose=True, +): r""" Writes a BAM file, deleting and adding some reads in order to compensate for the GC bias. **This is a stochastic method.** @@ -333,12 +365,12 @@ def writeCorrectedSam_worker(chrNameBam, chrNameBit, start, end, print("Sam for %s %s %s " % (chrNameBit, start, end)) i = 0 - tbit = py2bit.open(global_vars['2bit']) + tbit = py2bit.open(global_vars["2bit"]) - bam = openBam(global_vars['bam']) - tempFileName = utilities.getTempFileName(suffix='.bam') + bam = openBam(global_vars["bam"]) + tempFileName = utilities.getTempFileName(suffix=".bam") - outfile = pysam.Samfile(tempFileName, 'wb', template=bam) + outfile = pysam.Samfile(tempFileName, "wb", template=bam) startTime = time.time() matePairs = {} read_repetitions = 0 @@ -347,8 +379,11 @@ def writeCorrectedSam_worker(chrNameBam, chrNameBit, start, end, # cache data # r.flag & 4 == 0 is to filter unmapped reads that # have a genomic position - reads = [r for r in bam.fetch(chrNameBam, start, end) - if r.pos > start and r.flag & 4 == 0] + reads = [ + r + for r in bam.fetch(chrNameBam, start, end) + if r.pos > start and r.flag & 4 == 0 + ] r_index = -1 for read in reads: @@ -361,26 +396,29 @@ def writeCorrectedSam_worker(chrNameBam, chrNameBit, start, end, # check if a mate has already been procesed # to apply the same correction try: - copies = matePairs[read.qname]['copies'] - gc = matePairs[read.qname]['gc'] + copies = matePairs[read.qname]["copies"] + gc = matePairs[read.qname]["gc"] del matePairs[read.qname] except: # this exception happens when a mate is # not present. This could # happen because of removal of the mate # by some filtering - gc = getReadGCcontent(tbit, read, fragmentLength, - chrNameBit) + gc = getReadGCcontent(tbit, read, fragmentLength, chrNameBit) if gc: copies = numCopiesOfRead(float(1) / R_gc[gc]) else: copies = 1 # is this read in the same orientation and position as the previous? - if gc and r_index > 0 and read.pos == reads[r_index - 1].pos \ - and read.is_reverse == reads[r_index - 1].is_reverse \ - and read.pnext == reads[r_index - 1].pnext: + if ( + gc + and r_index > 0 + and read.pos == reads[r_index - 1].pos + and read.is_reverse == reads[r_index - 1].is_reverse + and read.pnext == reads[r_index - 1].pnext + ): read_repetitions += 1 - if read_repetitions >= global_vars['max_dup_gc'][gc]: + if read_repetitions >= global_vars["max_dup_gc"][gc]: copies = 0 # in other words do not take into account this read removed_duplicated_reads += 1 else: @@ -405,23 +443,23 @@ def writeCorrectedSam_worker(chrNameBam, chrNameBit, start, end, replace_tags = True if gc: - GC = int(100 * np.round(float(gc) / fragmentLength, - decimals=2)) - readTag.append( - ('YC', float(round(float(1) / R_gc[gc], 2)), "f")) - readTag.append(('YN', copies, "i")) + GC = int(100 * np.round(float(gc) / fragmentLength, decimals=2)) + readTag.append(("YC", float(round(float(1) / R_gc[gc], 2)), "f")) + readTag.append(("YN", copies, "i")) else: GC = -1 - readTag.append(('YG', GC, "i")) + readTag.append(("YG", GC, "i")) if replace_tags: read.set_tags(readTag) - if read.is_paired and read.is_proper_pair \ - and not read.mate_is_unmapped \ - and not read.is_reverse: - matePairs[readName] = {'copies': copies, - 'gc': gc} + if ( + read.is_paired + and read.is_proper_pair + and not read.mate_is_unmapped + and not read.is_reverse + ): + matePairs[readName] = {"copies": copies, "gc": gc} """ outfile.write(read) @@ -440,23 +478,40 @@ def writeCorrectedSam_worker(chrNameBam, chrNameBit, start, end, if verbose: if i % 500000 == 0 and i > 0: endTime = time.time() - print("{}, processing {} ({:.1f} per sec) reads " - "@ {}:{}-{}".format(multiprocessing.current_process().name, - i, i / (endTime - startTime), - chrNameBit, start, end)) + print( + "{}, processing {} ({:.1f} per sec) reads " + "@ {}:{}-{}".format( + multiprocessing.current_process().name, + i, + i / (endTime - startTime), + chrNameBit, + start, + end, + ) + ) i += 1 outfile.close() if verbose: endTime = time.time() - print("{}, processing {} ({:.1f} per sec) reads " - "@ {}:{}-{}".format(multiprocessing.current_process().name, - i, i / (endTime - startTime), - chrNameBit, start, end)) - percentage = float(removed_duplicated_reads) * 100 / len(reads) \ - if len(reads) > 0 else 0 - print("duplicated reads removed %d of %d (%.2f) " % - (removed_duplicated_reads, len(reads), percentage)) + print( + "{}, processing {} ({:.1f} per sec) reads " + "@ {}:{}-{}".format( + multiprocessing.current_process().name, + i, + i / (endTime - startTime), + chrNameBit, + start, + end, + ) + ) + percentage = ( + float(removed_duplicated_reads) * 100 / len(reads) if len(reads) > 0 else 0 + ) + print( + "duplicated reads removed %d of %d (%.2f) " + % (removed_duplicated_reads, len(reads), percentage) + ) return tempFileName @@ -542,10 +597,10 @@ def run_shell_command(command): subprocess.check_call(command, shell=True) except subprocess.CalledProcessError as error: - sys.stderr.write('Error{}\n'.format(error)) + sys.stderr.write("Error{}\n".format(error)) exit(1) except Exception as error: - sys.stderr.write('Error: {}\n'.format(error)) + sys.stderr.write("Error: {}\n".format(error)) exit(1) @@ -561,42 +616,47 @@ def main(args=None): global global_vars global_vars = {} - global_vars['2bit'] = args.genome - global_vars['bam'] = args.bamfile + global_vars["2bit"] = args.genome + global_vars["bam"] = args.bamfile # compute the probability to find more than one read (a redundant read) # at a certain position based on the gc of the read fragment # the binomial function is used for that - max_dup_gc = [binom.isf(1e-7, F_gc[x], 1.0 / N_gc[x]) - if F_gc[x] > 0 and N_gc[x] > 0 else 1 - for x in range(len(F_gc))] + max_dup_gc = [ + binom.isf(1e-7, F_gc[x], 1.0 / N_gc[x]) if F_gc[x] > 0 and N_gc[x] > 0 else 1 + for x in range(len(F_gc)) + ] - global_vars['max_dup_gc'] = max_dup_gc + global_vars["max_dup_gc"] = max_dup_gc - tbit = py2bit.open(global_vars['2bit']) - bam, mapped, unmapped, stats = openBam(args.bamfile, returnStats=True, nThreads=args.numberOfProcessors) + tbit = py2bit.open(global_vars["2bit"]) + bam, mapped, unmapped, stats = openBam( + args.bamfile, returnStats=True, nThreads=args.numberOfProcessors + ) - global_vars['genome_size'] = sum(tbit.chroms().values()) - global_vars['total_reads'] = mapped - global_vars['reads_per_bp'] = \ - float(global_vars['total_reads']) / args.effectiveGenomeSize + global_vars["genome_size"] = sum(tbit.chroms().values()) + global_vars["total_reads"] = mapped + global_vars["reads_per_bp"] = ( + float(global_vars["total_reads"]) / args.effectiveGenomeSize + ) # apply correction print("applying correction") # divide the genome in fragments containing about 4e5 reads. # This amount of reads takes about 20 seconds # to process per core (48 cores, 256 Gb memory) - chunkSize = int(4e5 / global_vars['reads_per_bp']) + chunkSize = int(4e5 / global_vars["reads_per_bp"]) # chromSizes: list of tuples - chromSizes = [(bam.references[i], bam.lengths[i]) - for i in range(len(bam.references))] + chromSizes = [ + (bam.references[i], bam.lengths[i]) for i in range(len(bam.references)) + ] regionStart = 0 if args.region: - chromSizes, regionStart, regionEnd, chunkSize = \ - mapReduce.getUserRegion(chromSizes, args.region, - max_chunk_size=chunkSize) + chromSizes, regionStart, regionEnd, chunkSize = mapReduce.getUserRegion( + chromSizes, args.region, max_chunk_size=chunkSize + ) print("genome partition size for multiprocessing: {}".format(chunkSize)) print("using region {}".format(args.region)) @@ -617,20 +677,21 @@ def main(args=None): print("Reads in this chromosome will be skipped") continue length = min(size, i + chunkSize) - mp_args.append((chrom, chrNameBamToBit[chrom], i, length, - bedGraphStep)) + mp_args.append((chrom, chrNameBamToBit[chrom], i, length, bedGraphStep)) c += 1 pool = multiprocessing.Pool(args.numberOfProcessors) - if args.correctedFile.name.endswith('bam'): + if args.correctedFile.name.endswith("bam"): if len(mp_args) > 1 and args.numberOfProcessors > 1: - print(("using {} processors for {} " - "number of tasks".format(args.numberOfProcessors, - len(mp_args)))) - - res = pool.map_async( - writeCorrectedSam_wrapper, mp_args).get(9999999) + print( + ( + "using {} processors for {} " + "number of tasks".format(args.numberOfProcessors, len(mp_args)) + ) + ) + + res = pool.map_async(writeCorrectedSam_wrapper, mp_args).get(9999999) else: res = list(map(writeCorrectedSam_wrapper, mp_args)) @@ -655,8 +716,7 @@ def main(args=None): for tempFileName in res: os.remove(tempFileName) - if args.correctedFile.name.endswith('bg') or \ - args.correctedFile.name.endswith('bw'): + if args.correctedFile.name.endswith("bg") or args.correctedFile.name.endswith("bw"): if len(mp_args) > 1 and args.numberOfProcessors > 1: @@ -666,11 +726,11 @@ def main(args=None): oname = args.correctedFile.name args.correctedFile.close() - if oname.endswith('bg'): - f = open(oname, 'wb') + if oname.endswith("bg"): + f = open(oname, "wb") for tempFileName in res: if tempFileName: - shutil.copyfileobj(open(tempFileName, 'rb'), f) + shutil.copyfileobj(open(tempFileName, "rb"), f) os.remove(tempFileName) f.close() else: @@ -678,68 +738,66 @@ def main(args=None): writeBedGraph.bedGraphToBigWig(chromSizes, res, oname) -class Tester(): +class Tester: def __init__(self): import os + self.root = os.path.dirname(os.path.abspath(__file__)) + "/test/test_corrGC/" self.tbitFile = self.root + "sequence.2bit" self.bamFile = self.root + "test.bam" - self.chrNameBam = '2L' - self.chrNameBit = 'chr2L' + self.chrNameBam = "2L" + self.chrNameBit = "chr2L" bam, mapped, unmapped, stats = openBam(self.bamFile, returnStats=True) tbit = py2bit.open(self.tbitFile) global debug debug = 0 global global_vars - global_vars = {'2bit': self.tbitFile, - 'bam': self.bamFile, - 'filter_out': None, - 'extra_sampling_file': None, - 'max_reads': 5, - 'min_reads': 0, - 'min_reads': 0, - 'reads_per_bp': 0.3, - 'total_reads': mapped, - 'genome_size': sum(tbit.chroms().values())} + global_vars = { + "2bit": self.tbitFile, + "bam": self.bamFile, + "filter_out": None, + "extra_sampling_file": None, + "max_reads": 5, + "min_reads": 0, + "min_reads": 0, + "reads_per_bp": 0.3, + "total_reads": mapped, + "genome_size": sum(tbit.chroms().values()), + } def testWriteCorrectedChunk(self): - """ prepare arguments for test - """ + """prepare arguments for test""" global R_gc, R_gc_min, R_gc_max R_gc = np.loadtxt(self.root + "R_gc_paired.txt") - global_vars['max_dup_gc'] = np.ones(301) + global_vars["max_dup_gc"] = np.ones(301) start = 200 end = 300 bedGraphStep = 25 - return (self.chrNameBam, - self.chrNameBit, start, end, bedGraphStep) + return (self.chrNameBam, self.chrNameBit, start, end, bedGraphStep) def testWriteCorrectedSam(self): - """ prepare arguments for test - """ + """prepare arguments for test""" global R_gc, R_gc_min, R_gc_max R_gc = np.loadtxt(self.root + "R_gc_paired.txt") - global_vars['max_dup_gc'] = np.ones(301) + global_vars["max_dup_gc"] = np.ones(301) start = 200 end = 250 - return (self.chrNameBam, - self.chrNameBit, start, end) + return (self.chrNameBam, self.chrNameBit, start, end) def testWriteCorrectedSam_paired(self): - """ prepare arguments for test. - """ + """prepare arguments for test.""" global R_gc, R_gc_min, R_gc_max R_gc = np.loadtxt(self.root + "R_gc_paired.txt") start = 0 end = 500 global global_vars - global_vars['bam'] = self.root + "paired.bam" - return 'chr2L', 'chr2L', start, end + global_vars["bam"] = self.root + "paired.bam" + return "chr2L", "chr2L", start, end if __name__ == "__main__": diff --git a/deeptools/heatmapper_utilities.py b/deeptools/heatmapper_utilities.py index aa3559096f..13d1bb47ed 100644 --- a/deeptools/heatmapper_utilities.py +++ b/deeptools/heatmapper_utilities.py @@ -1,19 +1,21 @@ import numpy as np import matplotlib -matplotlib.use('Agg') -matplotlib.rcParams['pdf.fonttype'] = 42 -matplotlib.rcParams['svg.fonttype'] = 'none' + +matplotlib.use("Agg") +matplotlib.rcParams["pdf.fonttype"] = 42 +matplotlib.rcParams["svg.fonttype"] = "none" from deeptools import cm # noqa: F401 import matplotlib.colors as pltcolors import plotly.graph_objs as go -old_settings = np.seterr(all='ignore') +old_settings = np.seterr(all="ignore") debug = 0 if debug: from ipdb import set_trace -def plot_single(ax, ma, average_type, color, label, plot_type='lines'): + +def plot_single(ax, ma, average_type, color, label, plot_type="lines"): """ Adds a line to the plot in the given ax using the specified method @@ -68,7 +70,7 @@ def plot_single(ax, ma, average_type, color, label, plot_type='lines'): """ - if isinstance(ma,list): + if isinstance(ma, list): summary_list = [] for ma_each in ma: tmp = np.ma.__getattribute__(average_type)(ma_each, axis=0) @@ -82,13 +84,13 @@ def plot_single(ax, ma, average_type, color, label, plot_type='lines'): if isinstance(color, np.ndarray): color = pltcolors.to_hex(color, keep_alpha=True) ax.plot(x, summary, color=color, label=label, alpha=0.9) - if plot_type == 'fill': - ax.fill_between(x, summary, facecolor=color, alpha=0.6, edgecolor='none') + if plot_type == "fill": + ax.fill_between(x, summary, facecolor=color, alpha=0.6, edgecolor="none") if debug: set_trace() - if plot_type in ['se', 'std']: - if plot_type == 'se': # standard error + if plot_type in ["se", "std"]: + if plot_type == "se": # standard error std = np.std(ma, axis=0) / np.sqrt(ma.shape[0]) else: std = np.std(ma, axis=0) @@ -98,39 +100,49 @@ def plot_single(ax, ma, average_type, color, label, plot_type='lines'): # between the mean (or median etc.) and the std or se f_color = pltcolors.colorConverter.to_rgba(color, alpha) - ax.fill_between(x, summary, summary + std, facecolor=f_color, edgecolor='none') - ax.fill_between(x, summary, summary - std, facecolor=f_color, edgecolor='none') + ax.fill_between(x, summary, summary + std, facecolor=f_color, edgecolor="none") + ax.fill_between(x, summary, summary - std, facecolor=f_color, edgecolor="none") ax.set_xlim(0, max(x)) return ax -def plotly_single(ma, average_type, color, label, plot_type='line'): +def plotly_single(ma, average_type, color, label, plot_type="line"): """A plotly version of plot_single. Returns a list of traces""" summary = list(np.ma.__getattribute__(average_type)(ma, axis=0)) x = list(np.arange(len(summary))) if isinstance(color, str): color = list(matplotlib.colors.to_rgb(color)) - traces = [go.Scatter(x=x, y=summary, name=label, line={'color': "rgba({},{},{},0.9)".format(color[0], color[1], color[2])}, showlegend=False)] - if plot_type == 'fill': - traces[0].update(fill='tozeroy', fillcolor=color) - - if plot_type in ['se', 'std']: - if plot_type == 'se': # standard error + traces = [ + go.Scatter( + x=x, + y=summary, + name=label, + line={"color": "rgba({},{},{},0.9)".format(color[0], color[1], color[2])}, + showlegend=False, + ) + ] + if plot_type == "fill": + traces[0].update(fill="tozeroy", fillcolor=color) + + if plot_type in ["se", "std"]: + if plot_type == "se": # standard error std = np.std(ma, axis=0) / np.sqrt(ma.shape[0]) else: std = np.std(ma, axis=0) x_rev = x[::-1] lower = summary - std - trace = go.Scatter(x=x + x_rev, - y=np.concatenate([summary + std, lower[::-1]]), - fill='tozerox', - fillcolor="rgba({},{},{},0.2)".format(color[0], color[1], color[2]), - line=go.Line(color='transparent'), - showlegend=False, - name=label) + trace = go.Scatter( + x=x + x_rev, + y=np.concatenate([summary + std, lower[::-1]]), + fill="tozerox", + fillcolor="rgba({},{},{},0.2)".format(color[0], color[1], color[2]), + line=go.Line(color="transparent"), + showlegend=False, + name=label, + ) traces.append(trace) return traces @@ -148,42 +160,44 @@ def getProfileTicks(hm, referencePointLabel, startLabel, endLabel, idx): As of matplotlib 3.1 there is no longer padding added to all ticks. Reference point ticks will be adjusted by width/2 or width for spacing and the last half of scaled ticks will be shifed by 1 bin so the ticks are at the beginning of bins. """ - w = hm.parameters['bin size'] - b = hm.parameters['upstream'] - a = hm.parameters['downstream'] + w = hm.parameters["bin size"] + b = hm.parameters["upstream"] + a = hm.parameters["downstream"] if idx is not None: w = w[idx] b = b[idx] a = a[idx] try: - c = hm.parameters['unscaled 5 prime'] + c = hm.parameters["unscaled 5 prime"] if idx is not None: c = c[idx] except: c = 0 try: - d = hm.parameters['unscaled 3 prime'] + d = hm.parameters["unscaled 3 prime"] if idx is not None: d = d[idx] except: d = 0 - m = hm.parameters['body'] + m = hm.parameters["body"] if idx is not None: m = m[idx] if b < 1e5: quotient = 1000 - symbol = 'Kb' + symbol = "Kb" else: quotient = 1e6 - symbol = 'Mb' + symbol = "Mb" if m == 0: xticks = [(k / w) for k in [0, b - 0.5 * w, b + a - w]] - xtickslabel = ['{0:.1f}'.format(-(float(b) / quotient)), - referencePointLabel, - '{0:.1f}{1}'.format(float(a) / quotient, symbol)] + xtickslabel = [ + "{0:.1f}".format(-(float(b) / quotient)), + referencePointLabel, + "{0:.1f}{1}".format(float(a) / quotient, symbol), + ] else: xticks_values = [0] xtickslabel = [] @@ -191,7 +205,7 @@ def getProfileTicks(hm, referencePointLabel, startLabel, endLabel, idx): # only if upstream region is set, add a x tick if b > 0: xticks_values.append(b) - xtickslabel.append('{0:.1f}'.format(-(float(b) / quotient))) + xtickslabel.append("{0:.1f}".format(-(float(b) / quotient))) xtickslabel.append(startLabel) @@ -211,7 +225,7 @@ def getProfileTicks(hm, referencePointLabel, startLabel, endLabel, idx): if a > 0: xticks_values.append(b + c + m + d + a - w) - xtickslabel.append('{0:.1f}{1}'.format(float(a) / quotient, symbol)) + xtickslabel.append("{0:.1f}{1}".format(float(a) / quotient, symbol)) xticks = [(k / w) for k in xticks_values] xticks = [max(x, 0) for x in xticks] diff --git a/deeptools/plotHeatmap.py b/deeptools/plotHeatmap.py index 208735b9d1..163078dbe1 100755 --- a/deeptools/plotHeatmap.py +++ b/deeptools/plotHeatmap.py @@ -6,9 +6,10 @@ from collections import OrderedDict import numpy as np import matplotlib -matplotlib.use('Agg') -matplotlib.rcParams['pdf.fonttype'] = 42 -matplotlib.rcParams['svg.fonttype'] = 'none' + +matplotlib.use("Agg") +matplotlib.rcParams["pdf.fonttype"] = 42 +matplotlib.rcParams["svg.fonttype"] = "none" import matplotlib.pyplot as plt from matplotlib.font_manager import FontProperties import matplotlib.gridspec as gridspec @@ -27,24 +28,26 @@ from deeptools.computeMatrixOperations import filterHeatmapValues debug = 0 -old_settings = np.seterr(all='ignore') +old_settings = np.seterr(all="ignore") plt.ioff() def parse_arguments(args=None): parser = argparse.ArgumentParser( - parents=[parserCommon.heatmapperMatrixArgs(), - parserCommon.heatmapperOutputArgs(mode='heatmap'), - parserCommon.heatmapperOptionalArgs(mode='heatmap')], + parents=[ + parserCommon.heatmapperMatrixArgs(), + parserCommon.heatmapperOutputArgs(mode="heatmap"), + parserCommon.heatmapperOptionalArgs(mode="heatmap"), + ], formatter_class=argparse.ArgumentDefaultsHelpFormatter, - description='This tool creates a heatmap for ' - 'scores associated with genomic regions. ' - 'The program requires a matrix file ' - 'generated by the tool ``computeMatrix``.', - epilog='An example usage is: plotHeatmap -m matrix.gz', - usage='plotHeatmap -m matrix.gz\n' - 'help: plotHeatmap -h / plotHeatmap --help', - add_help=False) + description="This tool creates a heatmap for " + "scores associated with genomic regions. " + "The program requires a matrix file " + "generated by the tool ``computeMatrix``.", + epilog="An example usage is: plotHeatmap -m matrix.gz", + usage="plotHeatmap -m matrix.gz\n" "help: plotHeatmap -h / plotHeatmap --help", + add_help=False, + ) return parser @@ -52,17 +55,33 @@ def parse_arguments(args=None): def process_args(args=None): args = parse_arguments().parse_args(args) - args.heatmapHeight = args.heatmapHeight if args.heatmapHeight > 3 and args.heatmapHeight <= 100 else 10 + args.heatmapHeight = ( + args.heatmapHeight + if args.heatmapHeight > 3 and args.heatmapHeight <= 100 + else 10 + ) if not matplotlib.colors.is_color_like(args.missingDataColor): - exit("The value {0} for --missingDataColor is not valid".format(args.missingDataColor)) + exit( + "The value {0} for --missingDataColor is not valid".format( + args.missingDataColor + ) + ) - args.boxAroundHeatmaps = True if args.boxAroundHeatmaps == 'yes' else False + args.boxAroundHeatmaps = True if args.boxAroundHeatmaps == "yes" else False return args -def prepare_layout(hm_matrix, heatmapsize, showSummaryPlot, showColorbar, perGroup, colorbar_position, fig): +def prepare_layout( + hm_matrix, + heatmapsize, + showSummaryPlot, + showColorbar, + perGroup, + colorbar_position, + fig, +): """ prepare the plot layout as a grid having as many rows @@ -80,7 +99,9 @@ def prepare_layout(hm_matrix, heatmapsize, showSummaryPlot, showColorbar, perGro # on the number of regions contained in the if perGroup: # heatmap - height_ratio = np.array([np.amax(np.diff(hm_matrix.group_boundaries))] * numrows) + height_ratio = np.array( + [np.amax(np.diff(hm_matrix.group_boundaries))] * numrows + ) # scale ratio to sum = heatmapheight height_ratio = heatmapheight * (height_ratio.astype(float) / height_ratio.sum()) else: @@ -95,7 +116,7 @@ def prepare_layout(hm_matrix, heatmapsize, showSummaryPlot, showColorbar, perGro width_ratio = [heatmapwidth] * numcols if showColorbar: - if colorbar_position == 'below': + if colorbar_position == "below": numrows += 2 # a spacer needs to be added to avoid overlaps height_ratio += [4 / 2.54] # spacer height_ratio += [1 / 2.54] @@ -113,13 +134,20 @@ def prepare_layout(hm_matrix, heatmapsize, showSummaryPlot, showColorbar, perGro # numbers to heatmapheigt fractions height_ratio = np.concatenate([[sumplot_height, spacer_height], height_ratio]) - grids = gridspec.GridSpec(numrows, numcols, height_ratios=height_ratio, width_ratios=width_ratio, figure=fig) + grids = gridspec.GridSpec( + numrows, + numcols, + height_ratios=height_ratio, + width_ratios=width_ratio, + figure=fig, + ) return grids -def autobreaklinetitle(title,sep="[-_,.:]",lmax=15): + +def autobreaklinetitle(title, sep="[-_,.:]", lmax=15): outsep = "-" - sss = [ rr for rr in re.split(sep,title) if len(rr) ] + sss = [rr for rr in re.split(sep, title) if len(rr)] newtitle, tmp = "", "" for ss in sss: tmp0 = tmp @@ -133,15 +161,38 @@ def autobreaklinetitle(title,sep="[-_,.:]",lmax=15): newtitle = "\n" + newtitle return newtitle -def addProfilePlot(hm, plt, fig, grids, iterNum, iterNum2, perGroup, averageType, plot_type, yAxisLabel, color_list, yMin, yMax, wspace, hspace, colorbar_position, label_rotation=0.0): + +def addProfilePlot( + hm, + plt, + fig, + grids, + iterNum, + iterNum2, + perGroup, + averageType, + plot_type, + yAxisLabel, + color_list, + yMin, + yMax, + wspace, + hspace, + colorbar_position, + label_rotation=0.0, +): """ A function to add profile plots to the given figure, possibly in a custom grid subplot which mimics a tight layout (if wspace and hspace are not None) """ if wspace is not None and hspace is not None: - if colorbar_position == 'side': - gridsSub = gridspec.GridSpecFromSubplotSpec(1, iterNum, subplot_spec=grids[0, :-1], wspace=wspace, hspace=hspace) + if colorbar_position == "side": + gridsSub = gridspec.GridSpecFromSubplotSpec( + 1, iterNum, subplot_spec=grids[0, :-1], wspace=wspace, hspace=hspace + ) else: - gridsSub = gridspec.GridSpecFromSubplotSpec(1, iterNum, subplot_spec=grids[0, :], wspace=wspace, hspace=hspace) + gridsSub = gridspec.GridSpecFromSubplotSpec( + 1, iterNum, subplot_spec=grids[0, :], wspace=wspace, hspace=hspace + ) ax_list = [] globalYmin = np.inf @@ -165,25 +216,28 @@ def addProfilePlot(hm, plt, fig, grids, iterNum, iterNum2, perGroup, averageType for group in range(iterNum2): if perGroup: sub_matrix = hm.matrix.get_matrix(sample_id, group) - line_label = sub_matrix['sample'] + line_label = sub_matrix["sample"] else: sub_matrix = hm.matrix.get_matrix(group, sample_id) - line_label = sub_matrix['group'] - plot_single(ax_profile, sub_matrix['matrix'], - averageType, - color_list[group], - line_label, - plot_type=plot_type) + line_label = sub_matrix["group"] + plot_single( + ax_profile, + sub_matrix["matrix"], + averageType, + color_list[group], + line_label, + plot_type=plot_type, + ) if sample_id > 0 and len(yMin) == 1 and len(yMax) == 1: plt.setp(ax_profile.get_yticklabels(), visible=False) - ax_profile.get_yaxis().set_tick_params(direction='in',pad=-22) # beisi - if sample_id == 0 and yAxisLabel != '': + ax_profile.get_yaxis().set_tick_params(direction="in", pad=-22) # beisi + if sample_id == 0 and yAxisLabel != "": ax_profile.set_ylabel(yAxisLabel) xticks, xtickslabel = hm.getTicks(tickIdx) - if np.ceil(max(xticks)) != float(sub_matrix['matrix'].shape[1] - 1): - tickscale = float(sub_matrix['matrix'].shape[1] - 1) / max(xticks) + if np.ceil(max(xticks)) != float(sub_matrix["matrix"].shape[1] - 1): + tickscale = float(sub_matrix["matrix"].shape[1] - 1) / max(xticks) xticks_use = [x * tickscale for x in xticks] ax_profile.axes.set_xticks(xticks_use) else: @@ -195,8 +249,8 @@ def addProfilePlot(hm, plt, fig, grids, iterNum, iterNum2, perGroup, averageType # such that they don't fall off # the heatmap sides ticks = ax_profile.xaxis.get_major_ticks() - ticks[0].label1.set_horizontalalignment('left') - ticks[-1].label1.set_horizontalalignment('right') + ticks[0].label1.set_horizontalalignment("left") + ticks[-1].label1.set_horizontalalignment("right") globalYmin = min(float(globalYmin), ax_profile.get_ylim()[0]) globalYmax = max(globalYmax, ax_profile.get_ylim()[1]) @@ -219,20 +273,30 @@ def addProfilePlot(hm, plt, fig, grids, iterNum, iterNum2, perGroup, averageType return ax_list -def plotlyMatrix(hm, - outFilename, - yMin=[None], yMax=[None], - zMin=[None], zMax=[None], - showSummaryPlot=False, - cmap=None, colorList=None, colorBarPosition='side', - perGroup=False, - averageType='median', yAxisLabel='', xAxisLabel='', - plotTitle='', - showColorbar=False, - label_rotation=0.0): +def plotlyMatrix( + hm, + outFilename, + yMin=[None], + yMax=[None], + zMin=[None], + zMax=[None], + showSummaryPlot=False, + cmap=None, + colorList=None, + colorBarPosition="side", + perGroup=False, + averageType="median", + yAxisLabel="", + xAxisLabel="", + plotTitle="", + showColorbar=False, + label_rotation=0.0, +): label_rotation *= -1.0 - if colorBarPosition != 'side': - sys.error.write("Warning: It is not currently possible to have multiple colorbars with plotly!\n") + if colorBarPosition != "side": + sys.error.write( + "Warning: It is not currently possible to have multiple colorbars with plotly!\n" + ) nRows = hm.matrix.get_num_groups() nCols = hm.matrix.get_num_samples() @@ -244,8 +308,8 @@ def plotlyMatrix(hm, if showSummaryPlot: profileHeight = 0.2 profileBottomBuffer = 0.05 - profileSideBuffer = 0. - profileWidth = 1. / nCols + profileSideBuffer = 0.0 + profileWidth = 1.0 / nCols if nCols > 1: profileSideBuffer = 0.1 / (nCols - 1) profileWidth = 0.9 / nCols @@ -253,7 +317,7 @@ def plotlyMatrix(hm, dataSummary = [] annos = [] fig = go.Figure() - fig['layout'].update(title=plotTitle) + fig["layout"].update(title=plotTitle) xAxisN = 1 yAxisN = 1 @@ -262,8 +326,8 @@ def plotlyMatrix(hm, yMinLocal = np.inf yMaxLocal = -np.inf for i in range(nCols): - xanchor = 'x{}'.format(xAxisN) - yanchor = 'y{}'.format(yAxisN) + xanchor = "x{}".format(xAxisN) + yanchor = "y{}".format(yAxisN) xBase = i * (profileSideBuffer + profileWidth) yBase = 1 - profileHeight xDomain = [xBase, xBase + profileWidth] @@ -272,20 +336,32 @@ def plotlyMatrix(hm, if perGroup: mat = hm.matrix.get_matrix(i, j) xTicks, xTicksLabels = hm.getTicks(i) - label = mat['sample'] + label = mat["sample"] else: mat = hm.matrix.get_matrix(j, i) xTicks, xTicksLabels = hm.getTicks(j) - label = mat['group'] + label = mat["group"] if j == 0: - fig['layout']['xaxis{}'.format(xAxisN)] = dict(domain=xDomain, anchor=yanchor, range=[0, mat['matrix'].shape[1]], tickmode='array', tickvals=xTicks, ticktext=xTicksLabels, tickangle=label_rotation) - fig['layout']['yaxis{}'.format(yAxisN)] = dict(anchor=xanchor, domain=yDomain) - trace = plotly_single(mat['matrix'], averageType, colorList[j], label)[0] + fig["layout"]["xaxis{}".format(xAxisN)] = dict( + domain=xDomain, + anchor=yanchor, + range=[0, mat["matrix"].shape[1]], + tickmode="array", + tickvals=xTicks, + ticktext=xTicksLabels, + tickangle=label_rotation, + ) + fig["layout"]["yaxis{}".format(yAxisN)] = dict( + anchor=xanchor, domain=yDomain + ) + trace = plotly_single(mat["matrix"], averageType, colorList[j], label)[ + 0 + ] trace.update(xaxis=xanchor, yaxis=yanchor, legendgroup=label) - if min(trace['y']) < yMinLocal: - yMinLocal = min(trace['y']) - if max(trace['y']) > yMaxLocal: - yMaxLocal = max(trace['y']) + if min(trace["y"]) < yMinLocal: + yMinLocal = min(trace["y"]) + if max(trace["y"]) > yMaxLocal: + yMaxLocal = max(trace["y"]) if i == 0: trace.update(showlegend=True) dataSummary.append(trace) @@ -296,7 +372,19 @@ def plotlyMatrix(hm, else: title = hm.matrix.sample_labels[i] titleX = xBase + 0.5 * profileWidth - annos.append({'yanchor': 'bottom', 'xref': 'paper', 'xanchor': 'center', 'yref': 'paper', 'text': title, 'y': 1.0, 'x': titleX, 'font': {'size': 16}, 'showarrow': False}) + annos.append( + { + "yanchor": "bottom", + "xref": "paper", + "xanchor": "center", + "yref": "paper", + "text": title, + "y": 1.0, + "x": titleX, + "font": {"size": 16}, + "showarrow": False, + } + ) xAxisN += 1 yAxisN += 1 @@ -308,22 +396,22 @@ def plotlyMatrix(hm, yMaxUse = yMaxLocal if yMax[(i - 1) % len(yMax)] is not None: yMaxUse = yMax[(i - 1) % len(yMax)] - fig['layout']['yaxis{}'.format(i)].update(range=[yMinUse, yMaxUse]) - fig['layout']['yaxis1'].update(title=yAxisLabel) + fig["layout"]["yaxis{}".format(i)].update(range=[yMinUse, yMaxUse]) + fig["layout"]["yaxis1"].update(title=yAxisLabel) # Add the heatmap dataHeatmap = [] zMinLocal = np.inf zMaxLocal = -np.inf - heatmapWidth = 1. / nCols + heatmapWidth = 1.0 / nCols heatmapSideBuffer = 0.0 if nCols > 1: - heatmapWidth = .9 / nCols + heatmapWidth = 0.9 / nCols heatmapSideBuffer = 0.1 / (nCols - 1) heatmapHeight = 1.0 - profileHeight - profileBottomBuffer for i in range(nCols): - xanchor = 'x{}'.format(xAxisN) + xanchor = "x{}".format(xAxisN) xBase = i * (heatmapSideBuffer + heatmapWidth) # Determine the height of each heatmap, they have no buffer @@ -333,50 +421,78 @@ def plotlyMatrix(hm, mat = hm.matrix.get_matrix(i, j) else: mat = hm.matrix.get_matrix(j, i) - lengths.append(mat['matrix'].shape[0]) - fractionalHeights = heatmapHeight * np.cumsum(lengths).astype(float) / np.sum(lengths).astype(float) + lengths.append(mat["matrix"].shape[0]) + fractionalHeights = ( + heatmapHeight + * np.cumsum(lengths).astype(float) + / np.sum(lengths).astype(float) + ) xDomain = [xBase, xBase + heatmapWidth] - fig['layout']['xaxis{}'.format(xAxisN)] = dict(domain=xDomain, anchor='free', position=0.0, range=[0, mat['matrix'].shape[1]], tickmode='array', tickvals=xTicks, ticktext=xTicksLabels, title=xAxisLabel) + fig["layout"]["xaxis{}".format(xAxisN)] = dict( + domain=xDomain, + anchor="free", + position=0.0, + range=[0, mat["matrix"].shape[1]], + tickmode="array", + tickvals=xTicks, + ticktext=xTicksLabels, + title=xAxisLabel, + ) # Start adding the heatmaps for j in range(nRows): if perGroup: mat = hm.matrix.get_matrix(i, j) - label = mat['sample'] + label = mat["sample"] start = hm.matrix.group_boundaries[i] end = hm.matrix.group_boundaries[i + 1] else: mat = hm.matrix.get_matrix(j, i) - label = mat['group'] + label = mat["group"] start = hm.matrix.group_boundaries[j] end = hm.matrix.group_boundaries[j + 1] regs = hm.matrix.regions[start:end] regs = [x[2] for x in regs] - yanchor = 'y{}'.format(yAxisN) - yDomain = [heatmapHeight - fractionalHeights[j + 1], heatmapHeight - fractionalHeights[j]] + yanchor = "y{}".format(yAxisN) + yDomain = [ + heatmapHeight - fractionalHeights[j + 1], + heatmapHeight - fractionalHeights[j], + ] visible = False if i == 0: visible = True - fig['layout']['yaxis{}'.format(yAxisN)] = dict(domain=yDomain, anchor=xanchor, visible=visible, title=label, tickmode='array', tickvals=[], ticktext=[]) - if np.min(mat['matrix']) < zMinLocal: - zMinLocal = np.min(mat['matrix']) - if np.max(mat['matrix']) < zMaxLocal: - zMaxLocal = np.max(mat['matrix']) - - trace = go.Heatmap(z=np.flipud(mat['matrix']), - y=regs[::-1], - xaxis=xanchor, - yaxis=yanchor, - showlegend=False, - name=label, - showscale=False) + fig["layout"]["yaxis{}".format(yAxisN)] = dict( + domain=yDomain, + anchor=xanchor, + visible=visible, + title=label, + tickmode="array", + tickvals=[], + ticktext=[], + ) + if np.min(mat["matrix"]) < zMinLocal: + zMinLocal = np.min(mat["matrix"]) + if np.max(mat["matrix"]) < zMaxLocal: + zMaxLocal = np.max(mat["matrix"]) + + trace = go.Heatmap( + z=np.flipud(mat["matrix"]), + y=regs[::-1], + xaxis=xanchor, + yaxis=yanchor, + showlegend=False, + name=label, + showscale=False, + ) dataHeatmap.append(trace) yAxisN += 1 xAxisN += 1 if showColorbar: dataHeatmap[-1].update(showscale=True) - dataHeatmap[-1]['colorbar'].update(len=heatmapHeight, y=0, yanchor='bottom', ypad=0.0) + dataHeatmap[-1]["colorbar"].update( + len=heatmapHeight, y=0, yanchor="bottom", ypad=0.0 + ) # Adjust z bounds and colorscale for trace in dataHeatmap: @@ -386,36 +502,49 @@ def plotlyMatrix(hm, zMinUse = zMin[0] if zMax[0] is not None: zMaxUse = zMax[0] - trace.update(zmin=zMinUse, zmax=zMaxUse, colorscale=convertCmap(cmap[0], vmin=zMinUse, vmax=zMaxUse)) + trace.update( + zmin=zMinUse, + zmax=zMaxUse, + colorscale=convertCmap(cmap[0], vmin=zMinUse, vmax=zMaxUse), + ) dataSummary.extend(dataHeatmap) fig.add_traces(dataSummary) - fig['layout']['annotations'] = annos + fig["layout"]["annotations"] = annos py.plot(fig, filename=outFilename, auto_open=False) -def plotMatrix(hm, outFileName, - colorMapDict={'colorMap': ['binary'], 'missingDataColor': 'black', 'alpha': 1.0}, - plotTitle='', - xAxisLabel='', yAxisLabel='', regionsLabel='', - zMin=None, zMax=None, - yMin=None, yMax=None, - averageType='median', - reference_point_label=None, - startLabel='TSS', endLabel="TES", - heatmapHeight=25, - heatmapWidth=7.5, - perGroup=False, whatToShow='plot, heatmap and colorbar', - plot_type='lines', - linesAtTickMarks=False, - image_format=None, - legend_location='upper-left', - box_around_heatmaps=True, - label_rotation=0.0, - dpi=200, - interpolation_method='auto'): - - hm.reference_point_label = hm.parameters['ref point'] +def plotMatrix( + hm, + outFileName, + colorMapDict={"colorMap": ["binary"], "missingDataColor": "black", "alpha": 1.0}, + plotTitle="", + xAxisLabel="", + yAxisLabel="", + regionsLabel="", + zMin=None, + zMax=None, + yMin=None, + yMax=None, + averageType="median", + reference_point_label=None, + startLabel="TSS", + endLabel="TES", + heatmapHeight=25, + heatmapWidth=7.5, + perGroup=False, + whatToShow="plot, heatmap and colorbar", + plot_type="lines", + linesAtTickMarks=False, + image_format=None, + legend_location="upper-left", + box_around_heatmaps=True, + label_rotation=0.0, + dpi=200, + interpolation_method="auto", +): + + hm.reference_point_label = hm.parameters["ref point"] if reference_point_label is not None: hm.reference_point_label = [reference_point_label] * hm.matrix.get_num_samples() hm.startLabel = startLabel @@ -430,12 +559,12 @@ def plotMatrix(hm, outFileName, zMin = [None] else: zMin = [zMin] # convert to list to support multiple entries - elif 'auto' in zMin: + elif "auto" in zMin: matrix_flatten = hm.matrix.flatten() auto_min = np.percentile(matrix_flatten, 1.0) if np.isnan(auto_min): auto_min = None - new_mins = [float(x) if x != 'auto' else auto_min for x in zMin] + new_mins = [float(x) if x != "auto" else auto_min for x in zMin] zMin = new_mins else: new_mins = [float(x) for x in zMin] @@ -450,12 +579,12 @@ def plotMatrix(hm, outFileName, zMax = [None] else: zMax = [zMax] - elif 'auto' in zMax: + elif "auto" in zMax: matrix_flatten = hm.matrix.flatten() auto_max = np.percentile(matrix_flatten, 98.0) if np.isnan(auto_max): auto_max = None - new_maxs = [float(x) if x != 'auto' else auto_max for x in zMax] + new_maxs = [float(x) if x != "auto" else auto_max for x in zMax] zMax = new_maxs else: new_maxs = [float(x) for x in zMax] @@ -463,9 +592,11 @@ def plotMatrix(hm, outFileName, if (len(zMin) > 1) & (len(zMax) > 1): for index, value in enumerate(zMax): if value <= zMin[index]: - sys.stderr.write("Warnirng: In bigwig {}, the given zmin ({}) is larger than " - "or equal to the given zmax ({}). Thus, it has been set " - "to None. \n".format(index + 1, zMin[index], value)) + sys.stderr.write( + "Warnirng: In bigwig {}, the given zmin ({}) is larger than " + "or equal to the given zmax ({}). Thus, it has been set " + "to None. \n".format(index + 1, zMin[index], value) + ) zMin[index] = None if yMin is None: @@ -477,42 +608,51 @@ def plotMatrix(hm, outFileName, if not isinstance(yMax, list): yMax = [yMax] - plt.rcParams['font.size'] = 8.0 + plt.rcParams["font.size"] = 8.0 fontP = FontProperties() showSummaryPlot = False showColorbar = False - if whatToShow == 'plot and heatmap': + if whatToShow == "plot and heatmap": showSummaryPlot = True - elif whatToShow == 'heatmap and colorbar': + elif whatToShow == "heatmap and colorbar": showColorbar = True - elif whatToShow == 'plot, heatmap and colorbar': + elif whatToShow == "plot, heatmap and colorbar": showSummaryPlot = True showColorbar = True # colormap for the heatmap - if colorMapDict['colorMap']: + if colorMapDict["colorMap"]: cmap = [] - for color_map in colorMapDict['colorMap']: + for color_map in colorMapDict["colorMap"]: copy_cmp = copy.copy(plt.get_cmap(color_map)) cmap.append(copy_cmp) - cmap[-1].set_bad(colorMapDict['missingDataColor']) # nans are printed using this color + cmap[-1].set_bad( + colorMapDict["missingDataColor"] + ) # nans are printed using this color - if colorMapDict['colorList'] and len(colorMapDict['colorList']) > 0: + if colorMapDict["colorList"] and len(colorMapDict["colorList"]) > 0: # make a cmap for each color list given cmap = [] - for color_list in colorMapDict['colorList']: - cmap.append(matplotlib.colors.LinearSegmentedColormap.from_list( - 'my_cmap', color_list.replace(' ', '').split(","), N=colorMapDict['colorNumber'])) - cmap[-1].set_bad(colorMapDict['missingDataColor']) # nans are printed using this color + for color_list in colorMapDict["colorList"]: + cmap.append( + matplotlib.colors.LinearSegmentedColormap.from_list( + "my_cmap", + color_list.replace(" ", "").split(","), + N=colorMapDict["colorNumber"], + ) + ) + cmap[-1].set_bad( + colorMapDict["missingDataColor"] + ) # nans are printed using this color if len(cmap) > 1 or len(zMin) > 1 or len(zMax) > 1: # position color bar below heatmap when more than one # heatmap color is given - colorbar_position = 'below' + colorbar_position = "below" else: - colorbar_position = 'side' + colorbar_position = "side" # figsize: w,h tuple in inches figwidth = heatmapWidth / 2.54 @@ -529,7 +669,7 @@ def plotMatrix(hm, outFileName, num_cols = numsamples total_figwidth = figwidth * num_cols if showColorbar: - if colorbar_position == 'below': + if colorbar_position == "below": figheight += 1 / 2.54 else: total_figwidth += 1 / 2.54 @@ -544,36 +684,47 @@ def plotMatrix(hm, outFileName, showColorbar, perGroup, colorbar_position, - fig + fig, ) # color map for the summary plot (profile) on top of the heatmap - cmap_plot = plt.get_cmap('jet') + cmap_plot = plt.get_cmap("jet") numgroups = hm.matrix.get_num_groups() if perGroup: - color_list = cmap_plot(np.arange(hm.matrix.get_num_samples()) / hm.matrix.get_num_samples()) + color_list = cmap_plot( + np.arange(hm.matrix.get_num_samples()) / hm.matrix.get_num_samples() + ) else: color_list = cmap_plot(np.arange(numgroups) / numgroups) - alpha = colorMapDict['alpha'] - if image_format == 'plotly': - return plotlyMatrix(hm, - outFileName, - yMin=yMin, yMax=yMax, - zMin=zMin, zMax=zMax, - showSummaryPlot=showSummaryPlot, showColorbar=showColorbar, - cmap=cmap, colorList=color_list, colorBarPosition=colorbar_position, - perGroup=perGroup, - averageType=averageType, plotTitle=plotTitle, - xAxisLabel=xAxisLabel, yAxisLabel=yAxisLabel, - label_rotation=label_rotation) + alpha = colorMapDict["alpha"] + if image_format == "plotly": + return plotlyMatrix( + hm, + outFileName, + yMin=yMin, + yMax=yMax, + zMin=zMin, + zMax=zMax, + showSummaryPlot=showSummaryPlot, + showColorbar=showColorbar, + cmap=cmap, + colorList=color_list, + colorBarPosition=colorbar_position, + perGroup=perGroup, + averageType=averageType, + plotTitle=plotTitle, + xAxisLabel=xAxisLabel, + yAxisLabel=yAxisLabel, + label_rotation=label_rotation, + ) # check if matrix is reference-point based using the upstream >0 value # and is sorted by region length. If this is # the case, prepare the data to plot a border at the regions end - regions_length_in_bins = [None] * len(hm.parameters['upstream']) - if hm.matrix.sort_using == 'region_length' and hm.matrix.sort_method != 'no': - for idx in range(len(hm.parameters['upstream'])): - if hm.parameters['ref point'][idx] is None: + regions_length_in_bins = [None] * len(hm.parameters["upstream"]) + if hm.matrix.sort_using == "region_length" and hm.matrix.sort_method != "no": + for idx in range(len(hm.parameters["upstream"])): + if hm.parameters["ref point"][idx] is None: regions_length_in_bins[idx] = None continue @@ -583,16 +734,25 @@ def plotMatrix(hm, outFileName, _reg_len = [] for ind_reg in _group: if isinstance(ind_reg, dict): - _len = ind_reg['end'] - ind_reg['start'] + _len = ind_reg["end"] - ind_reg["start"] else: _len = sum([x[1] - x[0] for x in ind_reg[1]]) - if hm.parameters['ref point'][idx] == 'TSS': - _reg_len.append((hm.parameters['upstream'][idx] + _len) / hm.parameters['bin size'][idx]) - elif hm.parameters['ref point'][idx] == 'center': + if hm.parameters["ref point"][idx] == "TSS": + _reg_len.append( + (hm.parameters["upstream"][idx] + _len) + / hm.parameters["bin size"][idx] + ) + elif hm.parameters["ref point"][idx] == "center": _len *= 0.5 - _reg_len.append((hm.parameters['upstream'][idx] + _len) / hm.parameters['bin size'][idx]) - elif hm.parameters['ref point'][idx] == 'TES': - _reg_len.append((hm.parameters['upstream'][idx] - _len) / hm.parameters['bin size'][idx]) + _reg_len.append( + (hm.parameters["upstream"][idx] + _len) + / hm.parameters["bin size"][idx] + ) + elif hm.parameters["ref point"][idx] == "TES": + _reg_len.append( + (hm.parameters["upstream"][idx] - _len) + / hm.parameters["bin size"][idx] + ) foo.append(_reg_len) regions_length_in_bins[idx] = foo @@ -604,13 +764,41 @@ def plotMatrix(hm, outFileName, else: iterNum = hm.matrix.get_num_samples() iterNum2 = numgroups - ax_list = addProfilePlot(hm, plt, fig, grids, iterNum, iterNum2, perGroup, averageType, plot_type, yAxisLabel, color_list, yMin, yMax, None, None, colorbar_position, label_rotation) - - if legend_location != 'none': - ax = ax_list[-1] # beisi + ax_list = addProfilePlot( + hm, + plt, + fig, + grids, + iterNum, + iterNum2, + perGroup, + averageType, + plot_type, + yAxisLabel, + color_list, + yMin, + yMax, + None, + None, + colorbar_position, + label_rotation, + ) + + if legend_location != "none": + ax = ax_list[-1] # beisi box = ax.get_position() - ax.set_position([box.x0, box.y0 - box.height * 0.1, box.width, box.height * 0.9]) - legend = ax.legend(loc='lower right', shadow=False, fontsize='x-large', bbox_to_anchor=(0, 1.3, 1, .22), ncol=10, frameon=False, prop=fontP) # beisi, legend line + ax.set_position( + [box.x0, box.y0 - box.height * 0.1, box.width, box.height * 0.9] + ) + legend = ax.legend( + loc="lower right", + shadow=False, + fontsize="x-large", + bbox_to_anchor=(0, 1.3, 1, 0.22), + ncol=10, + frameon=False, + prop=fontP, + ) # beisi, legend line ax.add_artist(legend) # ax_list[-1].legend(loc=legend_location.replace('-', ' '), ncol=1, prop=fontP, # frameon=False, markerscale=0.5) @@ -653,33 +841,37 @@ def plotMatrix(hm, outFileName, if box_around_heatmaps is False: # Turn off the boxes around the individual heatmaps - ax.spines['top'].set_visible(False) - ax.spines['right'].set_visible(False) - ax.spines['bottom'].set_visible(False) - ax.spines['left'].set_visible(False) - rows, cols = sub_matrix['matrix'].shape + ax.spines["top"].set_visible(False) + ax.spines["right"].set_visible(False) + ax.spines["bottom"].set_visible(False) + ax.spines["left"].set_visible(False) + rows, cols = sub_matrix["matrix"].shape # if the number of rows is too large, then the 'nearest' method simply # drops rows. A better solution is to relate the threshold to the DPI of the image - if interpolation_method == 'auto': + if interpolation_method == "auto": if rows >= 1000: - interpolation_method = 'bilinear' + interpolation_method = "bilinear" else: - interpolation_method = 'nearest' + interpolation_method = "nearest" # if np.clip is not used, then values of the matrix that exceed the zmax limit are # highlighted. Usually, a significant amount of pixels are equal or above the zmax and # the default behaviour produces images full of large highlighted dots. # If interpolation='nearest' is used, this has no effect - sub_matrix['matrix'] = np.clip(sub_matrix['matrix'], zMin[zmin_idx], zMax[zmax_idx]) - img = ax.imshow(sub_matrix['matrix'], - aspect='auto', - interpolation=interpolation_method, - origin='upper', - vmin=zMin[zmin_idx], - vmax=zMax[zmax_idx], - cmap=cmap[cmap_idx], - alpha=alpha, - extent=[0, cols, rows, 0]) + sub_matrix["matrix"] = np.clip( + sub_matrix["matrix"], zMin[zmin_idx], zMax[zmax_idx] + ) + img = ax.imshow( + sub_matrix["matrix"], + aspect="auto", + interpolation=interpolation_method, + origin="upper", + vmin=zMin[zmin_idx], + vmax=zMax[zmax_idx], + cmap=cmap[cmap_idx], + alpha=alpha, + extent=[0, cols, rows, 0], + ) img.set_rasterized(True) # plot border at the end of the regions # if ordered by length @@ -687,14 +879,19 @@ def plotMatrix(hm, outFileName, x_lim = ax.get_xlim() y_lim = ax.get_ylim() - ax.plot(regions_length_in_bins[sample][group_idx], - np.arange(len(regions_length_in_bins[sample][group_idx])), - '--', color='black', linewidth=0.5, dashes=(3, 2)) + ax.plot( + regions_length_in_bins[sample][group_idx], + np.arange(len(regions_length_in_bins[sample][group_idx])), + "--", + color="black", + linewidth=0.5, + dashes=(3, 2), + ) ax.set_xlim(x_lim) ax.set_ylim(y_lim) if perGroup: - ax.axes.set_xlabel(sub_matrix['group']) + ax.axes.set_xlabel(sub_matrix["group"]) if sample < hm.matrix.get_num_samples() - 1: ax.axes.get_xaxis().set_visible(False) else: @@ -702,32 +899,45 @@ def plotMatrix(hm, outFileName, ax.axes.set_xlabel(xAxisLabel) ax.axes.set_yticks([]) if perGroup and group == 0: - ax.axes.set_ylabel(sub_matrix['sample'],rotation=75,labelpad=0,fontsize=15) + ax.axes.set_ylabel( + sub_matrix["sample"], rotation=75, labelpad=0, fontsize=15 + ) elif not perGroup and sample == 0: - ax.axes.set_ylabel(sub_matrix['group'],rotation=75,labelpad=0,horizontalalignment='right',fontsize=15) + ax.axes.set_ylabel( + sub_matrix["group"], + rotation=75, + labelpad=0, + horizontalalignment="right", + fontsize=15, + ) # Plot vertical lines at tick marks if desired if linesAtTickMarks: xticks_heat, xtickslabel_heat = hm.getTicks(sample) - xticks_heat = [x + 0.5 for x in xticks_heat] # There's an offset of 0.5 compared to the profile plot - if np.ceil(max(xticks_heat)) != float(sub_matrix['matrix'].shape[1]): - tickscale = float(sub_matrix['matrix'].shape[1]) / max(xticks_heat) + xticks_heat = [ + x + 0.5 for x in xticks_heat + ] # There's an offset of 0.5 compared to the profile plot + if np.ceil(max(xticks_heat)) != float(sub_matrix["matrix"].shape[1]): + tickscale = float(sub_matrix["matrix"].shape[1]) / max(xticks_heat) xticks_heat_use = [x * tickscale for x in xticks_heat] else: xticks_heat_use = xticks_heat for x in xticks_heat_use: - ax.axvline(x=x, color='black', linewidth=0.5, dashes=(3, 2)) + ax.axvline(x=x, color="black", linewidth=0.5, dashes=(3, 2)) # add labels to last block in a column - if (perGroup and sample == numsamples - 1) or \ - (not perGroup and group_idx == numgroups - 1): + if (perGroup and sample == numsamples - 1) or ( + not perGroup and group_idx == numgroups - 1 + ): # add xticks to the bottom heatmap (last group) ax.axes.get_xaxis().set_visible(True) xticks_heat, xtickslabel_heat = hm.getTicks(sample) - xticks_heat = [x + 0.5 for x in xticks_heat] # There's an offset of 0.5 compared to the profile plot - if np.ceil(max(xticks_heat)) != float(sub_matrix['matrix'].shape[1]): - tickscale = float(sub_matrix['matrix'].shape[1]) / max(xticks_heat) + xticks_heat = [ + x + 0.5 for x in xticks_heat + ] # There's an offset of 0.5 compared to the profile plot + if np.ceil(max(xticks_heat)) != float(sub_matrix["matrix"].shape[1]): + tickscale = float(sub_matrix["matrix"].shape[1]) / max(xticks_heat) xticks_heat_use = [x * tickscale for x in xticks_heat] ax.axes.set_xticks(xticks_heat_use) else: @@ -738,15 +948,12 @@ def plotMatrix(hm, outFileName, # such that they don't fall off # the heatmap sides ticks = ax.xaxis.get_major_ticks() - ticks[0].label1.set_horizontalalignment('left') - ticks[-1].label1.set_horizontalalignment('right') + ticks[0].label1.set_horizontalalignment("left") + ticks[-1].label1.set_horizontalalignment("right") - ax.get_xaxis().set_tick_params( - which='both', - top=False, - direction='out') + ax.get_xaxis().set_tick_params(which="both", top=False, direction="out") - if showColorbar and colorbar_position == 'below': + if showColorbar and colorbar_position == "below": # draw a colormap per each heatmap below the last block if perGroup: col = group_idx @@ -754,22 +961,24 @@ def plotMatrix(hm, outFileName, col = sample ax = fig.add_subplot(grids[-1, col]) tick_locator = ticker.MaxNLocator(nbins=3) - cbar = fig.colorbar(img, cax=ax, orientation='horizontal', ticks=tick_locator) + cbar = fig.colorbar( + img, cax=ax, orientation="horizontal", ticks=tick_locator + ) labels = cbar.ax.get_xticklabels() ticks = cbar.ax.get_xticks() if ticks[0] == 0: # if the label is at the start of the colobar # move it a bit inside to avoid overlapping # with other labels - labels[0].set_horizontalalignment('left') + labels[0].set_horizontalalignment("left") if ticks[-1] == 1: # if the label is at the end of the colobar # move it a bit inside to avoid overlapping # with other labels - labels[-1].set_horizontalalignment('right') + labels[-1].set_horizontalalignment("right") # cbar.ax.set_xticklabels(labels, rotation=90) - if showColorbar and colorbar_position != 'below': + if showColorbar and colorbar_position != "below": if showSummaryPlot: # we don't want to colorbar to extend # over the profiles and spacer top rows @@ -781,12 +990,18 @@ def plotMatrix(hm, outFileName, fig.colorbar(img, cax=ax) if box_around_heatmaps: - fig.get_layout_engine().set(wspace=0.10, hspace=0.025, rect=(0.04, 0, 0.96, 0.85)) + fig.get_layout_engine().set( + wspace=0.10, hspace=0.025, rect=(0.04, 0, 0.96, 0.85) + ) else: # When no box is plotted the space between heatmaps is reduced - fig.get_layout_engine().set(wspace=0.05, hspace=0.01, rect=(0.04, 0, 0.96, 0.85)) + fig.get_layout_engine().set( + wspace=0.05, hspace=0.01, rect=(0.04, 0, 0.96, 0.85) + ) - plt.savefig(outFileName, bbox_inches='tight', pad_inches=0.1, dpi=dpi, format=image_format) + plt.savefig( + outFileName, bbox_inches="tight", pad_inches=0.1, dpi=dpi, format=image_format + ) plt.close() @@ -805,8 +1020,7 @@ def mergeSmallGroups(matrixDict): if len(to_merge): to_merge.append(label) new_label = " ".join(to_merge) - new_ma = np.concatenate([matrixDict[item] - for item in to_merge], axis=0) + new_ma = np.concatenate([matrixDict[item] for item in to_merge], axis=0) else: new_label = label new_ma = matrixDict[label] @@ -833,24 +1047,39 @@ def main(args=None): args.matrixFile.close() hm.read_matrix_file(matrix_file) - if hm.parameters['min threshold'] is not None or hm.parameters['max threshold'] is not None: - filterHeatmapValues(hm, hm.parameters['min threshold'], hm.parameters['max threshold']) + if ( + hm.parameters["min threshold"] is not None + or hm.parameters["max threshold"] is not None + ): + filterHeatmapValues( + hm, hm.parameters["min threshold"], hm.parameters["max threshold"] + ) - if args.sortRegions == 'keep': - args.sortRegions = 'no' # These are the same thing + if args.sortRegions == "keep": + args.sortRegions = "no" # These are the same thing if args.kmeans is not None: - hm.matrix.hmcluster(args.kmeans, method='kmeans', clustering_samples=args.clusterUsingSamples) + hm.matrix.hmcluster( + args.kmeans, method="kmeans", clustering_samples=args.clusterUsingSamples + ) elif args.hclust is not None: - print("Performing hierarchical clustering." - "Please note that it might be very slow for large datasets.\n") - hm.matrix.hmcluster(args.hclust, method='hierarchical', clustering_samples=args.clusterUsingSamples) + print( + "Performing hierarchical clustering." + "Please note that it might be very slow for large datasets.\n" + ) + hm.matrix.hmcluster( + args.hclust, + method="hierarchical", + clustering_samples=args.clusterUsingSamples, + ) group_len_ratio = np.diff(hm.matrix.group_boundaries) / len(hm.matrix.regions) if np.any(group_len_ratio < 5.0 / 1000): problem = np.flatnonzero(group_len_ratio < 5.0 / 1000) - sys.stderr.write("WARNING: Group '{}' is too small for plotting, you might want to remove it. " - "There will likely be an error message from matplotlib regarding this " - "below.\n".format(hm.matrix.group_labels[problem[0]])) + sys.stderr.write( + "WARNING: Group '{}' is too small for plotting, you might want to remove it. " + "There will likely be an error message from matplotlib regarding this " + "below.\n".format(hm.matrix.group_labels[problem[0]]) + ) if args.regionsLabel: hm.matrix.set_group_labels(args.regionsLabel) @@ -858,19 +1087,25 @@ def main(args=None): if args.samplesLabel and len(args.samplesLabel): hm.matrix.set_sample_labels(args.samplesLabel) - if args.sortRegions != 'no': + if args.sortRegions != "no": sortUsingSamples = [] if args.sortUsingSamples is not None: for i in args.sortUsingSamples: - if (i > 0 and i <= hm.matrix.get_num_samples()): + if i > 0 and i <= hm.matrix.get_num_samples(): sortUsingSamples.append(i - 1) else: - exit("The value {0} for --sortSamples is not valid. Only values from 1 to {1} are allowed.".format(args.sortUsingSamples, hm.matrix.get_num_samples())) - print('Samples used for ordering within each group: ', sortUsingSamples) - - hm.matrix.sort_groups(sort_using=args.sortUsing, - sort_method=args.sortRegions, - sample_list=sortUsingSamples) + exit( + "The value {0} for --sortSamples is not valid. Only values from 1 to {1} are allowed.".format( + args.sortUsingSamples, hm.matrix.get_num_samples() + ) + ) + print("Samples used for ordering within each group: ", sortUsingSamples) + + hm.matrix.sort_groups( + sort_using=args.sortUsing, + sort_method=args.sortRegions, + sample_list=sortUsingSamples, + ) if args.silhouette: if args.kmeans is not None: @@ -884,31 +1119,40 @@ def main(args=None): if args.outFileSortedRegions: hm.save_BED(args.outFileSortedRegions) - colormap_dict = {'colorMap': args.colorMap, - 'colorList': args.colorList, - 'colorNumber': args.colorNumber, - 'missingDataColor': args.missingDataColor, - 'alpha': args.alpha} - - plotMatrix(hm, - args.outFileName, - colormap_dict, args.plotTitle, - args.xAxisLabel, args.yAxisLabel, args.regionsLabel, - args.zMin, args.zMax, - args.yMin, args.yMax, - args.averageTypeSummaryPlot, - args.refPointLabel, - args.startLabel, - args.endLabel, - args.heatmapHeight, - args.heatmapWidth, - args.perGroup, - args.whatToShow, - linesAtTickMarks=args.linesAtTickMarks, - plot_type=args.plotType, - image_format=args.plotFileFormat, - legend_location=args.legendLocation, - box_around_heatmaps=args.boxAroundHeatmaps, - label_rotation=args.label_rotation, - dpi=args.dpi, - interpolation_method=args.interpolationMethod) + colormap_dict = { + "colorMap": args.colorMap, + "colorList": args.colorList, + "colorNumber": args.colorNumber, + "missingDataColor": args.missingDataColor, + "alpha": args.alpha, + } + + plotMatrix( + hm, + args.outFileName, + colormap_dict, + args.plotTitle, + args.xAxisLabel, + args.yAxisLabel, + args.regionsLabel, + args.zMin, + args.zMax, + args.yMin, + args.yMax, + args.averageTypeSummaryPlot, + args.refPointLabel, + args.startLabel, + args.endLabel, + args.heatmapHeight, + args.heatmapWidth, + args.perGroup, + args.whatToShow, + linesAtTickMarks=args.linesAtTickMarks, + plot_type=args.plotType, + image_format=args.plotFileFormat, + legend_location=args.legendLocation, + box_around_heatmaps=args.boxAroundHeatmaps, + label_rotation=args.label_rotation, + dpi=args.dpi, + interpolation_method=args.interpolationMethod, + ) diff --git a/deeptools/plotProfile.py b/deeptools/plotProfile.py index da1c212f39..0526fe483c 100755 --- a/deeptools/plotProfile.py +++ b/deeptools/plotProfile.py @@ -8,9 +8,10 @@ import numpy as np from math import ceil import matplotlib -matplotlib.use('Agg') -matplotlib.rcParams['pdf.fonttype'] = 42 -matplotlib.rcParams['svg.fonttype'] = 'none' + +matplotlib.use("Agg") +matplotlib.rcParams["pdf.fonttype"] = 42 +matplotlib.rcParams["svg.fonttype"] = "none" import deeptools.cm # noqa: F401 import matplotlib.pyplot as plt from matplotlib.font_manager import FontProperties @@ -29,31 +30,35 @@ debug = 0 if debug: from ipdb import set_trace -old_settings = np.seterr(all='ignore') +old_settings = np.seterr(all="ignore") plt.ioff() + def parse_arguments(args=None): parser = argparse.ArgumentParser( - parents=[parserCommon.heatmapperMatrixArgs(), - parserCommon.heatmapperOutputArgs(mode='profile'), - parserCommon.heatmapperOptionalArgs(mode='profile')], + parents=[ + parserCommon.heatmapperMatrixArgs(), + parserCommon.heatmapperOutputArgs(mode="profile"), + parserCommon.heatmapperOptionalArgs(mode="profile"), + ], formatter_class=argparse.ArgumentDefaultsHelpFormatter, - description='This tool creates a profile plot for ' - 'scores over sets of genomic regions. ' - 'Typically, these regions are genes, but ' - 'any other regions defined in BED ' - ' will work. A matrix generated ' - 'by computeMatrix is required.', - epilog='An example usage is: plotProfile -m matrix.gz', + description="This tool creates a profile plot for " + "scores over sets of genomic regions. " + "Typically, these regions are genes, but " + "any other regions defined in BED " + " will work. A matrix generated " + "by computeMatrix is required.", + epilog="An example usage is: plotProfile -m matrix.gz", add_help=False, - usage='plotProfile -m matrix.gz\n' - 'help: plotProfile -h / plotProfile --help') + usage="plotProfile -m matrix.gz\n" "help: plotProfile -h / plotProfile --help", + ) return parser -def autobreaklinetitle(title,sep="[-_,.:]",lmax=15): + +def autobreaklinetitle(title, sep="[-_,.:]", lmax=15): outsep = "-" - sss = [ rr for rr in re.split(sep,title) if len(rr) ] + sss = [rr for rr in re.split(sep, title) if len(rr)] newtitle, tmp = "", "" for ss in sss: tmp0 = tmp @@ -67,6 +72,7 @@ def autobreaklinetitle(title,sep="[-_,.:]",lmax=15): newtitle = "\n" + newtitle return newtitle + def process_args(args=None): args = parse_arguments().parse_args(args) @@ -81,9 +87,9 @@ def process_args(args=None): args.yMax = [None] # Sometimes Galaxy sends --yMax '' and --yMin '' - if args.yMin == ['']: + if args.yMin == [""]: args.yMin = [None] - if args.yMax == ['']: + if args.yMax == [""]: args.yMax = [None] # Convert to floats @@ -104,23 +110,30 @@ def process_args(args=None): class Profile(object): - def __init__(self, hm, out_file_name, - plot_title='', y_axis_label='', - y_min=None, y_max=None, - averagetype='median', - reference_point_label=None, - start_label='TSS', end_label='TES', - plot_height=7, - plot_width=11, - per_group=False, - repgrplist=None, - plot_type='lines', - image_format=None, - color_list=None, - legend_location='best', - plots_per_row=8, - label_rotation=0, - dpi=200): + def __init__( + self, + hm, + out_file_name, + plot_title="", + y_axis_label="", + y_min=None, + y_max=None, + averagetype="median", + reference_point_label=None, + start_label="TSS", + end_label="TES", + plot_height=7, + plot_width=11, + per_group=False, + repgrplist=None, + plot_type="lines", + image_format=None, + color_list=None, + legend_location="best", + plots_per_row=8, + label_rotation=0, + dpi=200, + ): """ Using the hm matrix, makes a line plot either per group or per sample @@ -175,7 +188,7 @@ def __init__(self, hm, out_file_name, # Honor reference point labels from computeMatrix if reference_point_label is None: - self.reference_point_label = hm.parameters['ref point'] + self.reference_point_label = hm.parameters["ref point"] # decide how many plots are needed if self.per_group: self.numplots = self.hm.matrix.get_num_groups() @@ -195,13 +208,15 @@ def __init__(self, hm, out_file_name, cols = self.numplots self.grids = gridspec.GridSpec(rows, cols) - plt.rcParams['font.size'] = 10.0 + plt.rcParams["font.size"] = 10.0 self.font_p = FontProperties() - self.font_p.set_size('small') + self.font_p.set_size("small") # convert cm values to inches plot_height_inches = rows * self.cm2inch(self.plot_height)[0] - self.fig = plt.figure(figsize=self.cm2inch(cols * self.plot_width, rows * self.plot_height)) + self.fig = plt.figure( + figsize=self.cm2inch(cols * self.plot_width, rows * self.plot_height) + ) self.fig.suptitle(self.plot_title, y=(1 - (0.06 / plot_height_inches))) # Ensure that the labels are vectors @@ -217,7 +232,13 @@ def getTicks(self, idx): """ This is essentially a wrapper around getProfileTicks to accomdate the fact that each column has its own ticks. """ - xticks, xtickslabel = getProfileTicks(self.hm, self.reference_point_label[idx], self.start_label[idx], self.end_label[idx], idx) + xticks, xtickslabel = getProfileTicks( + self.hm, + self.reference_point_label[idx], + self.start_label[idx], + self.end_label[idx], + idx, + ) return xticks, xtickslabel @staticmethod @@ -230,8 +251,9 @@ def cm2inch(*tupl): def plot_hexbin(self): from matplotlib import cm + cmap = cm.coolwarm - cmap.set_bad('black') + cmap.set_bad("black") if self.image_format == "plotly": return self.plotly_hexbin() @@ -244,17 +266,20 @@ def plot_hexbin(self): # split the ax to make room for the colorbar and for each of the # groups - sub_grid = gridspec.GridSpecFromSubplotSpec(self.numlines, 2, subplot_spec=self.grids[row, col], - width_ratios=[0.92, 0.08], wspace=0.05, hspace=0.1) + sub_grid = gridspec.GridSpecFromSubplotSpec( + self.numlines, + 2, + subplot_spec=self.grids[row, col], + width_ratios=[0.92, 0.08], + wspace=0.05, + hspace=0.1, + ) ax = self.fig.add_subplot(sub_grid[0, 0]) ax.tick_params( - axis='y', - which='both', - left=False, - right=False, - labelleft=True) + axis="y", which="both", left=False, right=False, labelleft=True + ) if self.per_group: title = self.hm.matrix.group_labels[plot] @@ -271,7 +296,7 @@ def plot_hexbin(self): _row, _col = data_idx, plot sub_matrix = self.hm.matrix.get_matrix(_row, _col) - ma = sub_matrix['matrix'] + ma = sub_matrix["matrix"] x_values = np.tile(np.arange(ma.shape[1]), (ma.shape[0], 1)) img = ax.hexbin(x_values.flatten(), ma.flatten(), cmap=cmap, mincnt=1) _vmin, _vmax = img.get_clim() @@ -303,19 +328,26 @@ def plot_hexbin(self): sub_matrix = self.hm.matrix.get_matrix(_row, _col) if self.per_group: - label = sub_matrix['sample'] + label = sub_matrix["sample"] else: - label = sub_matrix['group'] + label = sub_matrix["group"] - ma = sub_matrix['matrix'] + ma = sub_matrix["matrix"] try: # matplotlib 2.0 - ax.set_facecolor('black') + ax.set_facecolor("black") except: # matplotlib <2.0 - ax.set_axis_bgcolor('black') + ax.set_axis_bgcolor("black") x_values = np.tile(np.arange(ma.shape[1]), (ma.shape[0], 1)) - img = ax.hexbin(x_values.flatten(), ma.flatten(), cmap=cmap, mincnt=1, vmin=vmin, vmax=vmax) + img = ax.hexbin( + x_values.flatten(), + ma.flatten(), + cmap=cmap, + mincnt=1, + vmin=vmin, + vmax=vmax, + ) if plot == 0: ax.axes.set_ylabel(label) @@ -333,7 +365,7 @@ def plot_hexbin(self): xticks, xtickslabel = self.getTicks(plot) if np.ceil(max(xticks)) != float(ma.shape[1] - 1): - tickscale = float(sub_matrix['matrix'].shape[1]) / max(xticks) + tickscale = float(sub_matrix["matrix"].shape[1]) / max(xticks) xticks_use = [x * tickscale for x in xticks] ax_list[0].axes.set_xticks(xticks_use) else: @@ -343,8 +375,8 @@ def plot_hexbin(self): # such that they don't fall off # the heatmap sides ticks = ax_list[-1].xaxis.get_major_ticks() - ticks[0].label1.set_horizontalalignment('left') - ticks[-1].label1.set_horizontalalignment('right') + ticks[0].label1.set_horizontalalignment("left") + ticks[-1].label1.set_horizontalalignment("right") cax = self.fig.add_subplot(sub_grid[:, 1]) self.fig.colorbar(img, cax=cax) @@ -357,11 +389,13 @@ def plot_hexbin(self): def plotly_hexbin(self): """plot_hexbin, but for plotly. it's annoying that we have to have sub-subplots""" fig = go.Figure() - cols = self.plots_per_row if self.numplots > self.plots_per_row else self.numplots + cols = ( + self.plots_per_row if self.numplots > self.plots_per_row else self.numplots + ) rows = np.ceil(self.numplots / float(cols)).astype(int) - fig['layout'].update(title=self.plot_title) - domainWidth = .9 / cols - domainHeight = .9 / rows + fig["layout"].update(title=self.plot_title) + domainWidth = 0.9 / cols + domainHeight = 0.9 / rows bufferHeight = 0.0 if rows > 1: bufferHeight = 0.1 / (rows - 1) @@ -397,9 +431,21 @@ def plotly_hexbin(self): base = col * (domainWidth + bufferWidth) domain = [base, base + domainWidth] titleX = base + 0.5 * domainWidth - xanchor = 'x{}'.format(i + 1) - fig['layout']['xaxis{}'.format(i + 1)] = dict(domain=domain) - annos.append({'yanchor': 'bottom', 'xref': 'paper', 'xanchor': 'center', 'yref': 'paper', 'text': title, 'y': titleY, 'x': titleX, 'font': {'size': 16}, 'showarrow': False}) + xanchor = "x{}".format(i + 1) + fig["layout"]["xaxis{}".format(i + 1)] = dict(domain=domain) + annos.append( + { + "yanchor": "bottom", + "xref": "paper", + "xanchor": "center", + "yref": "paper", + "text": title, + "y": titleY, + "x": titleX, + "font": {"size": 16}, + "showarrow": False, + } + ) # set yMin/yMax yMin = np.inf @@ -411,7 +457,7 @@ def plotly_hexbin(self): else: _row, _col = j, i - ma = self.hm.matrix.get_matrix(_row, _col)['matrix'] + ma = self.hm.matrix.get_matrix(_row, _col)["matrix"] if np.min(ma) < yMin: yMin = np.min(ma) if np.max(ma) > yMax: @@ -427,30 +473,53 @@ def plotly_hexbin(self): else: _row, _col = j, i foo = i * self.numlines + j + 1 - yanchor = 'y{}'.format(foo) + yanchor = "y{}".format(foo) base = row * (domainHeight + bufferHeight) + j * subHeight domain = [base, base + subHeight] - fig['layout']['yaxis{}'.format(foo)] = {'domain': domain, 'title': self.y_axis_label, 'anchor': xanchor, 'range': [yMin, yMax]} + fig["layout"]["yaxis{}".format(foo)] = { + "domain": domain, + "title": self.y_axis_label, + "anchor": xanchor, + "range": [yMin, yMax], + } if j == 0: _ = "xaxis{}".format(xanchor[1:]) - fig['layout'][_].update(anchor='y{}'.format(foo)) + fig["layout"][_].update(anchor="y{}".format(foo)) if col == 0: titleY = base + 0.5 * subHeight - annos.append({'yanchor': 'middle', 'xref': 'paper', 'xanchor': 'left', 'yref': 'paper', 'text': sideLabels[j], 'y': titleY, 'x': -0.03, 'font': {'size': 16}, 'showarrow': False, 'textangle': -90}) + annos.append( + { + "yanchor": "middle", + "xref": "paper", + "xanchor": "left", + "yref": "paper", + "text": sideLabels[j], + "y": titleY, + "x": -0.03, + "font": {"size": 16}, + "showarrow": False, + "textangle": -90, + } + ) sub_matrix = self.hm.matrix.get_matrix(_row, _col) - ma = self.hm.matrix.get_matrix(_row, _col)['matrix'] + ma = self.hm.matrix.get_matrix(_row, _col)["matrix"] - fig['layout']['xaxis{}'.format(i + 1)].update(range=[0, ma.shape[1]]) + fig["layout"]["xaxis{}".format(i + 1)].update(range=[0, ma.shape[1]]) if self.per_group: - label = sub_matrix['sample'] + label = sub_matrix["sample"] else: - label = sub_matrix['group'] + label = sub_matrix["group"] # Manually compute the 2D histogram with 100x100 bins x_values = np.tile(np.arange(ma.shape[1]), (ma.shape[0], 1)) - z, xe, ye = np.histogram2d(x_values.flatten(), ma.flatten(), bins=100, range=[[0, ma.shape[1]], [yMin, yMax]]) + z, xe, ye = np.histogram2d( + x_values.flatten(), + ma.flatten(), + bins=100, + range=[[0, ma.shape[1]], [yMin, yMax]], + ) _vmin = np.min(z) _vmax = np.max(z) @@ -459,7 +528,15 @@ def plotly_hexbin(self): if _vmax > vmax: vmax = _vmax - trace = go.Contour(z=z.T, x=xe, y=ye, xaxis=xanchor, yaxis=yanchor, name=label, connectgaps=False) + trace = go.Contour( + z=z.T, + x=xe, + y=ye, + xaxis=xanchor, + yaxis=yanchor, + name=label, + connectgaps=False, + ) data.append(trace) # Assume the bounds for the last graph are correct @@ -471,18 +548,25 @@ def plotly_hexbin(self): else: xticks_use = xticks xticks_use = [np.ceil(x) for x in xticks_use] - fig['layout']['xaxis{}'.format(i + 1)].update(tickmode='array', tickvals=xticks_use, ticktext=xtickslabel, tickangle=self.label_rotation) + fig["layout"]["xaxis{}".format(i + 1)].update( + tickmode="array", + tickvals=xticks_use, + ticktext=xtickslabel, + tickangle=self.label_rotation, + ) for trace in data: trace.update(zmin=vmin, zmax=vmax) fig.add_traces(data) - fig['layout']['annotations'] = annos + fig["layout"]["annotations"] = annos py.plot(fig, filename=self.out_file_name, auto_open=False) def plot_heatmap(self): - cmap = ['RdYlBu_r'] - if self.color_list is not None: # check the length to be equal to the numebr of plots otherwise multiply it! + cmap = ["RdYlBu_r"] + if ( + self.color_list is not None + ): # check the length to be equal to the numebr of plots otherwise multiply it! cmap = self.color_list if len(cmap) < self.numplots: all_colors = cmap @@ -517,18 +601,20 @@ def plot_heatmap(self): localYMax = None # split the ax to make room for the colorbar - sub_grid = gridspec.GridSpecFromSubplotSpec(1, 2, subplot_spec=self.grids[row, col], - width_ratios=[0.92, 0.08], wspace=0.05) + sub_grid = gridspec.GridSpecFromSubplotSpec( + 1, + 2, + subplot_spec=self.grids[row, col], + width_ratios=[0.92, 0.08], + wspace=0.05, + ) ax = self.fig.add_subplot(sub_grid[0]) cax = self.fig.add_subplot(sub_grid[1]) ax.tick_params( - axis='y', - which='both', - left=False, - right=False, - labelleft=True) + axis="y", which="both", left=False, right=False, labelleft=True + ) if self.per_group: title = self.hm.matrix.group_labels[plot] @@ -551,13 +637,23 @@ def plot_heatmap(self): sub_matrix = self.hm.matrix.get_matrix(row, col) if self.per_group: - label = sub_matrix['sample'] + label = sub_matrix["sample"] else: - label = sub_matrix['group'] + label = sub_matrix["group"] labels.append(label) - mat.append(np.ma.__getattribute__(self.averagetype)(sub_matrix['matrix'], axis=0)) - img = ax.imshow(np.vstack(mat), interpolation='nearest', - cmap=cmap[plot], aspect='auto', vmin=localYMin, vmax=localYMax) + mat.append( + np.ma.__getattribute__(self.averagetype)( + sub_matrix["matrix"], axis=0 + ) + ) + img = ax.imshow( + np.vstack(mat), + interpolation="nearest", + cmap=cmap[plot], + aspect="auto", + vmin=localYMin, + vmax=localYMax, + ) self.fig.colorbar(img, cax=cax) totalWidth = np.vstack(mat).shape[1] @@ -573,12 +669,14 @@ def plot_heatmap(self): # such that they don't fall off # the heatmap sides ticks = ax.xaxis.get_major_ticks() - ticks[0].label1.set_horizontalalignment('left') - ticks[-1].label1.set_horizontalalignment('right') + ticks[0].label1.set_horizontalalignment("left") + ticks[-1].label1.set_horizontalalignment("right") # add labels as y ticks labels ymin, ymax = ax.axes.get_ylim() - pos, distance = np.linspace(ymin, ymax, len(labels), retstep=True, endpoint=False) + pos, distance = np.linspace( + ymin, ymax, len(labels), retstep=True, endpoint=False + ) d_half = float(distance) / 2 yticks = [x + d_half for x in pos] @@ -602,11 +700,13 @@ def plot_heatmap(self): def plotly_heatmap(self): """plot_heatmap, but with plotly output""" fig = go.Figure() - cols = self.plots_per_row if self.numplots > self.plots_per_row else self.numplots + cols = ( + self.plots_per_row if self.numplots > self.plots_per_row else self.numplots + ) rows = np.ceil(self.numplots / float(cols)).astype(int) - fig['layout'].update(title=self.plot_title) - domainWidth = .9 / cols - domainHeight = .9 / rows + fig["layout"].update(title=self.plot_title) + domainWidth = 0.9 / cols + domainHeight = 0.9 / rows bufferHeight = 0.0 if rows > 1: bufferHeight = 0.1 / (rows - 1) @@ -634,17 +734,36 @@ def plotly_heatmap(self): base = row * (domainHeight + bufferHeight) domain = [base, base + domainHeight] titleY = base + domainHeight - xanchor = 'x{}'.format(i + 1) - yanchor = 'y{}'.format(i + 1) + xanchor = "x{}".format(i + 1) + yanchor = "y{}".format(i + 1) visible = False if col == 0: visible = True - fig['layout']['yaxis{}'.format(i + 1)] = {'domain': domain, 'anchor': xanchor, 'visible': visible} + fig["layout"]["yaxis{}".format(i + 1)] = { + "domain": domain, + "anchor": xanchor, + "visible": visible, + } base = col * (domainWidth + bufferWidth) domain = [base, base + domainWidth] titleX = base + 0.5 * domainWidth - fig['layout']['xaxis{}'.format(i + 1)] = {'domain': domain, 'anchor': yanchor} - annos.append({'yanchor': 'bottom', 'xref': 'paper', 'xanchor': 'center', 'yref': 'paper', 'text': title, 'y': titleY, 'x': titleX, 'font': {'size': 16}, 'showarrow': False}) + fig["layout"]["xaxis{}".format(i + 1)] = { + "domain": domain, + "anchor": yanchor, + } + annos.append( + { + "yanchor": "bottom", + "xref": "paper", + "xanchor": "center", + "yref": "paper", + "text": title, + "y": titleY, + "x": titleX, + "font": {"size": 16}, + "showarrow": False, + } + ) mat = [] labels = [] @@ -657,17 +776,28 @@ def plotly_heatmap(self): sub_matrix = self.hm.matrix.get_matrix(row, col) if self.per_group: - label = sub_matrix['sample'] + label = sub_matrix["sample"] else: - label = sub_matrix['group'] + label = sub_matrix["group"] labels.append(label) - mat.append(np.ma.__getattribute__(self.averagetype)(sub_matrix['matrix'], axis=0)) + mat.append( + np.ma.__getattribute__(self.averagetype)( + sub_matrix["matrix"], axis=0 + ) + ) if np.min(mat[-1]) < zmin: zmin = np.min(mat[-1]) if np.max(mat[-1]) > zmax: zmax = np.max(mat[-1]) totalWidth = len(mat[-1]) - trace = go.Heatmap(name=title, z=mat, x=range(totalWidth + 1), y=labels, xaxis=xanchor, yaxis=yanchor) + trace = go.Heatmap( + name=title, + z=mat, + x=range(totalWidth + 1), + y=labels, + xaxis=xanchor, + yaxis=yanchor, + ) data.append(trace) # Add ticks @@ -678,7 +808,12 @@ def plotly_heatmap(self): else: xticks_use = xticks xticks_use = [np.ceil(x) for x in xticks_use] - fig['layout']['xaxis{}'.format(i + 1)].update(tickmode='array', tickvals=xticks_use, ticktext=xtickslabel, tickangle=self.label_rotation) + fig["layout"]["xaxis{}".format(i + 1)].update( + tickmode="array", + tickvals=xticks_use, + ticktext=xtickslabel, + tickangle=self.label_rotation, + ) # Adjust color scale limits for i, trace in enumerate(data): @@ -691,7 +826,7 @@ def plotly_heatmap(self): trace.update(zmin=zminUse, zmax=zmaxUse) fig.add_traces(data) - fig['layout']['annotations'] = annos + fig["layout"]["annotations"] = annos py.plot(fig, filename=self.out_file_name, auto_open=False) def plot_profile(self): @@ -701,21 +836,30 @@ def plot_profile(self): self.y_max = [None] if not self.color_list: - cmap_plot = plt.get_cmap('jet') + cmap_plot = plt.get_cmap("jet") if self.numlines > 1: # kmeans, so we need to color by cluster - self.color_list = cmap_plot(np.arange(self.numlines, dtype=float) / float(self.numlines)) + self.color_list = cmap_plot( + np.arange(self.numlines, dtype=float) / float(self.numlines) + ) else: - self.color_list = cmap_plot(np.arange(self.numplots, dtype=float) / float(self.numplots)) - if (self.numlines > 1 and len(self.color_list) < self.numlines) or\ - (self.numlines == 1 and len(self.color_list) < self.numplots): - sys.exit("\nThe given list of colors is too small, " - "at least {} colors are needed\n".format(self.numlines)) + self.color_list = cmap_plot( + np.arange(self.numplots, dtype=float) / float(self.numplots) + ) + if (self.numlines > 1 and len(self.color_list) < self.numlines) or ( + self.numlines == 1 and len(self.color_list) < self.numplots + ): + sys.exit( + "\nThe given list of colors is too small, " + "at least {} colors are needed\n".format(self.numlines) + ) for color in self.color_list: if not pltcolors.is_color_like(color): - sys.exit("\nThe color name {} is not valid. Check " - "the name or try with a html hex string " - "for example #eeff22".format(color)) + sys.exit( + "\nThe color name {} is not valid. Check " + "the name or try with a html hex string " + "for example #eeff22".format(color) + ) if self.image_format == "plotly": return self.plotly_profile() @@ -762,12 +906,14 @@ def plot_profile(self): if len(self.repgrplist) >= nsamptmp: thisrepgrp = self.repgrplist[data_idx] else: - thisrepgrp = repgrplistuniq[int(data_idx / (nsamptmp/self.numlines))] + thisrepgrp = repgrplistuniq[ + int(data_idx / (nsamptmp / self.numlines)) + ] try: repgrp_samp_dict[thisrepgrp].append(data_idx) except: - repgrp_samp_dict[thisrepgrp] = [ data_idx ] - + repgrp_samp_dict[thisrepgrp] = [data_idx] + if debug: set_trace() for irepgrp, repgrp in enumerate(repgrplistuniq): @@ -775,46 +921,58 @@ def plot_profile(self): for data_idx in repgrp_samp_dict[repgrp]: _row, _col = plot, data_idx sub_matrix = self.hm.matrix.get_matrix(_row, _col) - sub_matrix_list.append(sub_matrix['matrix']) - + sub_matrix_list.append(sub_matrix["matrix"]) + label = f"{repgrp}(n={len(repgrp_samp_dict[repgrp])})" - + if self.numlines > 1: coloridx = irepgrp else: coloridx = plot - plot_single(ax, sub_matrix_list, - self.averagetype, - self.color_list[coloridx], - label, - plot_type=self.plot_type) + plot_single( + ax, + sub_matrix_list, + self.averagetype, + self.color_list[coloridx], + label, + plot_type=self.plot_type, + ) else: for data_idx in range(self.numlines): if self.per_group: _row, _col = plot, data_idx else: _row, _col = data_idx, plot - if localYMin is None or self.y_min[col % len(self.y_min)] < localYMin: + if ( + localYMin is None + or self.y_min[col % len(self.y_min)] < localYMin + ): localYMin = self.y_min[col % len(self.y_min)] - if localYMax is None or self.y_max[col % len(self.y_max)] > localYMax: + if ( + localYMax is None + or self.y_max[col % len(self.y_max)] > localYMax + ): localYMax = self.y_max[col % len(self.y_max)] - + sub_matrix = self.hm.matrix.get_matrix(_row, _col) - + if self.per_group: - label = sub_matrix['sample'] + label = sub_matrix["sample"] else: - label = sub_matrix['group'] - + label = sub_matrix["group"] + if self.numlines > 1: coloridx = data_idx else: coloridx = plot - plot_single(ax, sub_matrix['matrix'], - self.averagetype, - self.color_list[coloridx], - label, - plot_type=self.plot_type) + plot_single( + ax, + sub_matrix["matrix"], + self.averagetype, + self.color_list[coloridx], + label, + plot_type=self.plot_type, + ) globalYmin = min(np.float64(globalYmin), ax.get_ylim()[0]) globalYmax = max(globalYmax, ax.get_ylim()[1]) @@ -822,7 +980,7 @@ def plot_profile(self): if col > 0 and len(self.y_min) == 1 and len(self.y_max) == 1: plt.setp(ax.get_yticklabels(), visible=False) - totalWidth = sub_matrix['matrix'].shape[1] + totalWidth = sub_matrix["matrix"].shape[1] xticks, xtickslabel = self.getTicks(tickIdx) if np.ceil(max(xticks)) != float(totalWidth - 1): tickscale = float(totalWidth) / max(xticks) @@ -835,15 +993,19 @@ def plot_profile(self): # such that they don't fall off # the heatmap sides ticks = ax.xaxis.get_major_ticks() - ticks[0].label1.set_horizontalalignment('left') - ticks[-1].label1.set_horizontalalignment('right') + ticks[0].label1.set_horizontalalignment("left") + ticks[-1].label1.set_horizontalalignment("right") - if first and self.y_axis_label != '': + if first and self.y_axis_label != "": ax.set_ylabel(self.y_axis_label) - if first and self.plot_type not in ['heatmap', 'overlapped_lines']: - ax.legend(loc=self.legend_location.replace('-', ' '), - ncol=1, prop=self.font_p, - frameon=False, markerscale=0.5) + if first and self.plot_type not in ["heatmap", "overlapped_lines"]: + ax.legend( + loc=self.legend_location.replace("-", " "), + ncol=1, + prop=self.font_p, + frameon=False, + markerscale=0.5, + ) if len(self.y_min) == 1 and len(self.y_max) == 1: first = False ax_list.append(ax) @@ -876,11 +1038,13 @@ def plotly_profile(self): y_min, y_max, and color_list are set already """ fig = go.Figure() - cols = self.plots_per_row if self.numplots > self.plots_per_row else self.numplots + cols = ( + self.plots_per_row if self.numplots > self.plots_per_row else self.numplots + ) rows = np.ceil(self.numplots / float(cols)).astype(int) - fig['layout'].update(title=self.plot_title) - domainWidth = .9 / cols - domainHeight = .9 / rows + fig["layout"].update(title=self.plot_title) + domainWidth = 0.9 / cols + domainHeight = 0.9 / rows bufferHeight = 0.0 if rows > 1: bufferHeight = 0.1 / (rows - 1) @@ -896,22 +1060,42 @@ def plotly_profile(self): row = np.floor(i / self.plots_per_row) # row = rows - i / self.plots_per_row - 1 col = i % self.plots_per_row - xanchor = 'x{}'.format(i + 1) - yanchor = 'y{}'.format(i + 1) + xanchor = "x{}".format(i + 1) + yanchor = "y{}".format(i + 1) base = row * (domainHeight + bufferHeight) domain = [base, base + domainHeight] titleY = base + domainHeight - fig['layout']['yaxis{}'.format(i + 1)] = {'domain': domain, 'title': self.y_axis_label, 'anchor': xanchor, 'autorange': False} + fig["layout"]["yaxis{}".format(i + 1)] = { + "domain": domain, + "title": self.y_axis_label, + "anchor": xanchor, + "autorange": False, + } base = col * (domainWidth + bufferWidth) domain = [base, base + domainWidth] titleX = base + 0.5 * domainWidth - fig['layout']['xaxis{}'.format(i + 1)] = {'domain': domain, 'anchor': yanchor} + fig["layout"]["xaxis{}".format(i + 1)] = { + "domain": domain, + "anchor": yanchor, + } if self.per_group: title = self.hm.matrix.group_labels[i] else: title = self.hm.matrix.sample_labels[i] - annos.append({'yanchor': 'bottom', 'xref': 'paper', 'xanchor': 'center', 'yref': 'paper', 'text': title, 'y': titleY, 'x': titleX, 'font': {'size': 16}, 'showarrow': False}) + annos.append( + { + "yanchor": "bottom", + "xref": "paper", + "xanchor": "center", + "yref": "paper", + "text": title, + "y": titleY, + "x": titleX, + "font": {"size": 16}, + "showarrow": False, + } + ) for j in range(self.numlines): if self.per_group: @@ -920,33 +1104,37 @@ def plotly_profile(self): _row, _col = j, i sub_matrix = self.hm.matrix.get_matrix(_row, _col) - fig['layout']['xaxis{}'.format(i + 1)].update(range=[0, sub_matrix['matrix'].shape[1]]) + fig["layout"]["xaxis{}".format(i + 1)].update( + range=[0, sub_matrix["matrix"].shape[1]] + ) if self.per_group: - label = sub_matrix['sample'] + label = sub_matrix["sample"] else: - label = sub_matrix['group'] + label = sub_matrix["group"] if self.numlines > 1: coloridx = j else: coloridx = i color = self.color_list[coloridx] - traces = plotly_single(sub_matrix['matrix'], - self.averagetype, - color, - label, - plot_type=self.plot_type) + traces = plotly_single( + sub_matrix["matrix"], + self.averagetype, + color, + label, + plot_type=self.plot_type, + ) for trace in traces: trace.update(xaxis=xanchor, yaxis=yanchor) - if yMin is None or min(trace['y']) < yMin: - yMin = min(trace['y']) - if yMax is None or max(trace['y']) > yMax: - yMax = max(trace['y']) + if yMin is None or min(trace["y"]) < yMin: + yMin = min(trace["y"]) + if yMax is None or max(trace["y"]) > yMax: + yMax = max(trace["y"]) if row == col == 0: traces[0].update(showlegend=True) data.extend(traces) - totalWidth = sub_matrix['matrix'].shape[1] + totalWidth = sub_matrix["matrix"].shape[1] xticks, xtickslabel = self.getTicks(i) if np.ceil(max(xticks)) != float(totalWidth): tickscale = float(totalWidth) / max(xticks) @@ -954,20 +1142,25 @@ def plotly_profile(self): else: xticks_use = xticks xticks_use = [np.ceil(x) for x in xticks_use] - fig['layout']['xaxis{}'.format(i + 1)].update(tickmode='array', tickvals=xticks_use, ticktext=xtickslabel, tickangle=self.label_rotation) + fig["layout"]["xaxis{}".format(i + 1)].update( + tickmode="array", + tickvals=xticks_use, + ticktext=xtickslabel, + tickangle=self.label_rotation, + ) # Set the y limits for i in range(self.numplots): - yaxis = 'yaxis{}'.format(i + 1) + yaxis = "yaxis{}".format(i + 1) yRange = [yMin, yMax] if self.y_min[i % len(self.y_min)] is not None: yRange[0] = self.y_min[i % len(self.y_min)] if self.y_max[i % len(self.y_max)] is not None: yRange[1] = self.y_max[i % len(self.y_max)] - fig['layout'][yaxis].update(range=yRange) + fig["layout"][yaxis].update(range=yRange) fig.add_traces(data) - fig['layout']['annotations'] = annos + fig["layout"]["annotations"] = annos py.plot(fig, filename=self.out_file_name, auto_open=False) @@ -978,21 +1171,40 @@ def main(args=None): args.matrixFile.close() hm.read_matrix_file(matrix_file) - if hm.parameters['min threshold'] is not None or hm.parameters['max threshold'] is not None: - filterHeatmapValues(hm, hm.parameters['min threshold'], hm.parameters['max threshold']) + if ( + hm.parameters["min threshold"] is not None + or hm.parameters["max threshold"] is not None + ): + filterHeatmapValues( + hm, hm.parameters["min threshold"], hm.parameters["max threshold"] + ) if args.kmeans is not None: - hm.matrix.hmcluster(args.kmeans, method='kmeans', clustering_samples=args.clusterUsingSamples) + hm.matrix.hmcluster( + args.kmeans, method="kmeans", clustering_samples=args.clusterUsingSamples + ) else: if args.hclust is not None: - print("Performing hierarchical clustering." - "Please note that it might be very slow for large datasets.\n") - hm.matrix.hmcluster(args.hclust, method='hierarchical', clustering_samples=args.clusterUsingSamples) - - group_len_ratio = np.diff(hm.matrix.group_boundaries) / float(len(hm.matrix.regions)) + print( + "Performing hierarchical clustering." + "Please note that it might be very slow for large datasets.\n" + ) + hm.matrix.hmcluster( + args.hclust, + method="hierarchical", + clustering_samples=args.clusterUsingSamples, + ) + + group_len_ratio = np.diff(hm.matrix.group_boundaries) / float( + len(hm.matrix.regions) + ) if np.any(group_len_ratio < 5.0 / 1000): problem = np.flatnonzero(group_len_ratio < 5.0 / 1000) - sys.stderr.write("WARNING: Group '{}' is too small for plotting, you might want to remove it. \n".format(hm.matrix.group_labels[problem[0]])) + sys.stderr.write( + "WARNING: Group '{}' is too small for plotting, you might want to remove it. \n".format( + hm.matrix.group_labels[problem[0]] + ) + ) if args.regionsLabel: hm.matrix.set_group_labels(args.regionsLabel) @@ -1001,37 +1213,44 @@ def main(args=None): hm.matrix.set_sample_labels(args.samplesLabel) if args.outFileNameData: - hm.save_tabulated_values(args.outFileNameData, reference_point_label=args.refPointLabel, - start_label=args.startLabel, - end_label=args.endLabel, - averagetype=args.averageType) + hm.save_tabulated_values( + args.outFileNameData, + reference_point_label=args.refPointLabel, + start_label=args.startLabel, + end_label=args.endLabel, + averagetype=args.averageType, + ) if args.outFileSortedRegions: hm.save_BED(args.outFileSortedRegions) - prof = Profile(hm, args.outFileName, - plot_title=args.plotTitle, - y_axis_label=args.yAxisLabel, - y_min=args.yMin, y_max=args.yMax, - averagetype=args.averageType, - reference_point_label=args.refPointLabel, - start_label=args.startLabel, - end_label=args.endLabel, - plot_height=args.plotHeight, - plot_width=args.plotWidth, - per_group=args.perGroup, - repgrplist=args.repgrplist, - plot_type=args.plotType, - image_format=args.plotFileFormat, - color_list=args.colors, - legend_location=args.legendLocation, - plots_per_row=args.numPlotsPerRow, - label_rotation=args.label_rotation, - dpi=args.dpi) - - if args.plotType == 'heatmap': + prof = Profile( + hm, + args.outFileName, + plot_title=args.plotTitle, + y_axis_label=args.yAxisLabel, + y_min=args.yMin, + y_max=args.yMax, + averagetype=args.averageType, + reference_point_label=args.refPointLabel, + start_label=args.startLabel, + end_label=args.endLabel, + plot_height=args.plotHeight, + plot_width=args.plotWidth, + per_group=args.perGroup, + repgrplist=args.repgrplist, + plot_type=args.plotType, + image_format=args.plotFileFormat, + color_list=args.colors, + legend_location=args.legendLocation, + plots_per_row=args.numPlotsPerRow, + label_rotation=args.label_rotation, + dpi=args.dpi, + ) + + if args.plotType == "heatmap": prof.plot_heatmap() - elif args.plotType == 'overlapped_lines': + elif args.plotType == "overlapped_lines": prof.plot_hexbin() else: prof.plot_profile() From b0e47072dc5886afbbb95882fdfcf5b90c38e8af Mon Sep 17 00:00:00 2001 From: "Xu, Beisi" Date: Wed, 24 Sep 2025 15:04:03 -0500 Subject: [PATCH 14/18] flake8 lint --- .github/workflows/test.yml | 4 ++++ deeptools/computeGCBias.py | 17 +++++------------ deeptools/correctGCBias.py | 37 +++++++++++++++++-------------------- deeptools/heatmapper.py | 2 +- deeptools/plotHeatmap.py | 27 ++++++++++++++------------- deeptools/plotProfile.py | 17 +++++++++-------- 6 files changed, 50 insertions(+), 54 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 201633d0f5..7ee61fbe5a 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -45,6 +45,10 @@ jobs: with: python-version: '3.12' cache: 'pip' + - uses: mamba-org/setup-micromamba@v2 + with: + # Any version from https://github.com/mamba-org/micromamba-releases + micromamba-version: '2.0.5-0' - name: pip install run: | pip install .[actions] diff --git a/deeptools/computeGCBias.py b/deeptools/computeGCBias.py index 3732e1bc58..7209b51007 100755 --- a/deeptools/computeGCBias.py +++ b/deeptools/computeGCBias.py @@ -212,8 +212,8 @@ def getPositionsToSample(chrom, start, end, stepSize): if len(out_match) > 0: for intval in out_match: positions_to_sample = positions_to_sample[ - (positions_to_sample < intval[0]) - | (positions_to_sample >= intval[1]) + (positions_to_sample < intval[0]) | + (positions_to_sample >= intval[1]) ] return positions_to_sample @@ -488,8 +488,6 @@ def tabulateGCcontent( [ 0. , 0. , 1. ], [ 0. , 0. , 1. ]]) """ - global global_vars - chrNameBamToBit = dict([(v, k) for k, v in chrNameBitToBam.items()]) chunkSize = int(min(2e6, 4e5 / global_vars["reads_per_bp"])) chromSizes = [(k, v) for k, v in chromSizes if k in list(chrNameBamToBit.keys())] @@ -550,8 +548,6 @@ def countReadsPerGC( [134. , 0.43666667], [134. , 0.44 ]]) """ - global global_vars - chrNameBamToBit = dict([(v, k) for k, v in chrNameBitToBam.items()]) chunkSize = int(min(2e6, 4e5 / global_vars["reads_per_bp"])) @@ -588,7 +584,7 @@ def smooth(x, window_len=3): if i < half_width or i + half_width + 1 > len(x): continue else: - y[i] = np.mean(x[i - half_width : i + half_width + 1]) + y[i] = np.mean(x[i - half_width: i + half_width + 1]) # clip low values, this avoid problems with zeros return y @@ -760,8 +756,8 @@ def plotGCbias(file_name, frequencies, reads_per_gc, region_size, image_format=N ax2.set_xlabel("GC fraction") ax2.set_ylabel("log2ratio observed/expected") ax2.set_xlim(0.2, 0.7) - y_max = max(y[np.where(x >= 0.2)[0][0] : np.where(x <= 0.7)[0][-1] + 1]) - y_min = min(y[np.where(x >= 0.2)[0][0] : np.where(x <= 0.7)[0][-1] + 1]) + y_max = max(y[np.where(x >= 0.2)[0][0]: np.where(x <= 0.7)[0][-1] + 1]) + y_min = min(y[np.where(x >= 0.2)[0][0]: np.where(x <= 0.7)[0][-1] + 1]) if y_max > 0: y_max *= 1.1 else: @@ -929,15 +925,12 @@ def testTabulateGCcontentWorker(self): return (self.chrNameBam, start, end, stepSize, fragmentLength, chrNameBam2bit) def set_filter_out_file(self): - global global_vars global_vars["filter_out"] = self.root + "filter_out.bed" def unset_filter_out_file(self): - global global_vars global_vars["filter_out"] = None def set_extra_sampling_file(self): - global global_vars global_vars["extra_sampling_file"] = self.root + "extra_sampling.bed" def testTabulateGCcontent(self): diff --git a/deeptools/correctGCBias.py b/deeptools/correctGCBias.py index 3b1e4e7247..048bf4a64c 100755 --- a/deeptools/correctGCBias.py +++ b/deeptools/correctGCBias.py @@ -190,7 +190,6 @@ def writeCorrected_worker(chrNameBam, chrNameBit, start, end, step): ['chr2L\t200\t225\t31.6\n', 'chr2L\t225\t250\t33.8\n', 'chr2L\t250\t275\t37.9\n', 'chr2L\t275\t300\t40.9\n'] >>> os.remove(tempFile) """ - global R_gc fragmentLength = len(R_gc) - 1 cvg_corr = np.zeros(end - start) @@ -229,10 +228,10 @@ def writeCorrected_worker(chrNameBam, chrNameBit, start, end, step): # is this read in the same orientation and position as the previous? if ( - r_index > 0 - and read.pos == reads[r_index - 1].pos - and read.is_reverse == reads[r_index - 1].is_reverse - and read.pnext == reads[r_index - 1].pnext + r_index > 0 and + read.pos == reads[r_index - 1].pos and + read.is_reverse == reads[r_index - 1].is_reverse and + read.pnext == reads[r_index - 1].pnext ): read_repetitions += 1 if read_repetitions >= global_vars["max_dup_gc"][gc]: @@ -278,7 +277,7 @@ def writeCorrected_worker(chrNameBam, chrNameBit, start, end, step): _file = open(utilities.getTempFileName(suffix=".bg"), "w") # save in bedgraph format for bin in range(0, len(cvg_corr), step): - value = np.mean(cvg_corr[bin : min(bin + step, end)]) + value = np.mean(cvg_corr[bin: min(bin + step, end)]) if value > 0: writeStart = start + bin writeEnd = min(start + bin + step, end) @@ -358,7 +357,6 @@ def writeCorrectedSam_worker( >>> res = os.remove(tempFile) >>> res = os.remove(tempFile+".bai") """ - global R_gc fragmentLength = len(R_gc) - 1 if verbose: @@ -411,11 +409,11 @@ def writeCorrectedSam_worker( copies = 1 # is this read in the same orientation and position as the previous? if ( - gc - and r_index > 0 - and read.pos == reads[r_index - 1].pos - and read.is_reverse == reads[r_index - 1].is_reverse - and read.pnext == reads[r_index - 1].pnext + gc and + r_index > 0 and + read.pos == reads[r_index - 1].pos and + read.is_reverse == reads[r_index - 1].is_reverse and + read.pnext == reads[r_index - 1].pnext ): read_repetitions += 1 if read_repetitions >= global_vars["max_dup_gc"][gc]: @@ -454,10 +452,10 @@ def writeCorrectedSam_worker( read.set_tags(readTag) if ( - read.is_paired - and read.is_proper_pair - and not read.mate_is_unmapped - and not read.is_reverse + read.is_paired and + read.is_proper_pair and + not read.mate_is_unmapped and + not read.is_reverse ): matePairs[readName] = {"copies": copies, "gc": gc} @@ -767,7 +765,7 @@ def __init__(self): def testWriteCorrectedChunk(self): """prepare arguments for test""" - global R_gc, R_gc_min, R_gc_max + global R_gc R_gc = np.loadtxt(self.root + "R_gc_paired.txt") global_vars["max_dup_gc"] = np.ones(301) @@ -779,7 +777,7 @@ def testWriteCorrectedChunk(self): def testWriteCorrectedSam(self): """prepare arguments for test""" - global R_gc, R_gc_min, R_gc_max + global R_gc R_gc = np.loadtxt(self.root + "R_gc_paired.txt") global_vars["max_dup_gc"] = np.ones(301) @@ -790,12 +788,11 @@ def testWriteCorrectedSam(self): def testWriteCorrectedSam_paired(self): """prepare arguments for test.""" - global R_gc, R_gc_min, R_gc_max + global R_gc R_gc = np.loadtxt(self.root + "R_gc_paired.txt") start = 0 end = 500 - global global_vars global_vars["bam"] = self.root + "paired.bam" return "chr2L", "chr2L", start, end diff --git a/deeptools/heatmapper.py b/deeptools/heatmapper.py index c1cf5ea1f9..941e91f17f 100644 --- a/deeptools/heatmapper.py +++ b/deeptools/heatmapper.py @@ -1309,7 +1309,7 @@ def hmcluster(self, k, evaluate_silhouette=True, method='kmeans', clustering_sam cluster_number = 1 for cluster in cluster_order: cluster_ids = _cluster_ids_list[cluster] - self.group_labels.append("cluster_{}_n{}".format(cluster_number,len(cluster_ids))) + self.group_labels.append("cluster_{}_n{}".format(cluster_number, len(cluster_ids))) cluster_number += 1 self.group_boundaries.append(self.group_boundaries[-1] + len(cluster_ids)) diff --git a/deeptools/plotHeatmap.py b/deeptools/plotHeatmap.py index 163078dbe1..1bf66a15d5 100755 --- a/deeptools/plotHeatmap.py +++ b/deeptools/plotHeatmap.py @@ -15,7 +15,9 @@ import matplotlib.gridspec as gridspec from matplotlib import ticker -import sys, re, os, copy +import sys +import re +import copy import plotly.offline as py import plotly.graph_objs as go @@ -423,9 +425,9 @@ def plotlyMatrix( mat = hm.matrix.get_matrix(j, i) lengths.append(mat["matrix"].shape[0]) fractionalHeights = ( - heatmapHeight - * np.cumsum(lengths).astype(float) - / np.sum(lengths).astype(float) + heatmapHeight * + np.cumsum(lengths).astype(float) / + np.sum(lengths).astype(float) ) xDomain = [xBase, xBase + heatmapWidth] fig["layout"]["xaxis{}".format(xAxisN)] = dict( @@ -739,20 +741,19 @@ def plotMatrix( _len = sum([x[1] - x[0] for x in ind_reg[1]]) if hm.parameters["ref point"][idx] == "TSS": _reg_len.append( - (hm.parameters["upstream"][idx] + _len) - / hm.parameters["bin size"][idx] + (hm.parameters["upstream"][idx] + _len) / + hm.parameters["bin size"][idx] ) elif hm.parameters["ref point"][idx] == "center": _len *= 0.5 _reg_len.append( - (hm.parameters["upstream"][idx] + _len) - / hm.parameters["bin size"][idx] + (hm.parameters["upstream"][idx] + _len) / + hm.parameters["bin size"][idx] ) elif hm.parameters["ref point"][idx] == "TES": _reg_len.append( - (hm.parameters["upstream"][idx] - _len) - / hm.parameters["bin size"][idx] - ) + (hm.parameters["upstream"][idx] - _len) / + hm.parameters["bin size"][idx]) foo.append(_reg_len) regions_length_in_bins[idx] = foo @@ -1048,8 +1049,8 @@ def main(args=None): hm.read_matrix_file(matrix_file) if ( - hm.parameters["min threshold"] is not None - or hm.parameters["max threshold"] is not None + hm.parameters["min threshold"] is not None or + hm.parameters["max threshold"] is not None ): filterHeatmapValues( hm, hm.parameters["min threshold"], hm.parameters["max threshold"] diff --git a/deeptools/plotProfile.py b/deeptools/plotProfile.py index 0526fe483c..e3672f75d3 100755 --- a/deeptools/plotProfile.py +++ b/deeptools/plotProfile.py @@ -2,7 +2,8 @@ # -*- coding: utf-8 -*- -import sys, os, re +import sys +import re import argparse import numpy as np @@ -899,7 +900,7 @@ def plot_profile(self): repgrp_samp_dict = {} repgrplistuniq = [] for tmp in self.repgrplist: - if not tmp in repgrplistuniq: + if tmp not in repgrplistuniq: repgrplistuniq.append(tmp) for data_idx in range(nsamptmp): @@ -944,13 +945,13 @@ def plot_profile(self): else: _row, _col = data_idx, plot if ( - localYMin is None - or self.y_min[col % len(self.y_min)] < localYMin + localYMin is None or + self.y_min[col % len(self.y_min)] < localYMin ): localYMin = self.y_min[col % len(self.y_min)] if ( - localYMax is None - or self.y_max[col % len(self.y_max)] > localYMax + localYMax is None or + self.y_max[col % len(self.y_max)] > localYMax ): localYMax = self.y_max[col % len(self.y_max)] @@ -1172,8 +1173,8 @@ def main(args=None): hm.read_matrix_file(matrix_file) if ( - hm.parameters["min threshold"] is not None - or hm.parameters["max threshold"] is not None + hm.parameters["min threshold"] is not None or + hm.parameters["max threshold"] is not None ): filterHeatmapValues( hm, hm.parameters["min threshold"], hm.parameters["max threshold"] From 5cf1e395c69777c1ff15b4dbbea55f2ddd3a5aac Mon Sep 17 00:00:00 2001 From: "Xu, Beisi" Date: Thu, 25 Sep 2025 13:54:14 -0500 Subject: [PATCH 15/18] debug from numpy --- deeptools/computeMatrixOperations.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deeptools/computeMatrixOperations.py b/deeptools/computeMatrixOperations.py index 0224f00a39..aca60e7024 100755 --- a/deeptools/computeMatrixOperations.py +++ b/deeptools/computeMatrixOperations.py @@ -7,7 +7,7 @@ import os import csv from importlib.metadata import version - +import warnings def parse_arguments(): parser = argparse.ArgumentParser( @@ -427,7 +427,7 @@ def filterHeatmapValues(hm, minVal, maxVal): minVal = -np.inf if maxVal is None: maxVal = np.inf - np.warnings.filterwarnings('ignore') + warnings.filterwarnings('ignore') for i, (x, y) in enumerate(zip(np.nanmin(hm.matrix.matrix, axis=1), np.nanmax(hm.matrix.matrix, axis=1))): # x/y will be nan iff a row is entirely nan. Don't filter. if np.isnan(x) or (x >= minVal and y <= maxVal): From f9f9e3b048f891541f177a8193d1051e66e3ca1a Mon Sep 17 00:00:00 2001 From: "Xu, Beisi" Date: Thu, 25 Sep 2025 13:59:52 -0500 Subject: [PATCH 16/18] pep8 --- deeptools/computeMatrixOperations.py | 1 + 1 file changed, 1 insertion(+) diff --git a/deeptools/computeMatrixOperations.py b/deeptools/computeMatrixOperations.py index aca60e7024..8e03070521 100755 --- a/deeptools/computeMatrixOperations.py +++ b/deeptools/computeMatrixOperations.py @@ -9,6 +9,7 @@ from importlib.metadata import version import warnings + def parse_arguments(): parser = argparse.ArgumentParser( formatter_class=argparse.RawDescriptionHelpFormatter, From baea09b0560e85e3b2be3069883e965cdaa93518 Mon Sep 17 00:00:00 2001 From: "Xu, Beisi" Date: Thu, 25 Sep 2025 14:08:05 -0500 Subject: [PATCH 17/18] update workflow --- .github/workflows/test.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 7ee61fbe5a..201633d0f5 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -45,10 +45,6 @@ jobs: with: python-version: '3.12' cache: 'pip' - - uses: mamba-org/setup-micromamba@v2 - with: - # Any version from https://github.com/mamba-org/micromamba-releases - micromamba-version: '2.0.5-0' - name: pip install run: | pip install .[actions] From 3e15ce32ccbb32b4c3d69cc7966fa262a46f9968 Mon Sep 17 00:00:00 2001 From: "Xu, Beisi" Date: Tue, 7 Oct 2025 11:59:41 -0500 Subject: [PATCH 18/18] debug for new deeptools,matplotlib version --- deeptools/plotHeatmap.py | 792 ++++++++++++++------------------------- 1 file changed, 272 insertions(+), 520 deletions(-) diff --git a/deeptools/plotHeatmap.py b/deeptools/plotHeatmap.py index 1bf66a15d5..761a9750de 100755 --- a/deeptools/plotHeatmap.py +++ b/deeptools/plotHeatmap.py @@ -6,18 +6,15 @@ from collections import OrderedDict import numpy as np import matplotlib - -matplotlib.use("Agg") -matplotlib.rcParams["pdf.fonttype"] = 42 -matplotlib.rcParams["svg.fonttype"] = "none" +matplotlib.use('Agg') +matplotlib.rcParams['pdf.fonttype'] = 42 +matplotlib.rcParams['svg.fonttype'] = 'none' import matplotlib.pyplot as plt from matplotlib.font_manager import FontProperties import matplotlib.gridspec as gridspec from matplotlib import ticker - -import sys -import re import copy +import sys import plotly.offline as py import plotly.graph_objs as go @@ -30,26 +27,26 @@ from deeptools.computeMatrixOperations import filterHeatmapValues debug = 0 -old_settings = np.seterr(all="ignore") +old_settings = np.seterr(all='ignore') plt.ioff() +import re +from pdb import set_trace def parse_arguments(args=None): parser = argparse.ArgumentParser( - parents=[ - parserCommon.heatmapperMatrixArgs(), - parserCommon.heatmapperOutputArgs(mode="heatmap"), - parserCommon.heatmapperOptionalArgs(mode="heatmap"), - ], + parents=[parserCommon.heatmapperMatrixArgs(), + parserCommon.heatmapperOutputArgs(mode='heatmap'), + parserCommon.heatmapperOptionalArgs(mode='heatmap')], formatter_class=argparse.ArgumentDefaultsHelpFormatter, - description="This tool creates a heatmap for " - "scores associated with genomic regions. " - "The program requires a matrix file " - "generated by the tool ``computeMatrix``.", - epilog="An example usage is: plotHeatmap -m matrix.gz", - usage="plotHeatmap -m matrix.gz\n" "help: plotHeatmap -h / plotHeatmap --help", - add_help=False, - ) + description='This tool creates a heatmap for ' + 'scores associated with genomic regions. ' + 'The program requires a matrix file ' + 'generated by the tool ``computeMatrix``.', + epilog='An example usage is: plotHeatmap -m matrix.gz', + usage='plotHeatmap -m matrix.gz\n' + 'help: plotHeatmap -h / plotHeatmap --help', + add_help=False) return parser @@ -57,33 +54,17 @@ def parse_arguments(args=None): def process_args(args=None): args = parse_arguments().parse_args(args) - args.heatmapHeight = ( - args.heatmapHeight - if args.heatmapHeight > 3 and args.heatmapHeight <= 100 - else 10 - ) + args.heatmapHeight = args.heatmapHeight if args.heatmapHeight > 3 and args.heatmapHeight <= 100 else 10 if not matplotlib.colors.is_color_like(args.missingDataColor): - exit( - "The value {0} for --missingDataColor is not valid".format( - args.missingDataColor - ) - ) + exit("The value {0} for --missingDataColor is not valid".format(args.missingDataColor)) - args.boxAroundHeatmaps = True if args.boxAroundHeatmaps == "yes" else False + args.boxAroundHeatmaps = True if args.boxAroundHeatmaps == 'yes' else False return args -def prepare_layout( - hm_matrix, - heatmapsize, - showSummaryPlot, - showColorbar, - perGroup, - colorbar_position, - fig, -): +def prepare_layout(hm_matrix, heatmapsize, showSummaryPlot, showColorbar, perGroup, colorbar_position, fig): """ prepare the plot layout as a grid having as many rows @@ -101,9 +82,7 @@ def prepare_layout( # on the number of regions contained in the if perGroup: # heatmap - height_ratio = np.array( - [np.amax(np.diff(hm_matrix.group_boundaries))] * numrows - ) + height_ratio = np.array([np.amax(np.diff(hm_matrix.group_boundaries))] * numrows) # scale ratio to sum = heatmapheight height_ratio = heatmapheight * (height_ratio.astype(float) / height_ratio.sum()) else: @@ -118,7 +97,7 @@ def prepare_layout( width_ratio = [heatmapwidth] * numcols if showColorbar: - if colorbar_position == "below": + if colorbar_position == 'below': numrows += 2 # a spacer needs to be added to avoid overlaps height_ratio += [4 / 2.54] # spacer height_ratio += [1 / 2.54] @@ -131,23 +110,16 @@ def prepare_layout( # make height of summary plot # proportional to the width of heatmap sumplot_height = heatmapwidth - spacer_height = heatmapwidth / 8 + spacer_height = heatmapwidth / 24 # beisi # scale height_ratios to convert from row # numbers to heatmapheigt fractions height_ratio = np.concatenate([[sumplot_height, spacer_height], height_ratio]) - grids = gridspec.GridSpec( - numrows, - numcols, - height_ratios=height_ratio, - width_ratios=width_ratio, - figure=fig, - ) + grids = gridspec.GridSpec(numrows, numcols, height_ratios=height_ratio, width_ratios=width_ratio, figure=fig) return grids - -def autobreaklinetitle(title, sep="[-_,.:]", lmax=15): +def autobreaklinetitle(title, sep="[-_,.:]", lmax=15): # beisi outsep = "-" sss = [rr for rr in re.split(sep, title) if len(rr)] newtitle, tmp = "", "" @@ -163,38 +135,15 @@ def autobreaklinetitle(title, sep="[-_,.:]", lmax=15): newtitle = "\n" + newtitle return newtitle - -def addProfilePlot( - hm, - plt, - fig, - grids, - iterNum, - iterNum2, - perGroup, - averageType, - plot_type, - yAxisLabel, - color_list, - yMin, - yMax, - wspace, - hspace, - colorbar_position, - label_rotation=0.0, -): +def addProfilePlot(hm, plt, fig, grids, iterNum, iterNum2, perGroup, averageType, plot_type, yAxisLabel, color_list, yMin, yMax, wspace, hspace, colorbar_position, label_rotation=0.0): """ A function to add profile plots to the given figure, possibly in a custom grid subplot which mimics a tight layout (if wspace and hspace are not None) """ if wspace is not None and hspace is not None: - if colorbar_position == "side": - gridsSub = gridspec.GridSpecFromSubplotSpec( - 1, iterNum, subplot_spec=grids[0, :-1], wspace=wspace, hspace=hspace - ) + if colorbar_position == 'side': + gridsSub = gridspec.GridSpecFromSubplotSpec(1, iterNum, subplot_spec=grids[0, :-1], wspace=wspace, hspace=hspace) else: - gridsSub = gridspec.GridSpecFromSubplotSpec( - 1, iterNum, subplot_spec=grids[0, :], wspace=wspace, hspace=hspace - ) + gridsSub = gridspec.GridSpecFromSubplotSpec(1, iterNum, subplot_spec=grids[0, :], wspace=wspace, hspace=hspace) ax_list = [] globalYmin = np.inf @@ -214,22 +163,19 @@ def addProfilePlot( else: ax_profile = fig.add_subplot(grids[0, sample_id]) - ax_profile.set_title(autobreaklinetitle(title)) + ax_profile.set_title(autobreaklinetitle(title)) # beisi for group in range(iterNum2): if perGroup: sub_matrix = hm.matrix.get_matrix(sample_id, group) - line_label = sub_matrix["sample"] + line_label = sub_matrix['sample'] else: sub_matrix = hm.matrix.get_matrix(group, sample_id) - line_label = sub_matrix["group"] - plot_single( - ax_profile, - sub_matrix["matrix"], - averageType, - color_list[group], - line_label, - plot_type=plot_type, - ) + line_label = sub_matrix['group'] + plot_single(ax_profile, sub_matrix['matrix'], + averageType, + color_list[group], + line_label, + plot_type=plot_type) if sample_id > 0 and len(yMin) == 1 and len(yMax) == 1: plt.setp(ax_profile.get_yticklabels(), visible=False) @@ -238,8 +184,8 @@ def addProfilePlot( if sample_id == 0 and yAxisLabel != "": ax_profile.set_ylabel(yAxisLabel) xticks, xtickslabel = hm.getTicks(tickIdx) - if np.ceil(max(xticks)) != float(sub_matrix["matrix"].shape[1] - 1): - tickscale = float(sub_matrix["matrix"].shape[1] - 1) / max(xticks) + if np.ceil(max(xticks)) != float(sub_matrix['matrix'].shape[1] - 1): + tickscale = float(sub_matrix['matrix'].shape[1] - 1) / max(xticks) xticks_use = [x * tickscale for x in xticks] ax_profile.axes.set_xticks(xticks_use) else: @@ -251,8 +197,8 @@ def addProfilePlot( # such that they don't fall off # the heatmap sides ticks = ax_profile.xaxis.get_major_ticks() - ticks[0].label1.set_horizontalalignment("left") - ticks[-1].label1.set_horizontalalignment("right") + ticks[0].label1.set_horizontalalignment('left') + ticks[-1].label1.set_horizontalalignment('right') globalYmin = min(float(globalYmin), ax_profile.get_ylim()[0]) globalYmax = max(globalYmax, ax_profile.get_ylim()[1]) @@ -275,30 +221,20 @@ def addProfilePlot( return ax_list -def plotlyMatrix( - hm, - outFilename, - yMin=[None], - yMax=[None], - zMin=[None], - zMax=[None], - showSummaryPlot=False, - cmap=None, - colorList=None, - colorBarPosition="side", - perGroup=False, - averageType="median", - yAxisLabel="", - xAxisLabel="", - plotTitle="", - showColorbar=False, - label_rotation=0.0, -): +def plotlyMatrix(hm, + outFilename, + yMin=[None], yMax=[None], + zMin=[None], zMax=[None], + showSummaryPlot=False, + cmap=None, colorList=None, colorBarPosition='side', + perGroup=False, + averageType='median', yAxisLabel='', xAxisLabel='', + plotTitle='', + showColorbar=False, + label_rotation=0.0): label_rotation *= -1.0 - if colorBarPosition != "side": - sys.error.write( - "Warning: It is not currently possible to have multiple colorbars with plotly!\n" - ) + if colorBarPosition != 'side': + sys.error.write("Warning: It is not currently possible to have multiple colorbars with plotly!\n") nRows = hm.matrix.get_num_groups() nCols = hm.matrix.get_num_samples() @@ -310,8 +246,8 @@ def plotlyMatrix( if showSummaryPlot: profileHeight = 0.2 profileBottomBuffer = 0.05 - profileSideBuffer = 0.0 - profileWidth = 1.0 / nCols + profileSideBuffer = 0. + profileWidth = 1. / nCols if nCols > 1: profileSideBuffer = 0.1 / (nCols - 1) profileWidth = 0.9 / nCols @@ -319,7 +255,7 @@ def plotlyMatrix( dataSummary = [] annos = [] fig = go.Figure() - fig["layout"].update(title=plotTitle) + fig['layout'].update(title=plotTitle) xAxisN = 1 yAxisN = 1 @@ -328,8 +264,8 @@ def plotlyMatrix( yMinLocal = np.inf yMaxLocal = -np.inf for i in range(nCols): - xanchor = "x{}".format(xAxisN) - yanchor = "y{}".format(yAxisN) + xanchor = 'x{}'.format(xAxisN) + yanchor = 'y{}'.format(yAxisN) xBase = i * (profileSideBuffer + profileWidth) yBase = 1 - profileHeight xDomain = [xBase, xBase + profileWidth] @@ -338,32 +274,20 @@ def plotlyMatrix( if perGroup: mat = hm.matrix.get_matrix(i, j) xTicks, xTicksLabels = hm.getTicks(i) - label = mat["sample"] + label = mat['sample'] else: mat = hm.matrix.get_matrix(j, i) xTicks, xTicksLabels = hm.getTicks(j) - label = mat["group"] + label = mat['group'] if j == 0: - fig["layout"]["xaxis{}".format(xAxisN)] = dict( - domain=xDomain, - anchor=yanchor, - range=[0, mat["matrix"].shape[1]], - tickmode="array", - tickvals=xTicks, - ticktext=xTicksLabels, - tickangle=label_rotation, - ) - fig["layout"]["yaxis{}".format(yAxisN)] = dict( - anchor=xanchor, domain=yDomain - ) - trace = plotly_single(mat["matrix"], averageType, colorList[j], label)[ - 0 - ] + fig['layout']['xaxis{}'.format(xAxisN)] = dict(domain=xDomain, anchor=yanchor, range=[0, mat['matrix'].shape[1]], tickmode='array', tickvals=xTicks, ticktext=xTicksLabels, tickangle=label_rotation) + fig['layout']['yaxis{}'.format(yAxisN)] = dict(anchor=xanchor, domain=yDomain) + trace = plotly_single(mat['matrix'], averageType, colorList[j], label)[0] trace.update(xaxis=xanchor, yaxis=yanchor, legendgroup=label) - if min(trace["y"]) < yMinLocal: - yMinLocal = min(trace["y"]) - if max(trace["y"]) > yMaxLocal: - yMaxLocal = max(trace["y"]) + if min(trace['y']) < yMinLocal: + yMinLocal = min(trace['y']) + if max(trace['y']) > yMaxLocal: + yMaxLocal = max(trace['y']) if i == 0: trace.update(showlegend=True) dataSummary.append(trace) @@ -374,19 +298,7 @@ def plotlyMatrix( else: title = hm.matrix.sample_labels[i] titleX = xBase + 0.5 * profileWidth - annos.append( - { - "yanchor": "bottom", - "xref": "paper", - "xanchor": "center", - "yref": "paper", - "text": title, - "y": 1.0, - "x": titleX, - "font": {"size": 16}, - "showarrow": False, - } - ) + annos.append({'yanchor': 'bottom', 'xref': 'paper', 'xanchor': 'center', 'yref': 'paper', 'text': title, 'y': 1.0, 'x': titleX, 'font': {'size': 16}, 'showarrow': False}) xAxisN += 1 yAxisN += 1 @@ -398,22 +310,22 @@ def plotlyMatrix( yMaxUse = yMaxLocal if yMax[(i - 1) % len(yMax)] is not None: yMaxUse = yMax[(i - 1) % len(yMax)] - fig["layout"]["yaxis{}".format(i)].update(range=[yMinUse, yMaxUse]) - fig["layout"]["yaxis1"].update(title=yAxisLabel) + fig['layout']['yaxis{}'.format(i)].update(range=[yMinUse, yMaxUse]) + fig['layout']['yaxis1'].update(title=yAxisLabel) # Add the heatmap dataHeatmap = [] zMinLocal = np.inf zMaxLocal = -np.inf - heatmapWidth = 1.0 / nCols + heatmapWidth = 1. / nCols heatmapSideBuffer = 0.0 if nCols > 1: - heatmapWidth = 0.9 / nCols + heatmapWidth = .9 / nCols heatmapSideBuffer = 0.1 / (nCols - 1) heatmapHeight = 1.0 - profileHeight - profileBottomBuffer for i in range(nCols): - xanchor = "x{}".format(xAxisN) + xanchor = 'x{}'.format(xAxisN) xBase = i * (heatmapSideBuffer + heatmapWidth) # Determine the height of each heatmap, they have no buffer @@ -423,78 +335,50 @@ def plotlyMatrix( mat = hm.matrix.get_matrix(i, j) else: mat = hm.matrix.get_matrix(j, i) - lengths.append(mat["matrix"].shape[0]) - fractionalHeights = ( - heatmapHeight * - np.cumsum(lengths).astype(float) / - np.sum(lengths).astype(float) - ) + lengths.append(mat['matrix'].shape[0]) + fractionalHeights = heatmapHeight * np.cumsum(lengths).astype(float) / np.sum(lengths).astype(float) xDomain = [xBase, xBase + heatmapWidth] - fig["layout"]["xaxis{}".format(xAxisN)] = dict( - domain=xDomain, - anchor="free", - position=0.0, - range=[0, mat["matrix"].shape[1]], - tickmode="array", - tickvals=xTicks, - ticktext=xTicksLabels, - title=xAxisLabel, - ) + fig['layout']['xaxis{}'.format(xAxisN)] = dict(domain=xDomain, anchor='free', position=0.0, range=[0, mat['matrix'].shape[1]], tickmode='array', tickvals=xTicks, ticktext=xTicksLabels, title=xAxisLabel) # Start adding the heatmaps for j in range(nRows): if perGroup: mat = hm.matrix.get_matrix(i, j) - label = mat["sample"] + label = mat['sample'] start = hm.matrix.group_boundaries[i] end = hm.matrix.group_boundaries[i + 1] else: mat = hm.matrix.get_matrix(j, i) - label = mat["group"] + label = mat['group'] start = hm.matrix.group_boundaries[j] end = hm.matrix.group_boundaries[j + 1] regs = hm.matrix.regions[start:end] regs = [x[2] for x in regs] - yanchor = "y{}".format(yAxisN) - yDomain = [ - heatmapHeight - fractionalHeights[j + 1], - heatmapHeight - fractionalHeights[j], - ] + yanchor = 'y{}'.format(yAxisN) + yDomain = [heatmapHeight - fractionalHeights[j + 1], heatmapHeight - fractionalHeights[j]] visible = False if i == 0: visible = True - fig["layout"]["yaxis{}".format(yAxisN)] = dict( - domain=yDomain, - anchor=xanchor, - visible=visible, - title=label, - tickmode="array", - tickvals=[], - ticktext=[], - ) - if np.min(mat["matrix"]) < zMinLocal: - zMinLocal = np.min(mat["matrix"]) - if np.max(mat["matrix"]) < zMaxLocal: - zMaxLocal = np.max(mat["matrix"]) - - trace = go.Heatmap( - z=np.flipud(mat["matrix"]), - y=regs[::-1], - xaxis=xanchor, - yaxis=yanchor, - showlegend=False, - name=label, - showscale=False, - ) + fig['layout']['yaxis{}'.format(yAxisN)] = dict(domain=yDomain, anchor=xanchor, visible=visible, title=label, tickmode='array', tickvals=[], ticktext=[]) + if np.min(mat['matrix']) < zMinLocal: + zMinLocal = np.min(mat['matrix']) + if np.max(mat['matrix']) < zMaxLocal: + zMaxLocal = np.max(mat['matrix']) + + trace = go.Heatmap(z=np.flipud(mat['matrix']), + y=regs[::-1], + xaxis=xanchor, + yaxis=yanchor, + showlegend=False, + name=label, + showscale=False) dataHeatmap.append(trace) yAxisN += 1 xAxisN += 1 if showColorbar: dataHeatmap[-1].update(showscale=True) - dataHeatmap[-1]["colorbar"].update( - len=heatmapHeight, y=0, yanchor="bottom", ypad=0.0 - ) + dataHeatmap[-1]['colorbar'].update(len=heatmapHeight, y=0, yanchor='bottom', ypad=0.0) # Adjust z bounds and colorscale for trace in dataHeatmap: @@ -504,49 +388,36 @@ def plotlyMatrix( zMinUse = zMin[0] if zMax[0] is not None: zMaxUse = zMax[0] - trace.update( - zmin=zMinUse, - zmax=zMaxUse, - colorscale=convertCmap(cmap[0], vmin=zMinUse, vmax=zMaxUse), - ) + trace.update(zmin=zMinUse, zmax=zMaxUse, colorscale=convertCmap(cmap[0], vmin=zMinUse, vmax=zMaxUse)) dataSummary.extend(dataHeatmap) fig.add_traces(dataSummary) - fig["layout"]["annotations"] = annos + fig['layout']['annotations'] = annos py.plot(fig, filename=outFilename, auto_open=False) -def plotMatrix( - hm, - outFileName, - colorMapDict={"colorMap": ["binary"], "missingDataColor": "black", "alpha": 1.0}, - plotTitle="", - xAxisLabel="", - yAxisLabel="", - regionsLabel="", - zMin=None, - zMax=None, - yMin=None, - yMax=None, - averageType="median", - reference_point_label=None, - startLabel="TSS", - endLabel="TES", - heatmapHeight=25, - heatmapWidth=7.5, - perGroup=False, - whatToShow="plot, heatmap and colorbar", - plot_type="lines", - linesAtTickMarks=False, - image_format=None, - legend_location="upper-left", - box_around_heatmaps=True, - label_rotation=0.0, - dpi=200, - interpolation_method="auto", -): - - hm.reference_point_label = hm.parameters["ref point"] +def plotMatrix(hm, outFileName, + colorMapDict={'colorMap': ['binary'], 'missingDataColor': 'black', 'alpha': 1.0}, + plotTitle='', + xAxisLabel='', yAxisLabel='', regionsLabel='', + zMin=None, zMax=None, + yMin=None, yMax=None, + averageType='median', + reference_point_label=None, + startLabel='TSS', endLabel="TES", + heatmapHeight=25, + heatmapWidth=7.5, + perGroup=False, whatToShow='plot, heatmap and colorbar', + plot_type='lines', + linesAtTickMarks=False, + image_format=None, + legend_location='upper-left', + box_around_heatmaps=True, + label_rotation=0.0, + dpi=200, + interpolation_method='auto'): + + hm.reference_point_label = hm.parameters['ref point'] if reference_point_label is not None: hm.reference_point_label = [reference_point_label] * hm.matrix.get_num_samples() hm.startLabel = startLabel @@ -561,12 +432,12 @@ def plotMatrix( zMin = [None] else: zMin = [zMin] # convert to list to support multiple entries - elif "auto" in zMin: + elif 'auto' in zMin: matrix_flatten = hm.matrix.flatten() auto_min = np.percentile(matrix_flatten, 1.0) if np.isnan(auto_min): auto_min = None - new_mins = [float(x) if x != "auto" else auto_min for x in zMin] + new_mins = [float(x) if x != 'auto' else auto_min for x in zMin] zMin = new_mins else: new_mins = [float(x) for x in zMin] @@ -581,12 +452,12 @@ def plotMatrix( zMax = [None] else: zMax = [zMax] - elif "auto" in zMax: + elif 'auto' in zMax: matrix_flatten = hm.matrix.flatten() auto_max = np.percentile(matrix_flatten, 98.0) if np.isnan(auto_max): auto_max = None - new_maxs = [float(x) if x != "auto" else auto_max for x in zMax] + new_maxs = [float(x) if x != 'auto' else auto_max for x in zMax] zMax = new_maxs else: new_maxs = [float(x) for x in zMax] @@ -594,11 +465,9 @@ def plotMatrix( if (len(zMin) > 1) & (len(zMax) > 1): for index, value in enumerate(zMax): if value <= zMin[index]: - sys.stderr.write( - "Warnirng: In bigwig {}, the given zmin ({}) is larger than " - "or equal to the given zmax ({}). Thus, it has been set " - "to None. \n".format(index + 1, zMin[index], value) - ) + sys.stderr.write("Warnirng: In bigwig {}, the given zmin ({}) is larger than " + "or equal to the given zmax ({}). Thus, it has been set " + "to None. \n".format(index + 1, zMin[index], value)) zMin[index] = None if yMin is None: @@ -610,51 +479,42 @@ def plotMatrix( if not isinstance(yMax, list): yMax = [yMax] - plt.rcParams["font.size"] = 8.0 + plt.rcParams['font.size'] = 8.0 fontP = FontProperties() showSummaryPlot = False showColorbar = False - if whatToShow == "plot and heatmap": + if whatToShow == 'plot and heatmap': showSummaryPlot = True - elif whatToShow == "heatmap and colorbar": + elif whatToShow == 'heatmap and colorbar': showColorbar = True - elif whatToShow == "plot, heatmap and colorbar": + elif whatToShow == 'plot, heatmap and colorbar': showSummaryPlot = True showColorbar = True # colormap for the heatmap - if colorMapDict["colorMap"]: + if colorMapDict['colorMap']: cmap = [] - for color_map in colorMapDict["colorMap"]: + for color_map in colorMapDict['colorMap']: copy_cmp = copy.copy(plt.get_cmap(color_map)) cmap.append(copy_cmp) - cmap[-1].set_bad( - colorMapDict["missingDataColor"] - ) # nans are printed using this color + cmap[-1].set_bad(colorMapDict['missingDataColor']) # nans are printed using this color - if colorMapDict["colorList"] and len(colorMapDict["colorList"]) > 0: + if colorMapDict['colorList'] and len(colorMapDict['colorList']) > 0: # make a cmap for each color list given cmap = [] - for color_list in colorMapDict["colorList"]: - cmap.append( - matplotlib.colors.LinearSegmentedColormap.from_list( - "my_cmap", - color_list.replace(" ", "").split(","), - N=colorMapDict["colorNumber"], - ) - ) - cmap[-1].set_bad( - colorMapDict["missingDataColor"] - ) # nans are printed using this color + for color_list in colorMapDict['colorList']: + cmap.append(matplotlib.colors.LinearSegmentedColormap.from_list( + 'my_cmap', color_list.replace(' ', '').split(","), N=colorMapDict['colorNumber'])) + cmap[-1].set_bad(colorMapDict['missingDataColor']) # nans are printed using this color if len(cmap) > 1 or len(zMin) > 1 or len(zMax) > 1: # position color bar below heatmap when more than one # heatmap color is given - colorbar_position = "below" + colorbar_position = 'below' else: - colorbar_position = "side" + colorbar_position = 'side' # figsize: w,h tuple in inches figwidth = heatmapWidth / 2.54 @@ -671,12 +531,12 @@ def plotMatrix( num_cols = numsamples total_figwidth = figwidth * num_cols if showColorbar: - if colorbar_position == "below": + if colorbar_position == 'below': figheight += 1 / 2.54 else: total_figwidth += 1 / 2.54 - fig = plt.figure(figsize=(total_figwidth, figheight), constrained_layout=True) + fig = plt.figure(figsize=(total_figwidth, figheight), constrained_layout=True) #beisi fig.suptitle(plotTitle, y=1 - (0.06 / figheight)) grids = prepare_layout( @@ -686,47 +546,36 @@ def plotMatrix( showColorbar, perGroup, colorbar_position, - fig, + fig ) # color map for the summary plot (profile) on top of the heatmap - cmap_plot = plt.get_cmap("jet") + cmap_plot = plt.get_cmap('jet') numgroups = hm.matrix.get_num_groups() if perGroup: - color_list = cmap_plot( - np.arange(hm.matrix.get_num_samples()) / hm.matrix.get_num_samples() - ) + color_list = cmap_plot(np.arange(hm.matrix.get_num_samples()) / hm.matrix.get_num_samples()) else: color_list = cmap_plot(np.arange(numgroups) / numgroups) - alpha = colorMapDict["alpha"] - if image_format == "plotly": - return plotlyMatrix( - hm, - outFileName, - yMin=yMin, - yMax=yMax, - zMin=zMin, - zMax=zMax, - showSummaryPlot=showSummaryPlot, - showColorbar=showColorbar, - cmap=cmap, - colorList=color_list, - colorBarPosition=colorbar_position, - perGroup=perGroup, - averageType=averageType, - plotTitle=plotTitle, - xAxisLabel=xAxisLabel, - yAxisLabel=yAxisLabel, - label_rotation=label_rotation, - ) + alpha = colorMapDict['alpha'] + if image_format == 'plotly': + return plotlyMatrix(hm, + outFileName, + yMin=yMin, yMax=yMax, + zMin=zMin, zMax=zMax, + showSummaryPlot=showSummaryPlot, showColorbar=showColorbar, + cmap=cmap, colorList=color_list, colorBarPosition=colorbar_position, + perGroup=perGroup, + averageType=averageType, plotTitle=plotTitle, + xAxisLabel=xAxisLabel, yAxisLabel=yAxisLabel, + label_rotation=label_rotation) # check if matrix is reference-point based using the upstream >0 value # and is sorted by region length. If this is # the case, prepare the data to plot a border at the regions end - regions_length_in_bins = [None] * len(hm.parameters["upstream"]) - if hm.matrix.sort_using == "region_length" and hm.matrix.sort_method != "no": - for idx in range(len(hm.parameters["upstream"])): - if hm.parameters["ref point"][idx] is None: + regions_length_in_bins = [None] * len(hm.parameters['upstream']) + if hm.matrix.sort_using == 'region_length' and hm.matrix.sort_method != 'no': + for idx in range(len(hm.parameters['upstream'])): + if hm.parameters['ref point'][idx] is None: regions_length_in_bins[idx] = None continue @@ -736,24 +585,16 @@ def plotMatrix( _reg_len = [] for ind_reg in _group: if isinstance(ind_reg, dict): - _len = ind_reg["end"] - ind_reg["start"] + _len = ind_reg['end'] - ind_reg['start'] else: _len = sum([x[1] - x[0] for x in ind_reg[1]]) - if hm.parameters["ref point"][idx] == "TSS": - _reg_len.append( - (hm.parameters["upstream"][idx] + _len) / - hm.parameters["bin size"][idx] - ) - elif hm.parameters["ref point"][idx] == "center": + if hm.parameters['ref point'][idx] == 'TSS': + _reg_len.append((hm.parameters['upstream'][idx] + _len) / hm.parameters['bin size'][idx]) + elif hm.parameters['ref point'][idx] == 'center': _len *= 0.5 - _reg_len.append( - (hm.parameters["upstream"][idx] + _len) / - hm.parameters["bin size"][idx] - ) - elif hm.parameters["ref point"][idx] == "TES": - _reg_len.append( - (hm.parameters["upstream"][idx] - _len) / - hm.parameters["bin size"][idx]) + _reg_len.append((hm.parameters['upstream'][idx] + _len) / hm.parameters['bin size'][idx]) + elif hm.parameters['ref point'][idx] == 'TES': + _reg_len.append((hm.parameters['upstream'][idx] - _len) / hm.parameters['bin size'][idx]) foo.append(_reg_len) regions_length_in_bins[idx] = foo @@ -765,44 +606,11 @@ def plotMatrix( else: iterNum = hm.matrix.get_num_samples() iterNum2 = numgroups - ax_list = addProfilePlot( - hm, - plt, - fig, - grids, - iterNum, - iterNum2, - perGroup, - averageType, - plot_type, - yAxisLabel, - color_list, - yMin, - yMax, - None, - None, - colorbar_position, - label_rotation, - ) - - if legend_location != "none": - ax = ax_list[-1] # beisi - box = ax.get_position() - ax.set_position( - [box.x0, box.y0 - box.height * 0.1, box.width, box.height * 0.9] - ) - legend = ax.legend( - loc="lower right", - shadow=False, - fontsize="x-large", - bbox_to_anchor=(0, 1.3, 1, 0.22), - ncol=10, - frameon=False, - prop=fontP, - ) # beisi, legend line - ax.add_artist(legend) - # ax_list[-1].legend(loc=legend_location.replace('-', ' '), ncol=1, prop=fontP, - # frameon=False, markerscale=0.5) + ax_list = addProfilePlot(hm, plt, fig, grids, iterNum, iterNum2, perGroup, averageType, plot_type, yAxisLabel, color_list, yMin, yMax, None, None, colorbar_position, label_rotation) + + if legend_location != "none": #beisi start + legend = ax_list[-1].legend(loc='lower right', shadow=False, fontsize='x-large', bbox_to_anchor=(0.75, 1.35, 0.5, .50), ncol=10, frameon=False, prop=fontP) + legend.set_in_layout(False) #beisi end first_group = 0 # helper variable to place the title per sample/group for sample in range(hm.matrix.get_num_samples()): @@ -838,41 +646,37 @@ def plotMatrix( if group == first_group and not showSummaryPlot and not perGroup: title = hm.matrix.sample_labels[sample] - ax.set_title(autobreaklinetitle(title)) + ax.set_title(autobreaklinetitle(title)) # beisi if box_around_heatmaps is False: # Turn off the boxes around the individual heatmaps - ax.spines["top"].set_visible(False) - ax.spines["right"].set_visible(False) - ax.spines["bottom"].set_visible(False) - ax.spines["left"].set_visible(False) - rows, cols = sub_matrix["matrix"].shape + ax.spines['top'].set_visible(False) + ax.spines['right'].set_visible(False) + ax.spines['bottom'].set_visible(False) + ax.spines['left'].set_visible(False) + rows, cols = sub_matrix['matrix'].shape # if the number of rows is too large, then the 'nearest' method simply # drops rows. A better solution is to relate the threshold to the DPI of the image - if interpolation_method == "auto": + if interpolation_method == 'auto': if rows >= 1000: - interpolation_method = "bilinear" + interpolation_method = 'bilinear' else: - interpolation_method = "nearest" + interpolation_method = 'nearest' # if np.clip is not used, then values of the matrix that exceed the zmax limit are # highlighted. Usually, a significant amount of pixels are equal or above the zmax and # the default behaviour produces images full of large highlighted dots. # If interpolation='nearest' is used, this has no effect - sub_matrix["matrix"] = np.clip( - sub_matrix["matrix"], zMin[zmin_idx], zMax[zmax_idx] - ) - img = ax.imshow( - sub_matrix["matrix"], - aspect="auto", - interpolation=interpolation_method, - origin="upper", - vmin=zMin[zmin_idx], - vmax=zMax[zmax_idx], - cmap=cmap[cmap_idx], - alpha=alpha, - extent=[0, cols, rows, 0], - ) + sub_matrix['matrix'] = np.clip(sub_matrix['matrix'], zMin[zmin_idx], zMax[zmax_idx]) + img = ax.imshow(sub_matrix['matrix'], + aspect='auto', + interpolation=interpolation_method, + origin='upper', + vmin=zMin[zmin_idx], + vmax=zMax[zmax_idx], + cmap=cmap[cmap_idx], + alpha=alpha, + extent=[0, cols, rows, 0]) img.set_rasterized(True) # plot border at the end of the regions # if ordered by length @@ -880,19 +684,14 @@ def plotMatrix( x_lim = ax.get_xlim() y_lim = ax.get_ylim() - ax.plot( - regions_length_in_bins[sample][group_idx], - np.arange(len(regions_length_in_bins[sample][group_idx])), - "--", - color="black", - linewidth=0.5, - dashes=(3, 2), - ) + ax.plot(regions_length_in_bins[sample][group_idx], + np.arange(len(regions_length_in_bins[sample][group_idx])), + '--', color='black', linewidth=0.5, dashes=(3, 2)) ax.set_xlim(x_lim) ax.set_ylim(y_lim) if perGroup: - ax.axes.set_xlabel(sub_matrix["group"]) + ax.axes.set_xlabel(sub_matrix['group']) if sample < hm.matrix.get_num_samples() - 1: ax.axes.get_xaxis().set_visible(False) else: @@ -900,45 +699,32 @@ def plotMatrix( ax.axes.set_xlabel(xAxisLabel) ax.axes.set_yticks([]) if perGroup and group == 0: - ax.axes.set_ylabel( - sub_matrix["sample"], rotation=75, labelpad=0, fontsize=15 - ) + ax.axes.set_ylabel(sub_matrix['sample']) elif not perGroup and sample == 0: - ax.axes.set_ylabel( - sub_matrix["group"], - rotation=75, - labelpad=0, - horizontalalignment="right", - fontsize=15, - ) + ax.axes.set_ylabel(sub_matrix['group']) # Plot vertical lines at tick marks if desired if linesAtTickMarks: xticks_heat, xtickslabel_heat = hm.getTicks(sample) - xticks_heat = [ - x + 0.5 for x in xticks_heat - ] # There's an offset of 0.5 compared to the profile plot - if np.ceil(max(xticks_heat)) != float(sub_matrix["matrix"].shape[1]): - tickscale = float(sub_matrix["matrix"].shape[1]) / max(xticks_heat) + xticks_heat = [x + 0.5 for x in xticks_heat] # There's an offset of 0.5 compared to the profile plot + if np.ceil(max(xticks_heat)) != float(sub_matrix['matrix'].shape[1]): + tickscale = float(sub_matrix['matrix'].shape[1]) / max(xticks_heat) xticks_heat_use = [x * tickscale for x in xticks_heat] else: xticks_heat_use = xticks_heat for x in xticks_heat_use: - ax.axvline(x=x, color="black", linewidth=0.5, dashes=(3, 2)) + ax.axvline(x=x, color='black', linewidth=0.5, dashes=(3, 2)) # add labels to last block in a column - if (perGroup and sample == numsamples - 1) or ( - not perGroup and group_idx == numgroups - 1 - ): + if (perGroup and sample == numsamples - 1) or \ + (not perGroup and group_idx == numgroups - 1): # add xticks to the bottom heatmap (last group) ax.axes.get_xaxis().set_visible(True) xticks_heat, xtickslabel_heat = hm.getTicks(sample) - xticks_heat = [ - x + 0.5 for x in xticks_heat - ] # There's an offset of 0.5 compared to the profile plot - if np.ceil(max(xticks_heat)) != float(sub_matrix["matrix"].shape[1]): - tickscale = float(sub_matrix["matrix"].shape[1]) / max(xticks_heat) + xticks_heat = [x + 0.5 for x in xticks_heat] # There's an offset of 0.5 compared to the profile plot + if np.ceil(max(xticks_heat)) != float(sub_matrix['matrix'].shape[1]): + tickscale = float(sub_matrix['matrix'].shape[1]) / max(xticks_heat) xticks_heat_use = [x * tickscale for x in xticks_heat] ax.axes.set_xticks(xticks_heat_use) else: @@ -949,12 +735,15 @@ def plotMatrix( # such that they don't fall off # the heatmap sides ticks = ax.xaxis.get_major_ticks() - ticks[0].label1.set_horizontalalignment("left") - ticks[-1].label1.set_horizontalalignment("right") + ticks[0].label1.set_horizontalalignment('left') + ticks[-1].label1.set_horizontalalignment('right') - ax.get_xaxis().set_tick_params(which="both", top=False, direction="out") + ax.get_xaxis().set_tick_params( + which='both', + top=False, + direction='out') - if showColorbar and colorbar_position == "below": + if showColorbar and colorbar_position == 'below': # draw a colormap per each heatmap below the last block if perGroup: col = group_idx @@ -962,24 +751,22 @@ def plotMatrix( col = sample ax = fig.add_subplot(grids[-1, col]) tick_locator = ticker.MaxNLocator(nbins=3) - cbar = fig.colorbar( - img, cax=ax, orientation="horizontal", ticks=tick_locator - ) + cbar = fig.colorbar(img, cax=ax, orientation='horizontal', ticks=tick_locator) labels = cbar.ax.get_xticklabels() ticks = cbar.ax.get_xticks() if ticks[0] == 0: # if the label is at the start of the colobar # move it a bit inside to avoid overlapping # with other labels - labels[0].set_horizontalalignment("left") + labels[0].set_horizontalalignment('left') if ticks[-1] == 1: # if the label is at the end of the colobar # move it a bit inside to avoid overlapping # with other labels - labels[-1].set_horizontalalignment("right") + labels[-1].set_horizontalalignment('right') # cbar.ax.set_xticklabels(labels, rotation=90) - if showColorbar and colorbar_position != "below": + if showColorbar and colorbar_position != 'below': if showSummaryPlot: # we don't want to colorbar to extend # over the profiles and spacer top rows @@ -991,18 +778,12 @@ def plotMatrix( fig.colorbar(img, cax=ax) if box_around_heatmaps: - fig.get_layout_engine().set( - wspace=0.10, hspace=0.025, rect=(0.04, 0, 0.96, 0.85) - ) + fig.get_layout_engine().set(w_pad=0.04,h_pad=0.01,wspace=0, hspace=0, rect=(0.04, 0, 0.96, 0.95)) else: # When no box is plotted the space between heatmaps is reduced - fig.get_layout_engine().set( - wspace=0.05, hspace=0.01, rect=(0.04, 0, 0.96, 0.85) - ) + fig.get_layout_engine().set(wspace=0.05, hspace=0.01, rect=(0.04, 0, 0.96, 0.85)) - plt.savefig( - outFileName, bbox_inches="tight", pad_inches=0.1, dpi=dpi, format=image_format - ) + plt.savefig(outFileName, bbox_inches='tight', pad_inches=0.1, dpi=dpi, format=image_format) plt.close() @@ -1021,7 +802,8 @@ def mergeSmallGroups(matrixDict): if len(to_merge): to_merge.append(label) new_label = " ".join(to_merge) - new_ma = np.concatenate([matrixDict[item] for item in to_merge], axis=0) + new_ma = np.concatenate([matrixDict[item] + for item in to_merge], axis=0) else: new_label = label new_ma = matrixDict[label] @@ -1048,39 +830,24 @@ def main(args=None): args.matrixFile.close() hm.read_matrix_file(matrix_file) - if ( - hm.parameters["min threshold"] is not None or - hm.parameters["max threshold"] is not None - ): - filterHeatmapValues( - hm, hm.parameters["min threshold"], hm.parameters["max threshold"] - ) + if hm.parameters['min threshold'] is not None or hm.parameters['max threshold'] is not None: + filterHeatmapValues(hm, hm.parameters['min threshold'], hm.parameters['max threshold']) - if args.sortRegions == "keep": - args.sortRegions = "no" # These are the same thing + if args.sortRegions == 'keep': + args.sortRegions = 'no' # These are the same thing if args.kmeans is not None: - hm.matrix.hmcluster( - args.kmeans, method="kmeans", clustering_samples=args.clusterUsingSamples - ) + hm.matrix.hmcluster(args.kmeans, method='kmeans', clustering_samples=args.clusterUsingSamples) elif args.hclust is not None: - print( - "Performing hierarchical clustering." - "Please note that it might be very slow for large datasets.\n" - ) - hm.matrix.hmcluster( - args.hclust, - method="hierarchical", - clustering_samples=args.clusterUsingSamples, - ) + print("Performing hierarchical clustering." + "Please note that it might be very slow for large datasets.\n") + hm.matrix.hmcluster(args.hclust, method='hierarchical', clustering_samples=args.clusterUsingSamples) group_len_ratio = np.diff(hm.matrix.group_boundaries) / len(hm.matrix.regions) if np.any(group_len_ratio < 5.0 / 1000): problem = np.flatnonzero(group_len_ratio < 5.0 / 1000) - sys.stderr.write( - "WARNING: Group '{}' is too small for plotting, you might want to remove it. " - "There will likely be an error message from matplotlib regarding this " - "below.\n".format(hm.matrix.group_labels[problem[0]]) - ) + sys.stderr.write("WARNING: Group '{}' is too small for plotting, you might want to remove it. " + "There will likely be an error message from matplotlib regarding this " + "below.\n".format(hm.matrix.group_labels[problem[0]])) if args.regionsLabel: hm.matrix.set_group_labels(args.regionsLabel) @@ -1088,25 +855,19 @@ def main(args=None): if args.samplesLabel and len(args.samplesLabel): hm.matrix.set_sample_labels(args.samplesLabel) - if args.sortRegions != "no": + if args.sortRegions != 'no': sortUsingSamples = [] if args.sortUsingSamples is not None: for i in args.sortUsingSamples: - if i > 0 and i <= hm.matrix.get_num_samples(): + if (i > 0 and i <= hm.matrix.get_num_samples()): sortUsingSamples.append(i - 1) else: - exit( - "The value {0} for --sortSamples is not valid. Only values from 1 to {1} are allowed.".format( - args.sortUsingSamples, hm.matrix.get_num_samples() - ) - ) - print("Samples used for ordering within each group: ", sortUsingSamples) - - hm.matrix.sort_groups( - sort_using=args.sortUsing, - sort_method=args.sortRegions, - sample_list=sortUsingSamples, - ) + exit("The value {0} for --sortSamples is not valid. Only values from 1 to {1} are allowed.".format(args.sortUsingSamples, hm.matrix.get_num_samples())) + print('Samples used for ordering within each group: ', sortUsingSamples) + + hm.matrix.sort_groups(sort_using=args.sortUsing, + sort_method=args.sortRegions, + sample_list=sortUsingSamples) if args.silhouette: if args.kmeans is not None: @@ -1120,40 +881,31 @@ def main(args=None): if args.outFileSortedRegions: hm.save_BED(args.outFileSortedRegions) - colormap_dict = { - "colorMap": args.colorMap, - "colorList": args.colorList, - "colorNumber": args.colorNumber, - "missingDataColor": args.missingDataColor, - "alpha": args.alpha, - } - - plotMatrix( - hm, - args.outFileName, - colormap_dict, - args.plotTitle, - args.xAxisLabel, - args.yAxisLabel, - args.regionsLabel, - args.zMin, - args.zMax, - args.yMin, - args.yMax, - args.averageTypeSummaryPlot, - args.refPointLabel, - args.startLabel, - args.endLabel, - args.heatmapHeight, - args.heatmapWidth, - args.perGroup, - args.whatToShow, - linesAtTickMarks=args.linesAtTickMarks, - plot_type=args.plotType, - image_format=args.plotFileFormat, - legend_location=args.legendLocation, - box_around_heatmaps=args.boxAroundHeatmaps, - label_rotation=args.label_rotation, - dpi=args.dpi, - interpolation_method=args.interpolationMethod, - ) + colormap_dict = {'colorMap': args.colorMap, + 'colorList': args.colorList, + 'colorNumber': args.colorNumber, + 'missingDataColor': args.missingDataColor, + 'alpha': args.alpha} + + plotMatrix(hm, + args.outFileName, + colormap_dict, args.plotTitle, + args.xAxisLabel, args.yAxisLabel, args.regionsLabel, + args.zMin, args.zMax, + args.yMin, args.yMax, + args.averageTypeSummaryPlot, + args.refPointLabel, + args.startLabel, + args.endLabel, + args.heatmapHeight, + args.heatmapWidth, + args.perGroup, + args.whatToShow, + linesAtTickMarks=args.linesAtTickMarks, + plot_type=args.plotType, + image_format=args.plotFileFormat, + legend_location=args.legendLocation, + box_around_heatmaps=args.boxAroundHeatmaps, + label_rotation=args.label_rotation, + dpi=args.dpi, + interpolation_method=args.interpolationMethod)