From 52fddd6fc05aeaac3422411e444de10a8d0e6e31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Crist=C3=B3bal=20Gallardo?= Date: Mon, 28 Feb 2022 19:26:42 +0100 Subject: [PATCH 1/6] Fix label options --- galaxy/wrapper/multiBigwigSummary.xml | 26 ++-- .../test-data/multiBigwigSummary_result1.npz | Bin 404 -> 465 bytes .../test-data/multiBigwigSummary_result2.npz | 121 ++++++++++++++++++ .../multiBigwigSummary_result2.tabular | 121 ++++++++++++++++++ 4 files changed, 256 insertions(+), 12 deletions(-) create mode 100644 galaxy/wrapper/test-data/multiBigwigSummary_result2.npz create mode 100644 galaxy/wrapper/test-data/multiBigwigSummary_result2.tabular diff --git a/galaxy/wrapper/multiBigwigSummary.xml b/galaxy/wrapper/multiBigwigSummary.xml index 6d929f04f2..e4fe7a6631 100644 --- a/galaxy/wrapper/multiBigwigSummary.xml +++ b/galaxy/wrapper/multiBigwigSummary.xml @@ -1,4 +1,4 @@ - + calculates average scores for a list of two or more bigwig files multiBigwigSummary @@ -11,7 +11,6 @@ #set labels=[] @multiple_input_bigwigs@ - @BINARY@ $mode.modeOpt @@ -20,8 +19,10 @@ --outFileName $outFile --bwfiles #echo ' '.join($files)# - --labels #echo ' '.join($labels)# - + + #if $custom_sample_labels_conditional.custom_labels_select == 'Yes' + --labels #echo ' '.join($custom_sample_labels_conditional.labels)# + #end if #if $outRawCounts: --outRawCounts '$outFileRawCounts' #end if @@ -70,7 +71,6 @@ help="Correlation is computed for the number of reads that overlap such regions."/> - @@ -91,18 +91,20 @@ - - + + HnV zTf+2Hs?Ckfipy=571vg-6ehj4g^V&EF3Z0Od>Lr4iLsrz?pM35mT!K;yts=m*)zUq zNJJ%={Ox6W#J~`M;#gh}s{=_uXFLF6kYjTalTvevVU7iff&fSb2w-=ta8||=bk|NQ zKy|H~71*UunAZuhv9*OL=&>aUMDuq{ZrCBy@wo8@gKy7-DrXZzpz9c!M3`{}Cd3sW ovVjpqB7zoOAF97VY9XM3aV?O6@MVBED;r3L2?z^-v^UsX06vU*Qvd(} literal 404 zcmWIWW@Zs#U|`^2;F@0RwRB2UnlF$S2gF=JoRgT8np3QoS5P_ojrc2Rfs-d591B=G zZ&K8pm?e2j7sLtYtXsEu!MthlAwg4Sh|iCoGKov5-205CA8VI*x{BBmrk_%7RV=0h{@L+&9D;r3T2?z^-G#?{~2LRDoc?kdj diff --git a/galaxy/wrapper/test-data/multiBigwigSummary_result2.npz b/galaxy/wrapper/test-data/multiBigwigSummary_result2.npz new file mode 100644 index 0000000000..471e3e00c0 --- /dev/null +++ b/galaxy/wrapper/test-data/multiBigwigSummary_result2.npz @@ -0,0 +1,121 @@ +#'chr' 'start' 'end' 'sample1' 'sample2' +ch1 0 10 0.0 0.0 +ch1 10 20 0.0 0.0 +ch1 20 30 0.0 0.0 +ch1 30 40 0.0 0.0 +ch1 40 50 0.0 0.0 +ch1 50 60 0.0 0.0 +ch1 60 70 0.0 0.0 +ch1 70 80 0.0 0.0 +ch1 80 90 0.0 0.0 +ch1 90 100 0.0 0.0 +ch1 100 110 2.0 2.0 +ch1 110 120 2.0 2.0 +ch1 120 130 1.0 1.0 +ch1 130 140 0.0 0.0 +ch1 140 150 0.0 0.0 +ch1 150 160 0.0 0.0 +ch1 160 170 0.0 0.0 +ch1 170 180 0.0 0.0 +ch1 180 190 0.0 0.0 +ch1 190 200 0.0 0.0 +ch1 200 210 0.0 0.0 +ch1 210 220 0.0 0.0 +ch1 220 230 0.0 0.0 +ch1 230 240 0.0 0.0 +ch1 240 250 0.0 0.0 +ch1 250 260 0.0 0.0 +ch1 260 270 0.0 0.0 +ch1 270 280 0.0 0.0 +ch1 280 290 0.0 0.0 +ch1 290 300 0.0 0.0 +ch1 300 310 0.0 0.0 +ch1 310 320 0.0 0.0 +ch1 320 330 0.0 0.0 +ch1 330 340 0.0 0.0 +ch1 340 350 0.0 0.0 +ch1 350 360 0.0 0.0 +ch1 360 370 0.0 0.0 +ch1 370 380 0.0 0.0 +ch1 380 390 0.0 0.0 +ch1 390 400 0.0 0.0 +ch2 0 10 0.0 0.0 +ch2 10 20 0.0 0.0 +ch2 20 30 0.0 0.0 +ch2 30 40 0.0 0.0 +ch2 40 50 0.0 0.0 +ch2 50 60 3.0 3.0 +ch2 60 70 3.0 3.0 +ch2 70 80 1.5 1.5 +ch2 80 90 0.0 0.0 +ch2 90 100 0.0 0.0 +ch2 100 110 0.0 0.0 +ch2 110 120 0.0 0.0 +ch2 120 130 0.0 0.0 +ch2 130 140 0.0 0.0 +ch2 140 150 0.0 0.0 +ch2 150 160 1.0 1.0 +ch2 160 170 1.0 1.0 +ch2 170 180 0.5 0.5 +ch2 180 190 0.0 0.0 +ch2 190 200 0.0 0.0 +ch2 200 210 0.0 0.0 +ch2 210 220 0.0 0.0 +ch2 220 230 0.0 0.0 +ch2 230 240 0.0 0.0 +ch2 240 250 0.0 0.0 +ch2 250 260 0.0 0.0 +ch2 260 270 0.0 0.0 +ch2 270 280 0.0 0.0 +ch2 280 290 0.0 0.0 +ch2 290 300 0.0 0.0 +ch2 300 310 0.0 0.0 +ch2 310 320 0.0 0.0 +ch2 320 330 0.0 0.0 +ch2 330 340 0.0 0.0 +ch2 340 350 0.0 0.0 +ch2 350 360 0.0 0.0 +ch2 360 370 0.0 0.0 +ch2 370 380 0.0 0.0 +ch2 380 390 0.0 0.0 +ch2 390 400 0.0 0.0 +ch3 0 10 0.0 0.0 +ch3 10 20 0.0 0.0 +ch3 20 30 0.0 0.0 +ch3 30 40 0.0 0.0 +ch3 40 50 0.0 0.0 +ch3 50 60 3.0 3.0 +ch3 60 70 3.0 3.0 +ch3 70 80 1.5 1.5 +ch3 80 90 0.0 0.0 +ch3 90 100 0.0 0.0 +ch3 100 110 0.0 0.0 +ch3 110 120 0.0 0.0 +ch3 120 130 0.0 0.0 +ch3 130 140 0.0 0.0 +ch3 140 150 0.0 0.0 +ch3 150 160 1.0 1.0 +ch3 160 170 1.0 1.0 +ch3 170 180 0.5 0.5 +ch3 180 190 0.0 0.0 +ch3 190 200 0.0 0.0 +ch3 200 210 0.0 0.0 +ch3 210 220 0.0 0.0 +ch3 220 230 0.0 0.0 +ch3 230 240 0.0 0.0 +ch3 240 250 0.0 0.0 +ch3 250 260 0.0 0.0 +ch3 260 270 0.0 0.0 +ch3 270 280 0.0 0.0 +ch3 280 290 0.0 0.0 +ch3 290 300 0.0 0.0 +ch3 300 310 0.0 0.0 +ch3 310 320 0.0 0.0 +ch3 320 330 0.0 0.0 +ch3 330 340 0.0 0.0 +ch3 340 350 0.0 0.0 +ch3 350 360 0.0 0.0 +ch3 360 370 0.0 0.0 +ch3 370 380 0.0 0.0 +ch3 380 390 0.0 0.0 +ch3 390 400 0.0 0.0 diff --git a/galaxy/wrapper/test-data/multiBigwigSummary_result2.tabular b/galaxy/wrapper/test-data/multiBigwigSummary_result2.tabular new file mode 100644 index 0000000000..471e3e00c0 --- /dev/null +++ b/galaxy/wrapper/test-data/multiBigwigSummary_result2.tabular @@ -0,0 +1,121 @@ +#'chr' 'start' 'end' 'sample1' 'sample2' +ch1 0 10 0.0 0.0 +ch1 10 20 0.0 0.0 +ch1 20 30 0.0 0.0 +ch1 30 40 0.0 0.0 +ch1 40 50 0.0 0.0 +ch1 50 60 0.0 0.0 +ch1 60 70 0.0 0.0 +ch1 70 80 0.0 0.0 +ch1 80 90 0.0 0.0 +ch1 90 100 0.0 0.0 +ch1 100 110 2.0 2.0 +ch1 110 120 2.0 2.0 +ch1 120 130 1.0 1.0 +ch1 130 140 0.0 0.0 +ch1 140 150 0.0 0.0 +ch1 150 160 0.0 0.0 +ch1 160 170 0.0 0.0 +ch1 170 180 0.0 0.0 +ch1 180 190 0.0 0.0 +ch1 190 200 0.0 0.0 +ch1 200 210 0.0 0.0 +ch1 210 220 0.0 0.0 +ch1 220 230 0.0 0.0 +ch1 230 240 0.0 0.0 +ch1 240 250 0.0 0.0 +ch1 250 260 0.0 0.0 +ch1 260 270 0.0 0.0 +ch1 270 280 0.0 0.0 +ch1 280 290 0.0 0.0 +ch1 290 300 0.0 0.0 +ch1 300 310 0.0 0.0 +ch1 310 320 0.0 0.0 +ch1 320 330 0.0 0.0 +ch1 330 340 0.0 0.0 +ch1 340 350 0.0 0.0 +ch1 350 360 0.0 0.0 +ch1 360 370 0.0 0.0 +ch1 370 380 0.0 0.0 +ch1 380 390 0.0 0.0 +ch1 390 400 0.0 0.0 +ch2 0 10 0.0 0.0 +ch2 10 20 0.0 0.0 +ch2 20 30 0.0 0.0 +ch2 30 40 0.0 0.0 +ch2 40 50 0.0 0.0 +ch2 50 60 3.0 3.0 +ch2 60 70 3.0 3.0 +ch2 70 80 1.5 1.5 +ch2 80 90 0.0 0.0 +ch2 90 100 0.0 0.0 +ch2 100 110 0.0 0.0 +ch2 110 120 0.0 0.0 +ch2 120 130 0.0 0.0 +ch2 130 140 0.0 0.0 +ch2 140 150 0.0 0.0 +ch2 150 160 1.0 1.0 +ch2 160 170 1.0 1.0 +ch2 170 180 0.5 0.5 +ch2 180 190 0.0 0.0 +ch2 190 200 0.0 0.0 +ch2 200 210 0.0 0.0 +ch2 210 220 0.0 0.0 +ch2 220 230 0.0 0.0 +ch2 230 240 0.0 0.0 +ch2 240 250 0.0 0.0 +ch2 250 260 0.0 0.0 +ch2 260 270 0.0 0.0 +ch2 270 280 0.0 0.0 +ch2 280 290 0.0 0.0 +ch2 290 300 0.0 0.0 +ch2 300 310 0.0 0.0 +ch2 310 320 0.0 0.0 +ch2 320 330 0.0 0.0 +ch2 330 340 0.0 0.0 +ch2 340 350 0.0 0.0 +ch2 350 360 0.0 0.0 +ch2 360 370 0.0 0.0 +ch2 370 380 0.0 0.0 +ch2 380 390 0.0 0.0 +ch2 390 400 0.0 0.0 +ch3 0 10 0.0 0.0 +ch3 10 20 0.0 0.0 +ch3 20 30 0.0 0.0 +ch3 30 40 0.0 0.0 +ch3 40 50 0.0 0.0 +ch3 50 60 3.0 3.0 +ch3 60 70 3.0 3.0 +ch3 70 80 1.5 1.5 +ch3 80 90 0.0 0.0 +ch3 90 100 0.0 0.0 +ch3 100 110 0.0 0.0 +ch3 110 120 0.0 0.0 +ch3 120 130 0.0 0.0 +ch3 130 140 0.0 0.0 +ch3 140 150 0.0 0.0 +ch3 150 160 1.0 1.0 +ch3 160 170 1.0 1.0 +ch3 170 180 0.5 0.5 +ch3 180 190 0.0 0.0 +ch3 190 200 0.0 0.0 +ch3 200 210 0.0 0.0 +ch3 210 220 0.0 0.0 +ch3 220 230 0.0 0.0 +ch3 230 240 0.0 0.0 +ch3 240 250 0.0 0.0 +ch3 250 260 0.0 0.0 +ch3 260 270 0.0 0.0 +ch3 270 280 0.0 0.0 +ch3 280 290 0.0 0.0 +ch3 290 300 0.0 0.0 +ch3 300 310 0.0 0.0 +ch3 310 320 0.0 0.0 +ch3 320 330 0.0 0.0 +ch3 330 340 0.0 0.0 +ch3 340 350 0.0 0.0 +ch3 350 360 0.0 0.0 +ch3 360 370 0.0 0.0 +ch3 370 380 0.0 0.0 +ch3 380 390 0.0 0.0 +ch3 390 400 0.0 0.0 From a8d6e5c62553d4cdd5f3863f98dd615aa9ae52ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gr=C3=BCning?= Date: Mon, 28 Feb 2022 19:54:07 +0100 Subject: [PATCH 2/6] add pool --- .azure-pipelines/test-template.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.azure-pipelines/test-template.yml b/.azure-pipelines/test-template.yml index 36e1f4fbb7..a5352d8c39 100644 --- a/.azure-pipelines/test-template.yml +++ b/.azure-pipelines/test-template.yml @@ -1,3 +1,5 @@ +pool: + vmImage: 'ubuntu-latest' steps: - bash: conda create -n foo -q --yes -c conda-forge -c bioconda python=$(python.version) numpy scipy matplotlib==3.1.1 nose flake8 plotly pysam pyBigWig py2bit deeptoolsintervals displayName: Installing dependencies From c96f87e0d0a3f916782b55eeedfb2abf9abde27e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gr=C3=BCning?= Date: Mon, 28 Feb 2022 19:57:01 +0100 Subject: [PATCH 3/6] trigger test --- galaxy/wrapper/multiBigwigSummary.xml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/galaxy/wrapper/multiBigwigSummary.xml b/galaxy/wrapper/multiBigwigSummary.xml index e4fe7a6631..40e1cb3b63 100644 --- a/galaxy/wrapper/multiBigwigSummary.xml +++ b/galaxy/wrapper/multiBigwigSummary.xml @@ -4,7 +4,7 @@ multiBigwigSummary deepTools_macros.xml - + - From 534c722c63976d00c47bb1acf754b51c63f90b0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gr=C3=BCning?= Date: Mon, 28 Feb 2022 20:08:30 +0100 Subject: [PATCH 4/6] new azure syntax? --- .azure-pipelines/test-template.yml | 44 ++++++++++++++++++------------ 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/.azure-pipelines/test-template.yml b/.azure-pipelines/test-template.yml index a5352d8c39..19ed2dbd66 100644 --- a/.azure-pipelines/test-template.yml +++ b/.azure-pipelines/test-template.yml @@ -1,17 +1,27 @@ -pool: - vmImage: 'ubuntu-latest' -steps: -- bash: conda create -n foo -q --yes -c conda-forge -c bioconda python=$(python.version) numpy scipy matplotlib==3.1.1 nose flake8 plotly pysam pyBigWig py2bit deeptoolsintervals - displayName: Installing dependencies -- bash: | - source activate foo - python -m pip install . --no-deps --ignore-installed -vvv - displayName: Installing deeptools -- bash: | - source activate foo - flake8 . --exclude=.venv,.build,build --ignore=E501,F403,E402,F999,F405,E722,W504,W605 - displayName: flake8 -- bash: | - source activate foo - nosetests --with-doctest -sv deeptools - displayName: Test deepTools +trigger: + branches: + include: + - '*' +pr: + branches: + include: + - '*' +jobs: +- job: install_deeptools_run_tests + pool: + vmImage: 'ubuntu-latest' + steps: + - bash: conda create -n foo -q --yes -c conda-forge -c bioconda python=$(python.version) numpy scipy matplotlib==3.1.1 nose flake8 plotly pysam pyBigWig py2bit deeptoolsintervals + displayName: Installing dependencies + - bash: | + source activate foo + python -m pip install . --no-deps --ignore-installed -vvv + displayName: Installing deeptools + - bash: | + source activate foo + flake8 . --exclude=.venv,.build,build --ignore=E501,F403,E402,F999,F405,E722,W504,W605 + displayName: flake8 + - bash: | + source activate foo + nosetests --with-doctest -sv deeptools + displayName: Test deepTools From 96d0bc99eea480a1ddadba288e174c2af36b44f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gr=C3=BCning?= Date: Mon, 28 Feb 2022 20:16:51 +0100 Subject: [PATCH 5/6] run also on PR --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index cb0973ca9d..b8b360d9ff 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,5 +1,5 @@ name: Test -on: [push] +on: [push, pull_request] jobs: build-linux: name: Test on Linux From 6b624bc62ed4183759e791110ad363d6747000aa Mon Sep 17 00:00:00 2001 From: Josh Burdick Date: Tue, 1 Mar 2022 10:59:26 -0500 Subject: [PATCH 6/6] Sketch allowing pairs of stranded files in computeMatrix --- deeptools/computeMatrix.py | 1 + deeptools/heatmapper.py | 12 +-- deeptools/parserCommon.py | 17 ++++ deeptools/pyBigWigStranded.py | 143 ++++++++++++++++++++++++++++++++++ 4 files changed, 168 insertions(+), 5 deletions(-) create mode 100644 deeptools/pyBigWigStranded.py diff --git a/deeptools/computeMatrix.py b/deeptools/computeMatrix.py index 345bbc79a3..8d50bfd764 100644 --- a/deeptools/computeMatrix.py +++ b/deeptools/computeMatrix.py @@ -19,6 +19,7 @@ def parse_arguments(args=None): argparse.ArgumentParser( formatter_class=argparse.RawDescriptionHelpFormatter, description=""" +(This version should work for stranded bigWig files.) This tool calculates scores per genome regions and prepares an intermediate file that can be used with ``plotHeatmap`` and ``plotProfiles``. Typically, the genome regions are genes, but any other regions defined in a BED file can be used. diff --git a/deeptools/heatmapper.py b/deeptools/heatmapper.py index 6a95ba0052..8973e35720 100644 --- a/deeptools/heatmapper.py +++ b/deeptools/heatmapper.py @@ -10,6 +10,7 @@ from deeptools.utilities import toString, toBytes, smartLabels from deeptools.heatmapper_utilities import getProfileTicks +from deeptools.pyBigWigStranded import openBigWigGuessingStrandedness old_settings = np.seterr(all='ignore') @@ -364,8 +365,8 @@ def compute_sub_matrix_worker(self, chrom, start, end, score_file_list, paramete # read BAM or scores file score_file_handles = [] for sc_file in score_file_list: - score_file_handles.append(pyBigWig.open(sc_file)) - + score_file_handles.append( + openBigWigGuessingStrandedness(sc_file)) # determine the number of matrix columns based on the lengths # given by the user, times the number of score files matrix_cols = len(score_file_list) * \ @@ -531,7 +532,7 @@ def compute_sub_matrix_worker(self, chrom, start, end, score_file_list, paramete for sc_handler in score_file_handles: # We're only supporting bigWig files at this point cov = heatmapper.coverage_from_big_wig( - sc_handler, feature_chrom, zones, + sc_handler, feature_chrom, feature_strand, zones, parameters['bin size'], parameters['bin avg type'], parameters['missing data as zero'], @@ -652,7 +653,7 @@ def change_chrom_names(chrom): return chrom @staticmethod - def coverage_from_big_wig(bigwig, chrom, zones, binSize, avgType, nansAsZeros=False, verbose=True): + def coverage_from_big_wig(bigwig, chrom, strand, zones, binSize, avgType, nansAsZeros=False, verbose=True): """ uses pyBigWig @@ -716,7 +717,8 @@ def coverage_from_big_wig(bigwig, chrom, zones, binSize, avgType, nansAsZeros=Fa endIdx += end - start if start < end: # This won't be the case if we extend off the front of a chromosome, such as (-100, 0) - values_array[startIdx:endIdx] = bigwig.values(chrom, start, end) + # jtb: also adding in strand + values_array[startIdx:endIdx] = bigwig.values(chrom, start, end, strand) if end < region[1]: startIdx = endIdx endIdx += region[1] - end diff --git a/deeptools/parserCommon.py b/deeptools/parserCommon.py index ef4f4d0748..8eed12b8e1 100755 --- a/deeptools/parserCommon.py +++ b/deeptools/parserCommon.py @@ -914,3 +914,20 @@ def __call__(self, parser, args, values, option_string=None): raise argparse.ArgumentTypeError(msg) setattr(args, self.dest, values) return RequiredLength + + +def stranded_bigwig_file_pair(): + """Common arguments related to pairs of stranded bigWig files. + """ + parser = argparse.ArgumentParser(add_help=False) + group = parser.add_argument_group('Options for stranded pairs of bigWig files') + + group.add_argument('--strandedBigWigSuffix', + help='If specified, then files ending with the ' + 'first suffix will be treated as the + strand ' + 'of a pair of bigWig files, and the second ' + 'suffix will be used for the - strand.', + default='.fwd.bw,.rev.bw', + type=str, + required=False) + return parser diff --git a/deeptools/pyBigWigStranded.py b/deeptools/pyBigWigStranded.py new file mode 100644 index 0000000000..ee98e54cbd --- /dev/null +++ b/deeptools/pyBigWigStranded.py @@ -0,0 +1,143 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import re + +import numpy as np +import pyBigWig + +class pyBigWig1: + """Wrapper around pyBigWig to ignore strand. + + This modifies some pyBigWig methods to expect (and ignore) strand + information. This way, callers can get data either from + a pyBigWig object, or a strandedPyBigWig object, without + knowing whether the file is stranded. + + Possibly this module belongs in pyBigWig. + + Also, this module (currently) requires numpy support in pyBigWig. + """ + def __init__(self, bigWigFile): + """Constructor is a wrapper around pyBigWig.pyBigWig. + """ + self.bigWig = pyBigWig.open(bigWigFile) + + def values(self, chrom, start, end, strand='+'): + """Gets values at a region. + + This is like pyBigWig.values(), except that expects the strand argument + (which it ignores). + ??? possibly this should reverse values, if strand is '-' ? + """ + return self.bigWig.values(chrom, start, end) + + def chroms(self, chroms=None): + """This is just forwarded. + """ + if chroms: + return self.bigWig.chroms(chroms) + else: + return self.bigWig.chroms() + + +class strandedPyBigWig: + """Wrapper for accessing pairs of 'stranded' bigWig files. + + This has almost the same API as pyBigWig, except that + most methods include a 'strand' argument. When these are + called, this gets the result from the appropriate stranded file. + + Note that that these methods return the absolute value of the + contents of the bigWig. This is because the - strand file is + sometimes stored negated. It seems simpler to just always + return the absolute value, than to add a flag indicating + whether the - strand file is stored negated. + """ + + def __init__(self, plusStrandFile, + suffixPairs=['fwd.bw,rev.bw', 'plus.bw,minus.bw', 'pl.bw,mn.bw', 'p.bw,m.bw'], + minusStrandFile=None): + """Constructor opens a pair of bigWig files for reading. + + suffixPairs: these define the + and - filename pairing. + minusStrandFile: the minus strand file can also simply be given + explicitly (this overrides the name computed using suffixPairs) + """ + # if - strand file isn't given, attempt to compute it + if not minusStrandFile: + minusStrandFile = getMinusStrandFile(plusStrandFile, suffixPairs) + # if we still don't have a - strand filename, throw an error + if not minusStrandFile: + raise ValueError('couldn\'t compute - strand filename') + # open files + self.plusBigWig = pyBigWig1(plusStrandFile) + self.minusBigWig = pyBigWig1(minusStrandFile) + + def chroms(self, chroms=None): + """Gets chromosomes. + + This just uses the + strand file's chromosomes (without + checking that they agree with the - strand file's). + """ + if chroms: + return self.plusBigWig.chroms(chroms) + else: + return self.plusBigWig.chroms() + + def values(self, chrom, start, end, strand='+'): + """Gets values at a region specified by an object. + + This will get values from plusBigWig or minusBigWig, + depending on the strand. + Returns: absolute value of values at that region. + If strand is '-', these will be in descending coordinate + order; otherwise they'll be in ascending coordinate order. + """ + # get the relevant bigWig object + bigWig = self.minusBigWig if strand == '-' else self.plusBigWig + # forward this call to that object, taking absolute value + return np.abs(bigWig.values(chrom, start, end, strand)) + +def getMinusStrandFile(plusStrandFile, + suffixPairs=['fwd.bw,rev.bw', 'plus.bw,minus.bw', 'pl.bw,mn.bw', 'p.bw,m.bw']): + """Converts the + strand filename to the - strand filename. + + plusStrandFilename: name of the + strand file + suffixPairs: these define the + and - filename pairing. + For each A,B pair, if plusStrandFile ends with A, then minusStrandFile is + assumed to end with B. + Returns: name of the - strand file, or None if the + strand file's + name didn't match any of the known + strand file's suffixes. + """ + # loop through + suffix, until a matching one is found + for suffixPair in suffixPairs: + (plusSuffix, minusSuffix) = suffixPair.split(',') + # try replacing the suffix (at the end of the filename) + minusStrandFile = re.sub(re.escape(plusSuffix) + '$', minusSuffix, + plusStrandFile) + # if this is different, then the replacement happened + if minusStrandFile != plusStrandFile: + return minusStrandFile + # at this point, none of the suffixes matched + return None + +def openBigWigGuessingStrandedness(bigWigFile, + suffixPairs=['fwd.bw,rev.bw', 'plus.bw,minus.bw', 'pl.bw,mn.bw', 'p.bw,m.bw']): + """Opens a bigWig file/files, guessing whether they're stranded. + + bigWigFile: name of the file (or the + strand file) + suffixPairs: these define the and - filename pairing + (as elsehwere) + Returns: an object for reading from a bigWig file, either stranded + (if bigWigFile ends with one of the suffixes), or not + (if it doesn't). + """ + # see if bigWigFile "looks" stranded + minusStrandFile = getMinusStrandFile(bigWigFile, suffixPairs) + if minusStrandFile: + # if so, assume it's a stranded pair + return strandedPyBigWig(bigWigFile, minusStrandFile=minusStrandFile) + else: + # otherwise, assume it's unstranded + return pyBigWig1(bigWigFile, suffixPairs)