Skip to content

Commit 6992e65

Browse files
authored
Merge pull request #373 from AdamaJava/qcoverage_blacklist
Qcoverage blacklist
2 parents 3f0470f + 85d83b4 commit 6992e65

20 files changed

+869
-82
lines changed

qcoverage/src/org/qcmg/coverage/Algorithm.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
interface Algorithm {
1212
public String getName();
13+
public CoverageType getCoverageType();
1314
public void applyTo(final SAMRecord read, Object coverageCounter);
1415
public void applyTo(final SAMRecord read, Object coverageCounter, boolean fullyPopulated);
1516
// public void applyTo(final SAMRecord read, final int[] perBaseCoverages);

qcoverage/src/org/qcmg/coverage/Configuration.java

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
import org.qcmg.qbamfilter.query.QueryExecutor;
1414

1515
public final class Configuration {
16-
private final boolean perFeatureFlag;
16+
private boolean perFeatureFlag;
1717
private final int numberThreads;
1818
private final String type;
1919
private final String outputFileName;
@@ -47,6 +47,9 @@ public Configuration(final Options options) throws Exception {
4747
} else if (type.equals("physical") || type.equals("phys")) {
4848
coverageType = CoverageType.PHYSICAL;
4949
algorithm = new PhysicalCoverageAlgorithm();
50+
} else if (type.equals("low_readdepth")) {
51+
coverageType = CoverageType.LOW_READDEPTH;
52+
algorithm = new LowReadDepthAlgorithm(options.getLowReadDepthCutoff());
5053
} else {
5154
throw new Exception("Unknown coverage type: '" + type + "'");
5255
}
@@ -63,6 +66,12 @@ public Configuration(final Options options) throws Exception {
6366
inferMissingBaiFileNames();
6467

6568
perFeatureFlag = options.hasPerFeatureOption();
69+
70+
//Set to by feature for low read depth option
71+
if (type.equals("low_readdepth")) {
72+
perFeatureFlag = true;
73+
}
74+
6675
if (options.hasNumberThreadsOption()) {
6776
numberThreads = options.getNumberThreads()[0];
6877
} else {

qcoverage/src/org/qcmg/coverage/Coverage.java

Lines changed: 51 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,7 @@
2525
import java.math.BigInteger;
2626
import java.text.DateFormat;
2727
import java.text.SimpleDateFormat;
28-
import java.util.ArrayList;
29-
import java.util.Calendar;
30-
import java.util.List;
28+
import java.util.*;
3129

3230
public final class Coverage {
3331
private final Options options;
@@ -38,7 +36,7 @@ public Coverage(final Options options) throws Exception {
3836
this.options = options;
3937
Configuration invariants = new Configuration(options);
4038
jobQueue = new JobQueue(invariants);
41-
saveCoverageReport();
39+
saveCoverageReport(invariants.getCoverageType());
4240
}
4341

4442
/**
@@ -200,24 +198,57 @@ private static VcfRecord convertCoverageToVCFRecord(org.qcmg.coverage.CoverageRe
200198
return vcf;
201199
}
202200

203-
private void saveCoverageReport() throws Exception {
204-
final QCoverageStats stats = new QCoverageStats();
205-
for (final CoverageReport report : jobQueue.getCoverageReport()) {
206-
stats.getCoverageReport().add(report);
207-
}
208-
209-
if (options.hasVcfFlag() && options.hasPerFeatureOption()) {
210-
writeVCFReport(stats);
211-
}
212-
213-
if (options.hasXmlFlag()) {
214-
writeXMLCoverageReport(stats);
201+
private void saveCoverageReport(CoverageType coverageType) throws Exception {
202+
203+
//save low read depth bed file
204+
if (coverageType.equals(CoverageType.LOW_READDEPTH)) {
205+
206+
String outfile = options.getOutputFileNames()[0];
207+
//rename .bed extension if present to add the mnin coverage value
208+
if (outfile.endsWith(".bed")) {
209+
outfile = outfile.substring(0, outfile.length() - 4);
210+
}
211+
outfile = String.format("%s.low_read_depth.%s.bed", outfile, options.getLowReadDepthCutoff());
212+
213+
final HashMap<String, List<LowReadDepthRegion>> lowReadDepthResultsFinalMap = jobQueue.getLowReadDepthResultsFinalMap();
214+
LinkedHashSet<String> refNamesOrdered = jobQueue.getRefNamesOrdered();
215+
216+
try (final BufferedWriter out = new BufferedWriter(new FileWriter(outfile))) {
217+
for (String refName : refNamesOrdered) {
218+
List<LowReadDepthRegion> lowReadDepthValues = lowReadDepthResultsFinalMap.get(refName);
219+
if (lowReadDepthValues == null) {
220+
continue;
221+
}
222+
//start and end so that bed is in correct order
223+
lowReadDepthValues.sort(new LowReadDepthRegionComparator(refNamesOrdered));
224+
for (final LowReadDepthRegion region : lowReadDepthValues) {
225+
out.write(region.toBedString() + StringUtils.RETURN);
226+
}
227+
}
228+
}
229+
} else {
230+
//save coverage report
231+
final QCoverageStats stats = new QCoverageStats();
232+
for (final CoverageReport report : jobQueue.getCoverageReport()) {
233+
stats.getCoverageReport().add(report);
234+
}
235+
if (options.hasVcfFlag() && options.hasPerFeatureOption()) {
236+
writeVCFReport(stats);
237+
}
238+
239+
if (options.hasXmlFlag()) {
240+
writeXMLCoverageReport(stats);
241+
}
242+
243+
if (options.hasTxtFlag()) {
244+
if( options.hasPerFeatureOption()) writePerFeatureTabDelimitedCoverageReport(stats);
245+
else writePerTypeTabDelimitedCoverageReport(stats);
246+
}
215247
}
216248

217-
if (options.hasTxtFlag()) {
218-
if( options.hasPerFeatureOption()) writePerFeatureTabDelimitedCoverageReport(stats);
219-
else writePerTypeTabDelimitedCoverageReport(stats);
220-
}
249+
250+
251+
221252

222253
}
223254

qcoverage/src/org/qcmg/coverage/CoverageJob.java

Lines changed: 77 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,10 @@
66
*/
77
package org.qcmg.coverage;
88

9+
910
import java.io.File;
10-
import java.util.Arrays;
11-
import java.util.HashMap;
12-
import java.util.HashSet;
13-
import java.util.Iterator;
11+
import java.io.IOException;
12+
import java.util.*;
1413
import java.util.concurrent.atomic.AtomicLong;
1514

1615
import htsjdk.samtools.SamReader;
@@ -29,15 +28,20 @@ class CoverageJob implements Job {
2928
private final HashSet<Gff3Record> features;
3029
private int[] perBaseCoverages; // Uses 0-based coordinate indexing
3130
private final HashMap<String, HashMap<Integer, AtomicLong>> idToCoverageToBaseCountMap = new HashMap<String, HashMap<Integer, AtomicLong>>();
31+
private final HashMap<String, List<LowReadDepthRegion>> lowReadDepthMap = new HashMap<>();
3232
private final QLogger logger;
3333
private final QueryExecutor filter;
3434
private final boolean perFeatureFlag;
35-
private final HashSet<SamReader> fileReaders = new HashSet<SamReader>();
35+
36+
private final HashSet<SamReader> fileReaders = new HashSet<>();
3637
private final Algorithm alg;
3738
private final ReadsNumberCounter counterIn;
3839
private final ReadsNumberCounter counterOut;
3940
private boolean fullyPopulated;
4041

42+
43+
44+
4145
CoverageJob(final String refName, final int refLength, final HashMap<String, HashSet<Gff3Record>> refToFeaturesMap,
4246
final HashSet<Pair<File, File>> filePairs, final QueryExecutor filter,
4347
final boolean perFeatureFlag, final Algorithm algorithm, final ReadsNumberCounter counterIn,final ReadsNumberCounter counterOut) throws Exception {
@@ -72,6 +76,11 @@ synchronized public HashMap<String, HashMap<Integer, AtomicLong>> getResults() {
7276
return idToCoverageToBaseCountMap;
7377
}
7478

79+
@Override
80+
synchronized public HashMap<String, List<LowReadDepthRegion>> getLowReadDepthResults() {
81+
return lowReadDepthMap;
82+
}
83+
7584
@Override
7685
public String toString() {
7786
return refName + " coverage";
@@ -86,7 +95,7 @@ synchronized public void run() throws Exception{
8695
logger.info("performing coverage for: " + refName);
8796
performCoverage();
8897
logger.info("assembling results for: " + refName);
89-
assembleResults();
98+
assembleResultsByAlgorithm();
9099
logger.debug("assembled results for: " + refName + " are: " + getResults());
91100
logger.info("ending job for: " + refName);
92101
} catch (Exception ex) {
@@ -97,6 +106,7 @@ synchronized public void run() throws Exception{
97106

98107
void constructCoverageMap() {
99108
perBaseCoverages = new int[refLength]; // All elements default to zero
109+
100110
boolean isArrayFull = true;
101111
// Initially set all values to -1 for no coverage at that coordinate
102112
Arrays.fill(perBaseCoverages, -1);
@@ -122,6 +132,14 @@ void constructCoverageMap() {
122132
logger.info("fully populated: " + isArrayFull);
123133
}
124134

135+
private void assembleResultsByAlgorithm() throws IOException {
136+
if (alg.getCoverageType().equals(CoverageType.LOW_READDEPTH)) {
137+
assembleLowReadDepthResults();
138+
} else {
139+
assembleResults( );
140+
}
141+
}
142+
125143
private void performCoverage() throws Exception {
126144
for (final SamReader fileReader : fileReaders) {
127145

@@ -180,17 +198,56 @@ private void performCoverage() throws Exception {
180198

181199
private void assembleResults() {
182200
for (Gff3Record feature : features) {
183-
String id = null;
201+
String id;
184202
if (perFeatureFlag) {
185203
id = feature.getRawData();
186204
} else {
187205
id = feature.getType();
188206
}
189-
HashMap<Integer, AtomicLong> covToBaseCountMap = idToCoverageToBaseCountMap.get(id);
190-
if (null == covToBaseCountMap) {
191-
covToBaseCountMap = new HashMap<Integer, AtomicLong>();
192-
idToCoverageToBaseCountMap.put(id, covToBaseCountMap);
207+
HashMap<Integer, AtomicLong> covToBaseCountMap = idToCoverageToBaseCountMap.computeIfAbsent(id, k -> new HashMap<>());
208+
for (int pos = feature.getStart(); pos <= feature.getEnd(); pos++) {
209+
// GFF3 format uses 1-based feature coordinates; avoid problem
210+
// of GFF3 accidentally containing 0 coordinate
211+
if (pos > 0 && (pos - 1) < perBaseCoverages.length) {
212+
// Adjust from 1-based to 0-based indexing
213+
int cov = perBaseCoverages[pos - 1];
214+
if (-1 >= cov) {
215+
throw new IllegalStateException(
216+
"Malformed internal state. -1 coverage values are invalid. Report this bug.");
217+
}
218+
covToBaseCountMap.computeIfAbsent(cov, v -> new AtomicLong()).incrementAndGet();
219+
}
220+
}
221+
}
222+
// Attempt to release coverage memory by nullifying
223+
perBaseCoverages = null;
224+
}
225+
226+
private int addLowReadDepthRegionIfNeeded(int cov, int pos, int coverageLimit, int startPos, HashMap<String, List<LowReadDepthRegion>> lowRDepthMap) {
227+
if (cov < coverageLimit) {
228+
if (startPos == -1) {
229+
startPos = pos;
230+
}
231+
} else {
232+
//Already a low read depth position previously, but now is higher coverage, so time
233+
//to create the low read depth region and reset the startPos
234+
if (startPos != -1) {
235+
int endPos = pos - 1;//the end is the pos - 1
236+
lowRDepthMap.get(refName).add(new LowReadDepthRegion(refName, startPos, endPos, coverageLimit));
237+
startPos = -1;
193238
}
239+
}
240+
return(startPos);
241+
}
242+
243+
private void assembleLowReadDepthResults() throws IOException {
244+
for (Gff3Record feature : features) {
245+
//If low read depth flag is being requested, then we need to find regions with <=8 and <=12 coverage
246+
LowReadDepthAlgorithm lowRdepthAlg = (LowReadDepthAlgorithm) alg;
247+
lowReadDepthMap.computeIfAbsent(refName, k -> new ArrayList<>());
248+
249+
int lowReadDepthStart = -1;
250+
194251
for (int pos = feature.getStart(); pos <= feature.getEnd(); pos++) {
195252
// GFF3 format uses 1-based feature coordinates; avoid problem
196253
// of GFF3 accidentally containing 0 coordinate
@@ -201,7 +258,15 @@ private void assembleResults() {
201258
throw new IllegalStateException(
202259
"Malformed internal state. -1 coverage values are invalid. Report this bug.");
203260
}
204-
covToBaseCountMap.computeIfAbsent(cov, v -> new AtomicLong()).incrementAndGet();
261+
262+
lowReadDepthStart = addLowReadDepthRegionIfNeeded(cov,pos, lowRdepthAlg.getReaddepthCutoff(), lowReadDepthStart, lowReadDepthMap);
263+
264+
//add final low read depth region if we are at the end of the feature
265+
if (pos == feature.getEnd()) {
266+
if (lowReadDepthStart != -1) {
267+
lowReadDepthMap.get(refName).add(new LowReadDepthRegion(refName, lowReadDepthStart, pos, lowRdepthAlg.getReaddepthCutoff()));
268+
}
269+
}
205270
}
206271
}
207272
}

qcoverage/src/org/qcmg/coverage/CoverageType.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,10 @@
66
public enum CoverageType {
77

88
SEQUENCE("sequence"),
9-
PHYSICAL("physical");
9+
PHYSICAL("physical"),
10+
LOW_READDEPTH("low_readdepth");
11+
12+
1013
private final String value;
1114

1215
CoverageType(String v) {

qcoverage/src/org/qcmg/coverage/Job.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,12 @@
44
package org.qcmg.coverage;
55

66
import java.util.HashMap;
7+
import java.util.List;
78
import java.util.concurrent.atomic.AtomicLong;
89

910
interface Job {
1011
public HashMap<String, HashMap<Integer, AtomicLong>> getResults();
12+
public HashMap<String, List<LowReadDepthRegion>> getLowReadDepthResults();
1113
public void run() throws Exception;
1214
public String toString();
1315
}

0 commit comments

Comments
 (0)