66 */
77package org .qcmg .coverage ;
88
9+
910import java .io .File ;
10- import java .util .Arrays ;
11- import java .util .HashMap ;
12- import java .util .HashSet ;
13- import java .util .Iterator ;
11+ import java .io .IOException ;
12+ import java .util .*;
1413import java .util .concurrent .atomic .AtomicLong ;
1514
1615import htsjdk .samtools .SamReader ;
@@ -29,15 +28,20 @@ class CoverageJob implements Job {
2928 private final HashSet <Gff3Record > features ;
3029 private int [] perBaseCoverages ; // Uses 0-based coordinate indexing
3130 private final HashMap <String , HashMap <Integer , AtomicLong >> idToCoverageToBaseCountMap = new HashMap <String , HashMap <Integer , AtomicLong >>();
31+ private final HashMap <String , List <LowReadDepthRegion >> lowReadDepthMap = new HashMap <>();
3232 private final QLogger logger ;
3333 private final QueryExecutor filter ;
3434 private final boolean perFeatureFlag ;
35- private final HashSet <SamReader > fileReaders = new HashSet <SamReader >();
35+
36+ private final HashSet <SamReader > fileReaders = new HashSet <>();
3637 private final Algorithm alg ;
3738 private final ReadsNumberCounter counterIn ;
3839 private final ReadsNumberCounter counterOut ;
3940 private boolean fullyPopulated ;
4041
42+
43+
44+
4145 CoverageJob (final String refName , final int refLength , final HashMap <String , HashSet <Gff3Record >> refToFeaturesMap ,
4246 final HashSet <Pair <File , File >> filePairs , final QueryExecutor filter ,
4347 final boolean perFeatureFlag , final Algorithm algorithm , final ReadsNumberCounter counterIn ,final ReadsNumberCounter counterOut ) throws Exception {
@@ -72,6 +76,11 @@ synchronized public HashMap<String, HashMap<Integer, AtomicLong>> getResults() {
7276 return idToCoverageToBaseCountMap ;
7377 }
7478
79+ @ Override
80+ synchronized public HashMap <String , List <LowReadDepthRegion >> getLowReadDepthResults () {
81+ return lowReadDepthMap ;
82+ }
83+
7584 @ Override
7685 public String toString () {
7786 return refName + " coverage" ;
@@ -86,7 +95,7 @@ synchronized public void run() throws Exception{
8695 logger .info ("performing coverage for: " + refName );
8796 performCoverage ();
8897 logger .info ("assembling results for: " + refName );
89- assembleResults ();
98+ assembleResultsByAlgorithm ();
9099 logger .debug ("assembled results for: " + refName + " are: " + getResults ());
91100 logger .info ("ending job for: " + refName );
92101 } catch (Exception ex ) {
@@ -97,6 +106,7 @@ synchronized public void run() throws Exception{
97106
98107 void constructCoverageMap () {
99108 perBaseCoverages = new int [refLength ]; // All elements default to zero
109+
100110 boolean isArrayFull = true ;
101111 // Initially set all values to -1 for no coverage at that coordinate
102112 Arrays .fill (perBaseCoverages , -1 );
@@ -122,6 +132,14 @@ void constructCoverageMap() {
122132 logger .info ("fully populated: " + isArrayFull );
123133 }
124134
135+ private void assembleResultsByAlgorithm () throws IOException {
136+ if (alg .getCoverageType ().equals (CoverageType .LOW_READDEPTH )) {
137+ assembleLowReadDepthResults ();
138+ } else {
139+ assembleResults ( );
140+ }
141+ }
142+
125143 private void performCoverage () throws Exception {
126144 for (final SamReader fileReader : fileReaders ) {
127145
@@ -180,17 +198,56 @@ private void performCoverage() throws Exception {
180198
181199 private void assembleResults () {
182200 for (Gff3Record feature : features ) {
183- String id = null ;
201+ String id ;
184202 if (perFeatureFlag ) {
185203 id = feature .getRawData ();
186204 } else {
187205 id = feature .getType ();
188206 }
189- HashMap <Integer , AtomicLong > covToBaseCountMap = idToCoverageToBaseCountMap .get (id );
190- if (null == covToBaseCountMap ) {
191- covToBaseCountMap = new HashMap <Integer , AtomicLong >();
192- idToCoverageToBaseCountMap .put (id , covToBaseCountMap );
207+ HashMap <Integer , AtomicLong > covToBaseCountMap = idToCoverageToBaseCountMap .computeIfAbsent (id , k -> new HashMap <>());
208+ for (int pos = feature .getStart (); pos <= feature .getEnd (); pos ++) {
209+ // GFF3 format uses 1-based feature coordinates; avoid problem
210+ // of GFF3 accidentally containing 0 coordinate
211+ if (pos > 0 && (pos - 1 ) < perBaseCoverages .length ) {
212+ // Adjust from 1-based to 0-based indexing
213+ int cov = perBaseCoverages [pos - 1 ];
214+ if (-1 >= cov ) {
215+ throw new IllegalStateException (
216+ "Malformed internal state. -1 coverage values are invalid. Report this bug." );
217+ }
218+ covToBaseCountMap .computeIfAbsent (cov , v -> new AtomicLong ()).incrementAndGet ();
219+ }
220+ }
221+ }
222+ // Attempt to release coverage memory by nullifying
223+ perBaseCoverages = null ;
224+ }
225+
226+ private int addLowReadDepthRegionIfNeeded (int cov , int pos , int coverageLimit , int startPos , HashMap <String , List <LowReadDepthRegion >> lowRDepthMap ) {
227+ if (cov < coverageLimit ) {
228+ if (startPos == -1 ) {
229+ startPos = pos ;
230+ }
231+ } else {
232+ //Already a low read depth position previously, but now is higher coverage, so time
233+ //to create the low read depth region and reset the startPos
234+ if (startPos != -1 ) {
235+ int endPos = pos - 1 ;//the end is the pos - 1
236+ lowRDepthMap .get (refName ).add (new LowReadDepthRegion (refName , startPos , endPos , coverageLimit ));
237+ startPos = -1 ;
193238 }
239+ }
240+ return (startPos );
241+ }
242+
243+ private void assembleLowReadDepthResults () throws IOException {
244+ for (Gff3Record feature : features ) {
245+ //If low read depth flag is being requested, then we need to find regions with <=8 and <=12 coverage
246+ LowReadDepthAlgorithm lowRdepthAlg = (LowReadDepthAlgorithm ) alg ;
247+ lowReadDepthMap .computeIfAbsent (refName , k -> new ArrayList <>());
248+
249+ int lowReadDepthStart = -1 ;
250+
194251 for (int pos = feature .getStart (); pos <= feature .getEnd (); pos ++) {
195252 // GFF3 format uses 1-based feature coordinates; avoid problem
196253 // of GFF3 accidentally containing 0 coordinate
@@ -201,7 +258,15 @@ private void assembleResults() {
201258 throw new IllegalStateException (
202259 "Malformed internal state. -1 coverage values are invalid. Report this bug." );
203260 }
204- covToBaseCountMap .computeIfAbsent (cov , v -> new AtomicLong ()).incrementAndGet ();
261+
262+ lowReadDepthStart = addLowReadDepthRegionIfNeeded (cov ,pos , lowRdepthAlg .getReaddepthCutoff (), lowReadDepthStart , lowReadDepthMap );
263+
264+ //add final low read depth region if we are at the end of the feature
265+ if (pos == feature .getEnd ()) {
266+ if (lowReadDepthStart != -1 ) {
267+ lowReadDepthMap .get (refName ).add (new LowReadDepthRegion (refName , lowReadDepthStart , pos , lowRdepthAlg .getReaddepthCutoff ()));
268+ }
269+ }
205270 }
206271 }
207272 }
0 commit comments