88
99import  static  java .util .Comparator .comparing ;
1010
11+ import  java .io .BufferedReader ;
1112import  java .io .File ;
1213import  java .io .IOException ;
1314import  java .nio .charset .StandardCharsets ;
3940import  org .apache .commons .math3 .util .Pair ;
4041import  org .qcmg .common .log .QLogger ;
4142import  org .qcmg .common .log .QLoggerFactory ;
43+ import  org .qcmg .common .model .PositionRange ;
4244import  org .qcmg .common .string .StringUtils ;
4345import  org .qcmg .common .util .BaseUtils ;
4446import  org .qcmg .common .util .ChrPositionCache ;
@@ -336,11 +338,14 @@ public static float getVAF(int[] counts, String ref) {
336338};
337339	}
338340
339- 	public  static  Pair <SigMeta , TMap <String , TIntByteHashMap >> loadSignatureGenotype (File  file , int  minCoverage , int  minRGCoverage ) throws  IOException  {
340- 		return  loadSignatureGenotype (file , minCoverage , minRGCoverage , HOM_CUTOFF , HET_UPPER_CUTOFF , HET_LOWER_CUTOFF );
341+     public  static  Pair <SigMeta , TMap <String , TIntByteHashMap >> loadSignatureGenotype (File  file , int  minCoverage , int  minRGCoverage ) throws  IOException  {
342+         return  loadSignatureGenotype (file , minCoverage , minRGCoverage , HOM_CUTOFF , HET_UPPER_CUTOFF , HET_LOWER_CUTOFF , null );
343+     }
344+ 	public  static  Pair <SigMeta , TMap <String , TIntByteHashMap >> loadSignatureGenotype (File  file , int  minCoverage , int  minRGCoverage , Map <String , List <PositionRange >> blockedPositions ) throws  IOException  {
345+ 		return  loadSignatureGenotype (file , minCoverage , minRGCoverage , HOM_CUTOFF , HET_UPPER_CUTOFF , HET_LOWER_CUTOFF , blockedPositions );
341346	}
342347
343- 	public  static  Pair <SigMeta , TMap <String , TIntByteHashMap >> loadSignatureGenotype (File  file , int  minCoverage , int  minRGCoverage , float  homCutoff , float  upperHetCutoff , float  lowerHetCutoff ) throws  IOException  {
348+ 	public  static  Pair <SigMeta , TMap <String , TIntByteHashMap >> loadSignatureGenotype (File  file , int  minCoverage , int  minRGCoverage , float  homCutoff , float  upperHetCutoff , float  lowerHetCutoff ,  Map < String ,  List < PositionRange >>  blockedPositions ) throws  IOException  {
344349		if  (null  == file ) {
345350			throw  new  IllegalArgumentException ("Null file object passed to loadSignatureGenotype" );
346351		}
@@ -361,16 +366,55 @@ public static Pair<SigMeta, TMap<String, TIntByteHashMap>> loadSignatureGenotype
361366			}
362367
363368			if  (null  != sm  && sm .isValid ()) {
364- 				getDataFromBespokeLayout (file , minCoverage , minRGCoverage , ratios , rgRatios , rgIds , reader , homCutoff , upperHetCutoff , lowerHetCutoff );
369+ 				getDataFromBespokeLayout (file , minCoverage , minRGCoverage , ratios , rgRatios , rgIds , reader , homCutoff , upperHetCutoff , lowerHetCutoff ,  blockedPositions );
365370			} else  {
366371				rgRatios .put ("all" , loadSignatureRatiosFloatGenotypeNew (file , MINIMUM_COVERAGE , homCutoff , upperHetCutoff , lowerHetCutoff ));
367372			}
368373		}
369374		return  new  Pair <>(sm , rgRatios );
370375	}
376+ 
377+     public  static  void  loadBlockListIntoMap (String  blocklistFile , Map <String , List <PositionRange >> map ) {
378+         try  {
379+             // Use buffered reading with larger buffer for better I/O performance 
380+             try  (BufferedReader  reader  = Files .newBufferedReader (Paths .get (blocklistFile ), StandardCharsets .UTF_8 )) {
381+ 
382+                 String  line ;
383+                 while  ((line  = reader .readLine ()) != null ) {
384+                     // Skip comments and empty lines early 
385+                     if  (line .isEmpty () || line .charAt (0 ) == '#' ) continue ;
386+                     
387+                     // Use indexOf instead of split for better performance 
388+                     int  firstTab  = line .indexOf ('\t' );
389+                     if  (firstTab  == -1 ) continue ;
390+                     
391+                     int  secondTab  = line .indexOf ('\t' , firstTab  + 1 );
392+                     if  (secondTab  == -1 ) continue ;
393+                     
394+                     // Check if there's a third tab (tokens.length >= 3 equivalent) 
395+                     int  thirdTab  = line .indexOf ('\t' , secondTab  + 1 );
396+                     if  (thirdTab  == -1  && secondTab  == line .length () - 1 ) continue ;
397+                     
398+                     try  {
399+                         String  contig  = line .substring (0 , firstTab );
400+                         int  start  = Integer .parseInt (line , firstTab  + 1 , secondTab , 10 );
401+                         int  stop  = Integer .parseInt (line , secondTab  + 1 ,
402+                             thirdTab  == -1  ? line .length () : thirdTab , 10 );
403+ 
404+                         map .computeIfAbsent (contig , v  -> new  ArrayList <>()).add (new  PositionRange (start , stop ));
405+                     } catch  (NumberFormatException  e ) {
406+                         // Skip malformed lines silently or log if needed 
407+                         logger .debug ("Skipping malformed line: "  + line );
408+                     }
409+                 }
410+             }
411+         } catch  (IOException  e ) {
412+             logger .error ("Error reading blocklist file: "  + blocklistFile , e );
413+         }
414+     }
371415
372416	public  static  void  getDataFromBespokeLayout (File  file , int  minCoverage , int  minRGCoverage , TIntByteHashMap  ratios ,
373- 				TMap <String , TIntByteHashMap > rgRatios , Map <String , String > rgIds , StringFileReader  reader , float  homCutoff , float  upperHetCutoff , float  lowerHetCutoff ) {
417+ 				TMap <String , TIntByteHashMap > rgRatios , Map <String , String > rgIds , StringFileReader  reader , float  homCutoff , float  upperHetCutoff , float  lowerHetCutoff ,  Map < String ,  List < PositionRange >>  blockedPositions ) {
374418		int  noOfRGs  = rgIds .size ();
375419		logger .debug ("Number of rgs for  "  + file .getAbsolutePath () + " is "  + noOfRGs );
376420
@@ -386,9 +430,26 @@ public static void getDataFromBespokeLayout(File file, int minCoverage, int minR
386430
387431			String  coverage  = line .substring (line .lastIndexOf (Constants .TAB_STRING ));
388432			String  chrPosString  = line .substring (0 , line .indexOf (Constants .TAB_STRING , line .indexOf (Constants .TAB_STRING ) + 1 ));
389- 			
390- 			
391- 			/* 
433+ 
434+             if  (null  != blockedPositions ) {
435+                 /* 
436+                 get chr and position 
437+                  */ 
438+                 int  tabIndex  = chrPosString .indexOf (Constants .TAB );
439+                 String  chr  = chrPosString .substring (0 , tabIndex );
440+                 List <PositionRange > list  = blockedPositions .get (chr );
441+                 if  (null  != list ) {
442+                     int  pos  = Integer .parseInt (chrPosString , tabIndex  + 1 , chrPosString .length (), 10 );
443+                     boolean  blocked  = list .stream ().anyMatch (r  -> r .containsPosition (pos ));
444+                     if  (blocked ) {
445+                         logger .debug ("Found blocked position for "  + chrPosString );
446+                         continue ;
447+                     }
448+                 }
449+             }
450+ 
451+ 
452+             /* 
392453			 * This should be in the QAF=t:5-0-0-0,rg4:2-0-0-0,rg1:1-0-0-0,rg2:2-0-0-0 format 
393454			 * Need to tease out the pertinent bits 
394455			 */ 
@@ -402,7 +463,6 @@ public static void getDataFromBespokeLayout(File file, int minCoverage, int minR
402463
403464				if  (isCodedGenotypeValid (genotype1 )) {
404465					cachePosition .set (ChrPositionCache .getStringIndex (chrPosString ));
405- 					
406466					ratios .put (cachePosition .get (), genotype1 );
407467					/* 
408468					 * Get rg data if we have more than 1 rg 
0 commit comments