diff --git a/qannotate/test/au/edu/qimr/qannotate/modes/ConfidenceModeTest.java b/qannotate/test/au/edu/qimr/qannotate/modes/ConfidenceModeTest.java index cf7c0db4b..2a57137f4 100644 --- a/qannotate/test/au/edu/qimr/qannotate/modes/ConfidenceModeTest.java +++ b/qannotate/test/au/edu/qimr/qannotate/modes/ConfidenceModeTest.java @@ -294,6 +294,10 @@ public void checkMIUN() { ConfidenceMode.checkMIUN(new String[]{"A"}, 90, "A3;C8", sb, 2, 3, null); assertEquals("", sb.toString()); + sb = new StringBuilder(); + ConfidenceMode.checkMIUN(new String[]{"AA"}, 90, "AA13;AC8", sb, 2, 3, null); + assertEquals("MIUN", sb.toString()); + // } @@ -811,6 +815,28 @@ public void realLifeMIUN2() { assertEquals("PASS", vcf.getSampleFormatRecord(2).getField(VcfHeaderUtils.FORMAT_FILTER)); assertEquals("PASS", vcf.getSampleFormatRecord(3).getField(VcfHeaderUtils.FORMAT_FILTER)); assertEquals("PASS", vcf.getSampleFormatRecord(4).getField(VcfHeaderUtils.FORMAT_FILTER)); + + } + + @Test + public void compoundSnpMIUN() { + /* + chr1 205931 . AA GG . . . 
GT:AD:DP:FF:FT:INF:NNS:OABS 0/0:6,0:6:AA47;AT1;A_1;GG3:.:.:.:AA3[]3[] 0/1:20,4:25:AA182;GG2;TA1:.:SOMATIC:4:AA +7[]13[];GA1[]0[];GG0[]4[] + */ + VcfRecord vcf = VcfUtils.createVcfRecord(ChrPositionUtils.getChrPosition("chr1", 205931, 205931), ".", "CA", "GG"); + vcf.setInfo("FLANK=GTAAAACTGGA;BaseQRankSum=0.325;ClippingRankSum=0.000;DP=58;ExcessHet=3.0103;FS=4.683;MQ=55.10;MQRankSum=-6.669;QD=4.63;ReadPosRankSum=-0.352;SOR=1.425;IN=1;DB;VLD;HOM=3,TATATGTAAAgCTGGATTAAT;EFF=downstream_gene_variant(MODIFIER||914|||MST1P2|unprocessed_pseudogene|NON_CODING|ENST00000457982||1),intergenic_region(MODIFIER||||||||||1)"); + vcf.setFormatFields(java.util.Arrays.asList( + "GT:AD:DP:FF:FT:INF:NNS:OABS", + "0/0:36,0:36:CA12;GG4;_A2:.:.:.:CA17[]19[];C_1[]0[]", + "0/1:102,11:114:AA1;CA30;CC1;CT1;C_3;GG50;GT1:.:SOMATIC:10:AA1[]0[];CA61[]41[];GG4[]7[];G_1[]0[];_A0[]2[]")); + ConfidenceMode cm = new ConfidenceMode(TWO_SAMPLE_ONE_CALLER_META); + cm.positionRecordMap.put(vcf.getChrPosition(), List.of(vcf)); + cm.addAnnotation(); + vcf = cm.positionRecordMap.get(vcf.getChrPosition()).getFirst(); + assertEquals("MIUN", vcf.getSampleFormatRecord(1).getField(VcfHeaderUtils.FORMAT_FILTER)); + assertEquals("PASS", vcf.getSampleFormatRecord(2).getField(VcfHeaderUtils.FORMAT_FILTER)); + } @Test @@ -1345,6 +1371,30 @@ public void confidenceRealLifeMerged9() { assertEquals("PASS", vcf.getSampleFormatRecord(4).getField(VcfHeaderUtils.FORMAT_FILTER)); } + @Test + public void realLifeCSMIUN() { + /* + chr1 11445731 rs386628485 AG GC . . IN=1;DB;HOM=0,ACAGAGAGACagAGAGTCAGAG GT:AD:DP:FF:FT:INF:NNS:OABS 0/0:18,1:20:AC1;AG6;AGC1;AT1;A_1;CC1;GC7:PASS:.:.:AG7[]11[];GC0[]1[];GG0 +[]1[];_C1[]0[] 0/1:32,4:36:AA1;AG11;A_1;GA1;GC18;G_1:MR:SOMATIC:4:AG21[]11[];A_1[]0[];GC2[]2[];_C1[]0[] ./.:.:.:.:COV:.:.:. ./.:.:.:.:COV:.:.:. 
+ */ + VcfRecord vcf = new VcfRecord(new String[]{"chr1", "11445731", "rs386628485", "AG", "GC", ".", ".", "IN=1;DB;HOM=0,ACAGAGAGACagAGAGTCAGAG" + , "GT:AD:DP:FF:FT:INF:NNS:OABS" + , "0/0:18,1:20:AC1;AG6;AGC1;AT1;A_1;CC1;GC7:.:.:.:AG7[]11[];GC0[]1[];GG0[]1[];_C1[]0[]" + , "0/1:32,4:36:AA1;AG11;A_1;GA1;GC18;G_1:.:SOMATIC:4:AG21[]11[];A_1[]0[];GC2[]2[];_C1[]0[]" + , "./.:.:.:.:.:.:.:." + , "./.:.:.:.:.:.:.:."}); + ConfidenceMode cm = new ConfidenceMode(TWO_SAMPLE_TWO_CALLER_META); + cm.positionRecordMap.put(vcf.getChrPosition(), List.of(vcf)); + cm.addAnnotation(); + + vcf = cm.positionRecordMap.get(vcf.getChrPosition()).getFirst(); + assertEquals("MIUN", vcf.getSampleFormatRecord(1).getField(VcfHeaderUtils.FORMAT_FILTER)); + assertEquals("MR", vcf.getSampleFormatRecord(2).getField(VcfHeaderUtils.FORMAT_FILTER)); + assertEquals("COV", vcf.getSampleFormatRecord(3).getField(VcfHeaderUtils.FORMAT_FILTER)); + assertEquals("COV", vcf.getSampleFormatRecord(4).getField(VcfHeaderUtils.FORMAT_FILTER)); + + } + @Test public void applyMRFilter() { assertFalse(ConfidenceMode.applyMutantReadFilter(null, null, -1)); diff --git a/qcommon/src/org/qcmg/common/model/Accumulator.java b/qcommon/src/org/qcmg/common/model/Accumulator.java index b5b1d66de..8ad6ebc9d 100644 --- a/qcommon/src/org/qcmg/common/model/Accumulator.java +++ b/qcommon/src/org/qcmg/common/model/Accumulator.java @@ -43,10 +43,10 @@ public class Accumulator { private final int position; - private short failedFilterACount = 0; - private short failedFilterCCount = 0; - private short failedFilterGCount = 0; - private short failedFilterTCount = 0; + private TLongList failedFilterACount; + private TLongList failedFilterCCount; + private TLongList failedFilterGCount; + private TLongList failedFilterTCount; private TLongList readNameHashStrandBasePositionQualities; @@ -58,19 +58,23 @@ public int getPosition() { return position; } - public void addFailedFilterBase(final byte base) { + public void addFailedFilterBase(final 
byte base, long readNameHash) { switch (base) { case A_BYTE: - failedFilterACount++; + if (null == failedFilterACount) failedFilterACount = new TLongArrayList(); + failedFilterACount.add(readNameHash); break; case C_BYTE: - failedFilterCCount++; + if (null == failedFilterCCount) failedFilterCCount = new TLongArrayList(); + failedFilterCCount.add(readNameHash); break; case G_BYTE: - failedFilterGCount++; + if (null == failedFilterGCount) failedFilterGCount = new TLongArrayList(); + failedFilterGCount.add(readNameHash); break; case T_BYTE: - failedFilterTCount++; + if (null == failedFilterTCount) failedFilterTCount = new TLongArrayList(); + failedFilterTCount.add(readNameHash); break; default: /* do nothing */ break; @@ -120,17 +124,17 @@ public String toString() { public String getFailedFilterPileup() { StringBuilder sb = new StringBuilder(); - if (failedFilterACount > 0) { - StringUtils.updateStringBuilder(sb, A_STRING + failedFilterACount, Constants.SEMI_COLON); + if (null != failedFilterACount && ! failedFilterACount.isEmpty()) { + StringUtils.updateStringBuilder(sb, A_STRING + failedFilterACount.size(), Constants.SEMI_COLON); } - if (failedFilterCCount > 0) { - StringUtils.updateStringBuilder(sb, C_STRING + failedFilterCCount, Constants.SEMI_COLON); + if (null != failedFilterCCount && ! failedFilterCCount.isEmpty()) { + StringUtils.updateStringBuilder(sb, C_STRING + failedFilterCCount.size(), Constants.SEMI_COLON); } - if (failedFilterGCount > 0) { - StringUtils.updateStringBuilder(sb, G_STRING + failedFilterGCount, Constants.SEMI_COLON); + if (null != failedFilterGCount && ! failedFilterGCount.isEmpty()) { + StringUtils.updateStringBuilder(sb, G_STRING + failedFilterGCount.size(), Constants.SEMI_COLON); } - if (failedFilterTCount > 0) { - StringUtils.updateStringBuilder(sb, T_STRING + failedFilterTCount, Constants.SEMI_COLON); + if (null != failedFilterTCount && ! 
failedFilterTCount.isEmpty()) { + StringUtils.updateStringBuilder(sb, T_STRING + failedFilterTCount.size(), Constants.SEMI_COLON); } return !sb.isEmpty() ? sb.toString() : Constants.MISSING_DATA_STRING; } @@ -140,4 +144,23 @@ public int getCoverage() { return null == readNameHashStrandBasePositionQualities ? 0 : readNameHashStrandBasePositionQualities.size() / 2; } + public boolean isEmpty() { + return null == readNameHashStrandBasePositionQualities && null == failedFilterACount && null == failedFilterCCount && null == failedFilterGCount && null == failedFilterTCount; + } + + public TLongList getFailedFilterACount() { + return failedFilterACount; + } + + public TLongList getFailedFilterCCount() { + return failedFilterCCount; + } + + public TLongList getFailedFilterGCount() { + return failedFilterGCount; + } + + public TLongList getFailedFilterTCount() { + return failedFilterTCount; + } } diff --git a/qcommon/src/org/qcmg/common/util/AccumulatorUtils.java b/qcommon/src/org/qcmg/common/util/AccumulatorUtils.java index 44d357230..79e0653b2 100644 --- a/qcommon/src/org/qcmg/common/util/AccumulatorUtils.java +++ b/qcommon/src/org/qcmg/common/util/AccumulatorUtils.java @@ -60,7 +60,6 @@ public class AccumulatorUtils { public static final long T_BASE_BIT = 0x400000000000000L; public static final int T_BASE_BIT_POSITION = 58; - public static final long STRAND_BIT = 0x8000000000000000L; public static final int STRAND_BIT_POSITION = 63; public static final long END_OF_READ_BIT = 0x4000000000000000L; @@ -69,7 +68,7 @@ public class AccumulatorUtils { /** * This removes reads that have the same read name hash from the accumulator. - * If + *

* If the duplicates have the same base, then 1 is left, if they have different bases, they are both (all?) removed *

* This method updates the Accumulator object that is passed in, and is therefore not side-effect free @@ -236,6 +235,7 @@ public static int[] getBaseCountByStrand(int[] array, char c) { * strand (bit 63) * end of read (bit 62) * base (bits 58-61) + * passedFilter (bit 57) * quality (bits 32-40) * position (bits 0-31) * @@ -1014,8 +1014,9 @@ public static TLongIntMap getReadNameHashStartPositionMap(Accumulator acc) { if (null != acc) { TLongList list = acc.getData(); if (null != list) { - TLongIntMap map = new TLongIntHashMap(list.size() * 2); - for (int i = 0, len = list.size(); i < len; i += 2) { + int len = list.size(); + TLongIntMap map = new TLongIntHashMap(len); + for (int i = 0; i < len; i += 2) { int startPosition = (int) list.get(i + 1); if (((list.get(i + 1) >>> STRAND_BIT_POSITION) & 1) == 0) { @@ -1038,8 +1039,9 @@ public static TLongCharMap getReadNameHashBaseMap(Accumulator acc) { if (null != acc) { TLongList list = acc.getData(); if (null != list) { - TLongCharMap map = new TLongCharHashMap(list.size() * 2); - for (int i = 0, len = list.size(); i < len; i += 2) { + int len = list.size(); + TLongCharMap map = new TLongCharHashMap(len); + for (int i = 0; i < len; i += 2) { char base = getBaseAsCharFromLong(list.get(i + 1)); if (((list.get(i + 1) >>> STRAND_BIT_POSITION) & 1) == 0) { diff --git a/qcommon/test/org/qcmg/common/model/AccumulatorTest.java b/qcommon/test/org/qcmg/common/model/AccumulatorTest.java index 3994106a6..31e8c0e89 100644 --- a/qcommon/test/org/qcmg/common/model/AccumulatorTest.java +++ b/qcommon/test/org/qcmg/common/model/AccumulatorTest.java @@ -81,11 +81,12 @@ public void endOfReads() { public void testUnfilteredPileup() { Accumulator acc = new Accumulator(1); String basesString = "ACGT"; - for (byte b : basesString.getBytes()) acc.addFailedFilterBase(b); + long readNameHash = 1; + for (byte b : basesString.getBytes()) acc.addFailedFilterBase(b, readNameHash); assertEquals("A1;C1;G1;T1", acc.getFailedFilterPileup()); - for (byte b 
: basesString.getBytes()) acc.addFailedFilterBase(b); + for (byte b : basesString.getBytes()) acc.addFailedFilterBase(b, readNameHash + 1); assertEquals("A2;C2;G2;T2", acc.getFailedFilterPileup()); - for (byte b : basesString.getBytes()) acc.addFailedFilterBase(b); + for (byte b : basesString.getBytes()) acc.addFailedFilterBase(b, readNameHash + 2); assertEquals("A3;C3;G3;T3", acc.getFailedFilterPileup()); } @@ -97,8 +98,9 @@ public void testUnfilteredPileupPercentage() { } String basesString = "GG"; + long readNameHash = 1; for (byte b : basesString.getBytes()) { - acc.addFailedFilterBase(b); + acc.addFailedFilterBase(b, readNameHash++); } assertEquals("G2", acc.getFailedFilterPileup()); /* @@ -106,7 +108,7 @@ public void testUnfilteredPileupPercentage() { */ basesString = "G"; for (byte b : basesString.getBytes()) { - acc.addFailedFilterBase(b); + acc.addFailedFilterBase(b, readNameHash++); } assertEquals("G3", acc.getFailedFilterPileup()); } @@ -115,31 +117,32 @@ public void testUnfilteredPileupPercentage() { @Test public void singleUnfilteredPileup() { Accumulator acc = new Accumulator(1); - for (byte b : "ACGT".getBytes()) acc.addFailedFilterBase(b); + long readNameHash = 1; + for (byte b : "ACGT".getBytes()) acc.addFailedFilterBase(b, readNameHash++); assertEquals("A1;C1;G1;T1", acc.getFailedFilterPileup()); acc = new Accumulator(1); - for (byte b : "ACGTA".getBytes()) acc.addFailedFilterBase(b); + for (byte b : "ACGTA".getBytes()) acc.addFailedFilterBase(b, readNameHash++); assertEquals("A2;C1;G1;T1", acc.getFailedFilterPileup()); acc = new Accumulator(1); - for (byte b : "ACCGT".getBytes()) acc.addFailedFilterBase(b); + for (byte b : "ACCGT".getBytes()) acc.addFailedFilterBase(b, readNameHash++); assertEquals("A1;C2;G1;T1", acc.getFailedFilterPileup()); acc = new Accumulator(1); - for (byte b : "ATTTGT".getBytes()) acc.addFailedFilterBase(b); + for (byte b : "ATTTGT".getBytes()) acc.addFailedFilterBase(b, readNameHash++); assertEquals("A1;G1;T4", 
acc.getFailedFilterPileup()); acc = new Accumulator(1); - for (byte b : "AAAATTTGT".getBytes()) acc.addFailedFilterBase(b); + for (byte b : "AAAATTTGT".getBytes()) acc.addFailedFilterBase(b, readNameHash++); assertEquals("A4;G1;T4", acc.getFailedFilterPileup()); acc = new Accumulator(1); - for (byte b : "AAAACTTTCGT".getBytes()) acc.addFailedFilterBase(b); + for (byte b : "AAAACTTTCGT".getBytes()) acc.addFailedFilterBase(b, readNameHash++); assertEquals("A4;C2;G1;T4", acc.getFailedFilterPileup()); acc = new Accumulator(1); - for (byte b : "AAAACTTTCGTG".getBytes()) acc.addFailedFilterBase(b); + for (byte b : "AAAACTTTCGTG".getBytes()) acc.addFailedFilterBase(b, readNameHash++); assertEquals("A4;C2;G2;T4", acc.getFailedFilterPileup()); } @@ -217,7 +220,7 @@ public void getGenotypeRealLife() { Accumulator acc = new Accumulator(1); for (int i = 1; i <= 60; i++) acc.addBase((byte) 'G', (byte) 40, false, 1, 1, 2, i); for (int i = 1; i <= 5; i++) acc.addBase((byte) 'C', (byte) 42, false, 1, 1, 2, i + 61); - for (int i = 1; i <= 1; i++) acc.addBase((byte) 'C', (byte) 42, true, 1, 1, 2, i + 67); + for (int i = 1; i == 1; i++) acc.addBase((byte) 'C', (byte) 42, true, 1, 1, 2, i + 67); assertEquals("C1[42]5[42];G0[0]60[40]", AccumulatorUtils.getOABS(acc)); /* diff --git a/qsnp/src/org/qcmg/snp/Pipeline.java b/qsnp/src/org/qcmg/snp/Pipeline.java index 83ccb9f8e..86179232e 100644 --- a/qsnp/src/org/qcmg/snp/Pipeline.java +++ b/qsnp/src/org/qcmg/snp/Pipeline.java @@ -28,6 +28,7 @@ import java.io.File; import java.io.IOException; +import java.nio.ByteBuffer; import java.text.DateFormat; import java.text.SimpleDateFormat; import java.util.ArrayList; @@ -35,7 +36,6 @@ import java.util.Calendar; import java.util.Collections; import java.util.HashSet; -import java.util.Iterator; import java.util.List; import java.util.Optional; import java.util.Queue; @@ -133,9 +133,7 @@ public abstract class Pipeline { int[] testStartPositions; int noOfControlFiles; int noOfTestFiles; - boolean 
includeIndels; - int mutationId; - + List controlRules = new ArrayList<>(4); List testRules = new ArrayList<>(4); @@ -354,13 +352,13 @@ void writeVCF(String outputFileName) throws Exception { try (FastaSequenceFile fsf = new FastaSequenceFile(new File(referenceFile), true)) { if (null != fsf.getSequenceDictionary()) { refFileContigs = fsf.getSequenceDictionary().getSequences() - .stream().map(ssr -> ssr.getSequenceName()).collect(Collectors.toList()); + .stream().map(SAMSequenceRecord::getSequenceName).collect(Collectors.toList()); } } } snps.sort(refFileContigs.isEmpty() ? null : ChrPositionComparator.getVcfRecordComparator(refFileContigs)); - try (RecordWriter writer = new RecordWriter<>(new File(outputFileName));) { + try (RecordWriter writer = new RecordWriter<>(new File(outputFileName))) { final VcfHeader header = getHeaderForQSnp(patientId, controlSampleId, testSampleId, "qSNP v" + Main.version, normalBamIds, tumourBamIds, qexec.getUuid().getValue()); VcfHeaderUtils.addQPGLineToHeader(header, qexec.getToolName().getValue(), qexec.getToolVersion().getValue(), qexec.getCommandLine().getValue() + (StringUtils.isNullOrEmpty(runMode) ? "" : " [runMode: " + runMode + "]")); @@ -578,7 +576,7 @@ static boolean isPileupRecordAKeeper(int variantCount, int coverage, Rule rule, static boolean isVariantOnBothStrands(List baseCounts) { final PileupElement pe = PileupElementUtil.getLargestVariant(baseCounts); - return null == pe ? 
false : pe.isFoundOnBothStrands(); + return null != pe && pe.isFoundOnBothStrands(); } /** @@ -653,19 +651,16 @@ void walkBams(boolean includeDups) throws Exception { Accumulator [] controlAccs = new Accumulator[1024 * 1024 * 256]; Accumulator [] testAccs = new Accumulator[1024 * 1024 * 256]; - final CyclicBarrier barrier = new CyclicBarrier(noOfThreads, new Runnable() { - @Override - public void run() { - // reset the minStartPositions values to zero - controlMinStart.set(0); - testMinStart.set(0); - - // update the reference bases array - loadNextReferenceSequence(); - - logger.info("barrier has been reached by all threads - moving onto next chromosome"); - } - }); + final CyclicBarrier barrier = new CyclicBarrier(noOfThreads, () -> { + // reset the minStartPositions values to zero + controlMinStart.set(0); + testMinStart.set(0); + + // update the reference bases array + loadNextReferenceSequence(); + + logger.info("barrier has been reached by all threads - moving onto next chromosome"); + }); final ExecutorService service = Executors.newFixedThreadPool(noOfThreads); final CountDownLatch consumerLatch = new CountDownLatch(consumerLatchSize); final CountDownLatch controlProducerLatch = new CountDownLatch(1); @@ -737,7 +732,7 @@ public class Producer implements Runnable { private final CyclicBarrier barrier; private final boolean includeDups; private final boolean runqBamFilter; - private Accumulator [] accum; + private final Accumulator [] accum; private XXHash64 xxhash64; private final static int seed = 0x9747b28c; // used to initialize the hash value, use whatever value you want, but always the same private final static int ONE_MILLION = 1_000_000; @@ -746,7 +741,7 @@ public Producer(final String[] bamFiles, final CountDownLatch latch, final boole final Queue samQueue, final Thread mainThread, final String query, final CyclicBarrier barrier, boolean includeDups, Accumulator [] accum) throws Exception { this.latch = latch; - final Set bams = new HashSet(); + 
final Set bams = new HashSet<>(); for (final String bamFile : bamFiles) { bams.add(new File(bamFile)); } @@ -776,7 +771,7 @@ public void run() { try { boolean keepRunning = true; - + while (keepRunning) { @@ -796,13 +791,13 @@ public void run() { while (iter.hasNext()) { final SAMRecord record = iter.next(); - + if (++ counter > ONE_MILLION) { higherOrderCounter++; counter = 0; int qSize = queue.size(); logger.info("hit " + higherOrderCounter + "M sam records, passed filter: " + passedFilterCount + ", qsize: " + qSize); - if (passedFilterCount == 0 && (counter + (ONE_MILLION * higherOrderCounter)) >= noOfRecordsFailingFilter) { + if (passedFilterCount == 0 && (counter + ((long) ONE_MILLION * higherOrderCounter)) >= noOfRecordsFailingFilter) { throw new SnpException("INVALID_FILTER", ""+ (counter + (ONE_MILLION * higherOrderCounter))); } while (qSize > 10000) { @@ -894,10 +889,23 @@ private void addRecordToQueue(final SAMRecord record, final boolean passesFilte passedFilterCount++; } record.getCigar(); // cache cigar for all records - record.getAlignmentEnd(); // cache alignment end for all records - - final SAMRecordFilterWrapper wrapper = new SAMRecordFilterWrapper(record, xxhash64.hash(record.getReadName().getBytes(), 0, record.getReadNameLength(), seed)); + int end = record.getAlignmentEnd(); // cache alignment end for all records + int start = record.getAlignmentStart(); + final SAMRecordFilterWrapper wrapper = new SAMRecordFilterWrapper(record, xxhash64.hash(record.getReadName().getBytes(),0, record.getReadName().length(), seed)); wrapper.setPassesFilter(passesFilter); + + /* + setup Accumulators for this read + */ + int startPosition = Math.min(start, end); + int endPosition = Math.max(start, end); + for (int i = startPosition; i <= endPosition; i++) { + Accumulator acc = accum[i]; + if (null == acc) { + accum[i] = new Accumulator(i); + } + } + queue.add(wrapper); } } @@ -935,12 +943,14 @@ public Consumer(final CountDownLatch consumerLatch, final 
CountDownLatch normalL public void processSAMRecord(final SAMRecordFilterWrapper record) { final SAMRecord sam = record.getRecord(); final boolean forwardStrand = ! sam.getReadNegativeStrandFlag(); + final boolean passesFilter = record.getPassesFilter(); final int startPosition = sam.getAlignmentStart(); // endPosition is just that for reverse strand, but for forward strand reads it is start position final int endPosition = sam.getAlignmentEnd(); final byte[] bases = sam.getReadBases(); - final byte[] qualities = record.getPassesFilter() ? sam.getBaseQualities() : null; + final byte[] qualities = passesFilter ? sam.getBaseQualities() : null; final Cigar cigar = sam.getCigar(); + final long readNameHash = record.getPosition(); int referenceOffset = 0, offset = 0; @@ -951,8 +961,8 @@ public void processSAMRecord(final SAMRecordFilterWrapper record) { if (co.consumesReferenceBases() && co.consumesReadBases()) { // we have a number (length) of bases that can be advanced. updateMapWithAccums(startPosition, bases, - qualities, forwardStrand, offset, length, referenceOffset, - record.getPassesFilter(), endPosition, record.getPosition()); + qualities, forwardStrand, offset, length, referenceOffset, + passesFilter, endPosition, readNameHash); // advance offsets referenceOffset += length; offset += length; @@ -984,16 +994,18 @@ public void updateMapWithAccums(int startPosition, final byte[] bases, final byt final int startPosAndRefOffset = startPosition + referenceOffset; for (int i = 0 ; i < length ; i++) { - Accumulator acc = array[i + startPosAndRefOffset]; + int currentPos = i + startPosAndRefOffset; + Accumulator acc = array[currentPos]; if (null == acc) { - acc = new Accumulator(i + startPosAndRefOffset); - array[i + startPosAndRefOffset] = acc; + acc = new Accumulator(currentPos); + array[currentPos] = acc; } - if (passesFilter && qualities[i + offset] >= minBaseQual) { - acc.addBase(bases[i + offset], qualities[i + offset], forwardStrand, - startPosition, i + 
startPosAndRefOffset, readEndPosition, readNameHash); + int iPlusOffset = i + offset; + if (passesFilter && qualities[iPlusOffset] >= minBaseQual) { + acc.addBase(bases[iPlusOffset], qualities[iPlusOffset], forwardStrand, + startPosition, currentPos, readEndPosition, readNameHash); } else { - acc.addFailedFilterBase(bases[i + offset]); + acc.addFailedFilterBase(bases[iPlusOffset], readNameHash); } } } @@ -1045,7 +1057,7 @@ public void run() { if (barrier.getNumberWaiting() >= (singleSampleMode ? 1 : 2)) { // logger.info("null record, barrier count > 2 - what now??? q.size: " + queue.size()); // just me left - if (queue.size() == 0 ) { + if (queue.isEmpty()) { logger.info("Consumer: Processed all records in " + currentChr + ", waiting at barrier"); try { @@ -1243,8 +1255,15 @@ private void processControlAndTest(Accumulator controlAcc, Accumulator testAcc) } - private void interrogateAccumulations(final Accumulator control, final Accumulator test) { - + private void interrogateAccumulations(Accumulator control, Accumulator test) { + + if (null != control && control.isEmpty()) { + control = null; + } + if (null != test && test.isEmpty()) { + test = null; + } + // get coverage for both normal and tumour int controlCoverage = null != control ? control.getCoverage() : 0; int testCoverage = null != test ? test.getCoverage() : 0; @@ -1262,9 +1281,9 @@ private void interrogateAccumulations(final Accumulator control, final Accumulat final int position = control != null ? control.getPosition() : test.getPosition(); // if we are over the length of this particular sequence - return - if (position-1 >= referenceBasesLength) return; + if (position - 1 >= referenceBasesLength) return; - char ref = (char) referenceBases[position-1]; + char ref = (char) referenceBases[position - 1]; if ( ! 
BaseUtils.isACGT(ref)) { logger.warn("ignoring potential snp at " + currentChr + ":" + position + " - don't deal with ref values of: " + ref); } else { @@ -1343,7 +1362,7 @@ private void interrogateAccumulations(final Accumulator control, final Accumulat /* * attempt to add format field information */ - List ff = new ArrayList(4); + List ff = new ArrayList<>(4); ff.add(header); if ( ! singleSampleMode) { @@ -1363,15 +1382,7 @@ private void interrogateAccumulations(final Accumulator control, final Accumulat } } } - - /** - * Overloaded method - * @see compoundSnps(boolean complete) - */ - void compoundSnps() { - compoundSnps(true); - } - + public static List getRecordsAtPosition(SamReader reader, String contig, int position) { SAMRecordIterator iter = reader.query(contig, position, position, false); List recs = new ArrayList<>(); @@ -1450,15 +1461,9 @@ void compoundSnps(boolean complete) { } } - if (toRemove.size() > 0) { + if (!toRemove.isEmpty()) { logger.info("About to call remove with toRemove size: " + toRemove.size()); - Iterator iter = snps.iterator(); - while (iter.hasNext()) { - VcfRecord v = iter.next(); - if (toRemove.contains(v)) { - iter.remove(); - } - } + snps.removeIf(toRemove::contains); logger.info("About to call remove with toRemove size: " + toRemove.size() + " - DONE"); } diff --git a/qsnp/src/org/qcmg/snp/util/GenotypeUtil.java b/qsnp/src/org/qcmg/snp/util/GenotypeUtil.java index d19f229e3..e0724e75e 100644 --- a/qsnp/src/org/qcmg/snp/util/GenotypeUtil.java +++ b/qsnp/src/org/qcmg/snp/util/GenotypeUtil.java @@ -76,7 +76,7 @@ public static String getFormatValues(Accumulator acc, String gt, String alt, cha StringBuilder sb = new StringBuilder(); sb.append(null != gt ? gt : Constants.MISSING_GT).append(Constants.COLON); - sb.append(VcfUtils.getAD(""+ref, alt, oabs)).append(Constants.COLON); + sb.append(VcfUtils.getAD("" + ref, alt, oabs)).append(Constants.COLON); sb.append(null == acc ? 
Constants.MISSING_DATA_STRING :acc.getCoverage()).append(Constants.COLON); /* * adding EOR (end of reads -similar in format to FF) diff --git a/qsnp/src/org/qcmg/snp/util/PipelineUtil.java b/qsnp/src/org/qcmg/snp/util/PipelineUtil.java index 385270cce..34cc942c0 100644 --- a/qsnp/src/org/qcmg/snp/util/PipelineUtil.java +++ b/qsnp/src/org/qcmg/snp/util/PipelineUtil.java @@ -1,16 +1,10 @@ package org.qcmg.snp.util; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.Comparator; -import java.util.List; -import java.util.Map; -import java.util.Optional; +import java.util.*; import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Collectors; +import gnu.trove.set.hash.TLongHashSet; import org.qcmg.common.log.QLogger; import org.qcmg.common.log.QLoggerFactory; import org.qcmg.common.model.Accumulator; @@ -41,7 +35,6 @@ public class PipelineUtil { public static final String OPEN_CLOSE_BRACKETS = "[]"; - public static final String ZERO_ZERO_GT = "0/0"; private static final QLogger logger = QLoggerFactory.getLogger(PipelineUtil.class); public static List> listOfListOfAdjacentVcfs(List snps) { @@ -103,15 +96,15 @@ public static List> listOfListOfAdjacentVcfs(List snp */ public static List getAltStringAndGenotypes(List control, List test, String reference) { if (StringUtils.isNullOrEmpty(reference)) { - throw new IllegalArgumentException("Null or empty reference passed to PipelIneUtil.getAltStringAndGenotypes"); + throw new IllegalArgumentException("Null or empty reference passed to PipelineUtil.getAltStringAndGenotypes"); } Listallels = new ArrayList<>(5); allels.add(reference); if (null != control) { - allels.addAll(control.stream().distinct().filter(s -> isAltFreeOfRef(s, reference)).collect(Collectors.toList())); + allels.addAll(control.stream().distinct().filter(s -> isAltFreeOfRef(s, reference)).toList()); } if (null != test) { - 
allels.addAll(test.stream().distinct().filter(s -> isAltFreeOfRef(s, reference)).collect(Collectors.toList())); + allels.addAll(test.stream().distinct().filter(s -> isAltFreeOfRef(s, reference)).toList()); } allels = allels.stream().distinct().collect(Collectors.toList()); @@ -131,7 +124,7 @@ public static List getAltStringAndGenotypes(List control, List -1 ? Constants.MISSING_GT : controlGT[0] + Constants.SLASH_STRING + controlGT[1]; String tgt = Arrays.binarySearch(testGT, (short) -1) > -1 ? Constants.MISSING_GT : testGT[0] + Constants.SLASH_STRING + testGT[1]; - return Arrays.asList(allels.isEmpty() ? Constants.MISSING_DATA_STRING : allels.stream().collect(Collectors.joining(Constants.COMMA_STRING)), cgt, tgt); + return Arrays.asList(allels.isEmpty() ? Constants.MISSING_DATA_STRING : String.join(Constants.COMMA_STRING, allels), cgt, tgt); } /** @@ -167,28 +160,32 @@ public static short[] getGenotypeArray(List sampleAlleles, List } /** - * Checks each character in the alt string against the corresponding character in the alt string. If any are the same, returns true. False otherwise - * @param alt - * @param ref - * @return + * Checks if the provided alternate string ('alt') is completely free of any reference bases ('ref'). + * The method ensures that all characters in 'alt' differ from their corresponding characters in 'ref'. + * If either 'alt' or 'ref' is null, empty, or contains '.', or if their lengths do not match, + * the method returns true, assuming 'alt' is free of any reference comparison. + * + * @param alt The alternate string to be checked. Must not be null, empty, or contain missing data ("."). + * @param ref The reference string to compare against. Must not be null, empty, or contain missing data ("."). + * @return true if 'alt' is free of any characters from 'ref' or the inputs are invalid; otherwise, false. 
*/ public static boolean isAltFreeOfRef(String alt, String ref) { /* * alt and ref must be not null, empty, missing data, and must be the same length */ - if ( ! StringUtils.isNullOrEmptyOrMissingData(alt) && ! StringUtils.isNullOrEmptyOrMissingData(ref)) { - int len = alt.length(); - if (len == ref.length()) { - for (int i = 0 ; i < len ; i++) { - if (alt.charAt(i) == ref.charAt(i)) { - return false; - } - } + if (alt == null || ref == null || alt.isEmpty() || ref.isEmpty() || ".".equals(alt) || ".".equals(ref) || alt.length() != ref.length()) { + return true; + } + + int len = alt.length(); + for (int i = 0 ; i < len ; i++) { + if (alt.charAt(i) == ref.charAt(i)) { + return false; } } - - return true; + + return true; // No matches found, alt is free of ref. } /** @@ -202,14 +199,11 @@ public static List getBasesForGenotype(Map basesAndCoun if (null != basesAndCounts) { List genotypeBases = basesAndCounts.entrySet().stream() - .filter(e -> ! e.getKey().contains("_")) - .filter(e -> e.getValue().length == 4) - .filter(e -> (e.getValue()[0] + e.getValue()[2]) >= minimumCoverage ) - .filter(e -> e.getKey().equals(ref) || isAltFreeOfRef(e.getKey(), ref)) + .filter(e -> e.getValue().length == 4 && (e.getValue()[0] + e.getValue()[2]) >= minimumCoverage && (e.getKey().equals(ref) || isAltFreeOfRef(e.getKey(), ref)) && ! e.getKey().contains("_")) .sorted( Comparator.comparing((Map.Entry e) -> e.getValue()[0] + e.getValue()[2], Comparator.reverseOrder()) .thenComparing(e -> e.getValue()[0] > 0 && e.getValue()[2] > 0, Comparator.reverseOrder())) - .map(e -> e.getKey()) + .map(Map.Entry::getKey) .collect(Collectors.toList()); if (genotypeBases.size() > 2) { @@ -223,19 +217,22 @@ public static List getBasesForGenotype(Map basesAndCoun /** * Returns the Observed Alleles By Strand for this map of bases and counts. 
- * - * @param basesAndCounts - * @return + * + * @param basesAndCounts a map where the key is a string and the value is an array of + * short integers; used to generate a formatted string if certain + * conditions are met + * @return an Optional containing the formatted string if the input map is not null and + * contains matching entries, otherwise an empty Optional */ public static Optional getOABS(Map basesAndCounts) { if (null != basesAndCounts) { String oabs = basesAndCounts.entrySet().stream() .filter(e -> e.getValue().length == 4) - .sorted((e1, e2) -> e1.getKey().compareTo(e2.getKey())) + .sorted(Map.Entry.comparingByKey()) .map(e -> e.getKey() + e.getValue()[0] + OPEN_CLOSE_BRACKETS + e.getValue()[2] + OPEN_CLOSE_BRACKETS) .collect(Collectors.joining(Constants.SEMI_COLON_STRING)); - return Optional.ofNullable(oabs.length() > 0 ? oabs : null); + return Optional.ofNullable(!oabs.isEmpty() ? oabs : null); } return Optional.empty(); } @@ -297,9 +294,7 @@ public static Map getBasesFromAccumulators(List ac StringBuilder sb = moReadIdsAndBases.get(j); if (null == sb) { sb = new StringBuilder(); - for (int k = 0 ; k < ai.get() ; k++) { - sb.append('_'); - } + sb.append("_".repeat(Math.max(0, ai.get()))); moReadIdsAndBases.put(j, sb ); } sb.append(c); @@ -416,10 +411,16 @@ public static int getUniqueCount(TIntIntMap map, TIntList list, boolean fs) { return set.size(); } + /** + * Determines if the given string contains at least one lowercase character. 
+ * + * @param s the string to be checked for lowercase characters + * @return true if the string contains at least one lowercase character, false otherwise + */ public static boolean isStringLowerCase(String s) { if (null != s) { for (char c : s.toCharArray()) { - if (Character.isLetter(c) && Character.isLowerCase(c)) { + if (Character.isLowerCase(c)) { return true; } } @@ -428,11 +429,17 @@ public static boolean isStringLowerCase(String s) { } /** - * for each element in the list, get the corresponding value in the map, and return the unique count of these values - * In this map, strandedness is the sign in the int value, the long key is the readname hashcode, and all 64 bits are used for this. - * @param map - * @param list - * @return + * Computes the count of unique integers based on the given map and list. + * The method evaluates the elements in the list, retrieves their corresponding values + * from the map, and filters them based on the specified boolean flag. + * Only absolute values of the integers matching the condition are considered, and duplicates + * are ignored. + * + * @param map a mapping of long keys to integer values that is used to determine the start positions + * @param list a list of long values to be evaluated + * @param fs a flag that determines the criteria for filtering the map values; true for positive + * values and false for negative values + * @return the count of unique absolute values matching the specified criteria */ public static int getUniqueCount(TLongIntMap map, TLongList list, boolean fs) { TIntSet set = new TIntHashSet(); @@ -446,96 +453,100 @@ public static int getUniqueCount(TLongIntMap map, TLongList list, boolean fs) { return set.size(); } -// /** -// * Returns a VcfRecord with just the positional and ref and alt information provided. Does not contain filter, info, format etc. 
-// * @param vcfs -// * @return -// */ -// public static VcfRecord createSkeletonCompoundSnp(List vcfs) { -// /* -// * sort list -// */ -// vcfs.sort(null); -// StringBuilder ref = new StringBuilder(); -// StringBuilder alt = new StringBuilder(); -// ChrPosition startPosition = vcfs.get(0).getChrPosition(); -// -// for (VcfRecord v : vcfs) { -// ref.append(v.getRefChar()); -// alt.append(v.getAlt()); -// } -// return VcfUtils.createVcfRecord(startPosition, null, ref.toString(), alt.toString()); -// } -// - + /** + * Processes a map of VcfRecord to a pair of Accumulator objects and separates them into two lists: + * one for control accumulators and another for test accumulators. Entries with non-null accumulators + * are added to their respective lists. + * + * @param vcfs A map where keys are VcfRecord objects and values are pairs of Accumulator objects. + * The left value in the pair corresponds to the control accumulator, and the right value + * corresponds to the test accumulator. + * @return A pair of lists, where the first list contains the control accumulators and the second + * list contains the test accumulators. 
+ */ public static Pair, List> getAccs(Map> vcfs) { - /* - * sort keys in map - */ - List l = new ArrayList<>(vcfs.keySet()); - l.sort(null); - - /* - * extract left for control, right for test - */ + // Preallocate lists for control and test accumulators based on map size List cAccs = new ArrayList<>(vcfs.size() + 1); List tAccs = new ArrayList<>(vcfs.size() + 1); - - for (VcfRecord v : l) { - Pair p = vcfs.get(v); - if (null != p.left()) { - cAccs.add(p.left()); - } - if (null != p.right()) { - tAccs.add(p.right()); - } - } - - return new Pair<>(cAccs,tAccs); + + // Stream and process the map entries directly + vcfs.entrySet() + .stream() + .sorted(Map.Entry.comparingByKey()) // Optional: sorting based on keys + .forEach(entry -> { + Pair p = entry.getValue(); + if (p.left() != null) { + cAccs.add(p.left()); + } + if (p.right() != null) { + tAccs.add(p.right()); + } + }); + + // Return the final pair + return new Pair<>(cAccs, tAccs); + } public static Optional getReference(Collection vcfs) { - return Optional.ofNullable(vcfs.stream().sorted().map(VcfRecord::getRef).collect(Collectors.joining())); + return Optional.of(vcfs.stream().sorted().map(VcfRecord::getRef).collect(Collectors.joining())); } /** - * REturns a count of either the novel starts - * - * map contains bases as key, and short array contains 4 elements, which are (in this order): - * forward strand count - * forward strand novel starts count - * reverse strand count - * reverse strand novel starts count - * - * Offset dictates whether you are getting novel starts (offset = 1), or counts (offset = 0) - * - * - * @param map - * @param key - * @param offset - * @return + * Retrieves a specific count from the short array associated with the given key in the map. + * The short array contains coverage data, and the count is determined by the offset and an adjacent index. + * If the key is not found or the short array is null, the method returns 0. 
+ * + * @param map A map where keys are strings representing specific identifiers, and values are short arrays + * containing coverage data. + * @param key The key for which the count is to be retrieved. + * @param offset The offset within the short array to determine the count. + * @return The sum of the value at the offset index and the value at the adjacent index (offset + 2) + * in the short array corresponding to the specified key. If the key is not found or the associated + * array is null, 0 is returned. */ public static int getCount(Map map, String key, int offset) { short[] sa =map.get(key); - return (null != sa) ? sa[0 + offset] + sa[2 + offset] : 0; + return (null != sa) ? sa[offset] + sa[2 + offset] : 0; } /** - * returns the novel starts counts for both strands for this base (key) - * @see getCount(Map map, String key, int offset) - * @param map - * @param key - * @return + * Retrieves the count of novel starts for the specified key from the given map. + * The map contains bases as keys, and each key corresponds to a short array + * that holds count information for various metrics. The novel starts count is + * calculated by summing the forward strand novel starts count and the reverse strand + * novel starts count. + * + * @param map A map where keys are strings representing bases, and values are + * short arrays containing count data. The short array has + * 4 elements in the following order: + * - Forward strand count + * - Forward strand novel starts count + * - Reverse strand count + * - Reverse strand novel starts count + * @param key The specific base key for which to retrieve the novel starts count. + * @return The sum of forward strand and reverse strand novel starts counts for + * the specified base key. If the key is not found in the map, 0 is returned. 
*/ public static int getNovelStartsCounts(Map map, String key) { return getCount(map, key, 1); } /** - * Returns the total count for both strands for this base - * @see getCount(Map map, String key, int offset) - * @param map - * @param key - * @return + * Retrieves the total counts for the specified key from the given map. + * The map contains bases as keys, and each key corresponds to a short array + * that holds count information for various metrics. The total count is + * calculated by summing the counts from both forward and reverse strands. + * + * @param map A map where keys are strings representing bases, and values + * are short arrays containing count data. The short array has + * 4 elements in the following order: + * - Forward strand count + * - Forward strand novel starts count + * - Reverse strand count + * - Reverse strand novel starts count + * @param key The specific base key for which to retrieve the total counts. + * @return The total counts (sum of forward and reverse strand counts) for + * the specified base key. If the key is not found in the map, 0 is returned. 
*/ public static int getTotalCounts(Map map, String key) { return getCount(map, key, 0); @@ -543,10 +554,10 @@ public static int getTotalCounts(Map map, String key) { public static String[] getMR(Map map, String[] aAlts, int firstG, int secondG) { if (null == aAlts) { - throw new IllegalArgumentException("Null or empty alts passed to PipelIneUtil.getMR"); + throw new IllegalArgumentException("Null or empty alts passed to PipeLineUtil.getMR"); } if (null == map) { - throw new IllegalArgumentException("Null map passed to PipelIneUtil.getMR"); + throw new IllegalArgumentException("Null map passed to PipeLineUtil.getMR"); } /* @@ -578,10 +589,10 @@ public static String[] getMR(Map map, String[] aAlts, int first /* * if string builder are empty, put missing data in there */ - if (mr.length() == 0) { + if (mr.isEmpty()) { mr.append(Constants.MISSING_DATA_STRING); } - if (nns.length() == 0) { + if (nns.isEmpty()) { nns.append(Constants.MISSING_DATA_STRING); } @@ -600,9 +611,9 @@ public static int getCoverage(Map map) { /** * Create compound snp based purely on GATK vcf information. - * Classification (ie. SOMATIC) must be same for all snps - thats about the only rule... + * Classification (i.e. SOMATIC) must be same for all snps - that's about the only rule... * oh, and the genotypes need to be the same for all control samples and for all test samples - * eg. 0/0 ->0/1 for all snps in cs + * e.g. 
0/0 ->0/1 for all snps in cs * * @param vcfs * @return @@ -636,7 +647,7 @@ public static Optional createCompoundSnpGATK(List vcfs, bo String [] gtArr = ffMap.get(VcfHeaderUtils.FORMAT_GENOTYPE); String [] dpArr = ffMap.get(VcfHeaderUtils.FORMAT_READ_DEPTH); if (null == dpArr) { - logger.warn("null dp array for rec: " + v.toString()); + logger.warn("null dp array for rec: " + v); } String [] adArr = ffMap.get(VcfHeaderUtils.FORMAT_ALLELIC_DEPTHS); String [] gqArr = ffMap.get(VcfHeaderUtils.FORMAT_GENOTYPE_QUALITY); @@ -675,11 +686,11 @@ public static Optional createCompoundSnpGATK(List vcfs, bo // if gts are the same, alls well if (singleSampleMode || (controlGTs.stream().distinct().count() == 1 && testGTs.stream().distinct().count() == 1 )) { - VcfRecord firstRec = vcfs.get(0); + VcfRecord firstRec = vcfs.getFirst(); VcfRecord v = VcfUtils.createVcfRecord(firstRec.getChrPosition(), null, csRef, csAlt); /* - * sort collections to get lowest value first - thats what we will use + * sort collections to get lowest value first - that's what we will use */ if ( ! singleSampleMode) { controlDPs.sort(null); @@ -705,26 +716,26 @@ public static Optional createCompoundSnpGATK(List vcfs, bo */ StringBuilder cSB = null; if ( ! 
singleSampleMode) { - cSB = new StringBuilder(controlGTs.get(0)); //GT - StringUtils.updateStringBuilder(cSB, controlADs.get(0), Constants.COLON); //AD - StringUtils.updateStringBuilder(cSB, controlDPs.get(0), Constants.COLON); //DP + cSB = new StringBuilder(controlGTs.getFirst()); //GT + StringUtils.updateStringBuilder(cSB, controlADs.getFirst(), Constants.COLON); //AD + StringUtils.updateStringBuilder(cSB, controlDPs.getFirst(), Constants.COLON); //DP StringUtils.updateStringBuilder(cSB, Constants.MISSING_DATA_STRING, Constants.COLON); //FT - StringUtils.updateStringBuilder(cSB,controlGQs.get(0), Constants.COLON); // GQ field + StringUtils.updateStringBuilder(cSB,controlGQs.getFirst(), Constants.COLON); // GQ field StringUtils.updateStringBuilder(cSB,cINF, Constants.COLON); // INF field StringUtils.updateStringBuilder(cSB, Constants.MISSING_DATA_STRING, Constants.COLON); //NNS StringUtils.updateStringBuilder(cSB, Constants.MISSING_DATA_STRING, Constants.COLON); //OABS - StringUtils.updateStringBuilder(cSB, controlQLs.get(0), Constants.COLON); //QL + StringUtils.updateStringBuilder(cSB, controlQLs.getFirst(), Constants.COLON); //QL } - StringBuilder tSB = new StringBuilder(testGTs.get(0)); - StringUtils.updateStringBuilder(tSB, testADs.get(0), Constants.COLON); - StringUtils.updateStringBuilder(tSB, testDPs.get(0), Constants.COLON); + StringBuilder tSB = new StringBuilder(testGTs.getFirst()); + StringUtils.updateStringBuilder(tSB, testADs.getFirst(), Constants.COLON); + StringUtils.updateStringBuilder(tSB, testDPs.getFirst(), Constants.COLON); StringUtils.updateStringBuilder(tSB, Constants.MISSING_DATA_STRING, Constants.COLON); - StringUtils.updateStringBuilder(tSB, testGQs.get(0), Constants.COLON); // GQ field + StringUtils.updateStringBuilder(tSB, testGQs.getFirst(), Constants.COLON); // GQ field StringUtils.updateStringBuilder(tSB, somCount > 0 ? 
"SOMATIC" : Constants.MISSING_DATA_STRING, Constants.COLON); // INF field StringUtils.updateStringBuilder(tSB, Constants.MISSING_DATA_STRING, Constants.COLON); //NNS StringUtils.updateStringBuilder(tSB, Constants.MISSING_DATA_STRING, Constants.COLON); //OABS - StringUtils.updateStringBuilder(tSB, testQLs.get(0), Constants.COLON); //QL + StringUtils.updateStringBuilder(tSB, testQLs.getFirst(), Constants.COLON); //QL if ( singleSampleMode) { v.setFormatFields(Arrays.asList("GT:AD:DP:FT:GQ:INF:NNS:OABS:QL", tSB.toString())); @@ -739,12 +750,36 @@ public static Optional createCompoundSnpGATK(List vcfs, bo return Optional.empty(); } - + + public static String formatCBasesCountsNNS(Map cBasesCountsNNS) { + if (cBasesCountsNNS == null || cBasesCountsNNS.isEmpty()) { + return "cBasesCountsNNS is empty or null"; + } + + // Prepare StringBuilder to format content + StringBuilder sb = new StringBuilder("{"); + cBasesCountsNNS.forEach((key, counts) -> { + sb.append(key) + .append(": [") + .append(Arrays.toString(counts)) // Format short[] as a string + .append("], "); + }); + + // Remove trailing ", " if it exists and close the string + if (sb.length() > 1) { + sb.setLength(sb.length() - 2); // Remove last ", " + } + sb.append("}"); + + return sb.toString(); + } + + public static Optional createCompoundSnp(Map> vcfs, List controlRules, List testRules, boolean runSBias, int sBiasCov, int sBiasAlt) { Pair, List> p = getAccs(vcfs); Optional refO = getReference(vcfs.keySet()); - String ref = refO.isPresent() ? refO.get() : null; + String ref = refO.orElse(null); Map cBasesCountsNNS = getBasesFromAccumulators(p.left()); Map tBasesCountsNNS = getBasesFromAccumulators(p.right()); @@ -753,7 +788,7 @@ public static Optional createCompoundSnp(Map createCompoundSnp(Map createCompoundSnp(Map createCompoundSnp(Map oOabs = getOABS(cBasesCountsNNS); - String oabs = oOabs.isPresent() ? 
oOabs.get() : Constants.MISSING_DATA_STRING; - + String oabs = oOabs.orElse(Constants.MISSING_DATA_STRING); + + String failedFilter = getFailedFilterCS(p.left()); + StringBuilder cSB = new StringBuilder(altsAndGTs.get(1)); - StringUtils.updateStringBuilder(cSB, VcfUtils.getAD(ref, altsAndGTs.get(0), oabs), Constants.COLON); - StringUtils.updateStringBuilder(cSB, controlCov > 0 ? controlCov+"" : "0", Constants.COLON); + StringUtils.updateStringBuilder(cSB, VcfUtils.getAD(ref, altsAndGTs.get(0), oabs), Constants.COLON);//GT and + StringUtils.updateStringBuilder(cSB, controlCov > 0 ? controlCov + "" : "0", Constants.COLON); //DP + StringUtils.updateStringBuilder(cSB, failedFilter, Constants.COLON); // FF (failed filter) /* * filters are applied in qannotate now */ - StringUtils.updateStringBuilder(cSB, Constants.MISSING_DATA_STRING, Constants.COLON); + StringUtils.updateStringBuilder(cSB, Constants.MISSING_DATA_STRING, Constants.COLON); // FT field String [] mrNNS = getMR(cBasesCountsNNS, aAlts, controlFirstG, controlSecondG); - StringUtils.updateStringBuilder(cSB, Constants.MISSING_DATA_STRING, Constants.COLON); // INF field - StringUtils.updateStringBuilder(cSB, mrNNS[1], Constants.COLON); - StringUtils.updateStringBuilder(cSB, oabs, Constants.COLON); + StringUtils.updateStringBuilder(cSB, Constants.MISSING_DATA_STRING, Constants.COLON); // INF field + StringUtils.updateStringBuilder(cSB, mrNNS[1], Constants.COLON); // NNS field + StringUtils.updateStringBuilder(cSB, oabs, Constants.COLON); // OABS field oOabs = getOABS(tBasesCountsNNS); - oabs = oOabs.isPresent() ? oOabs.get() : Constants.MISSING_DATA_STRING; + oabs = oOabs.orElse(Constants.MISSING_DATA_STRING); StringBuilder tSB = new StringBuilder(altsAndGTs.get(2)); - StringUtils.updateStringBuilder(tSB, VcfUtils.getAD(ref, altsAndGTs.get(0), oabs), Constants.COLON); - StringUtils.updateStringBuilder(tSB, testCov > 0 ? 
testCov +"" : "0", Constants.COLON); + StringUtils.updateStringBuilder(tSB, VcfUtils.getAD(ref, altsAndGTs.get(0), oabs), Constants.COLON);//GT and + StringUtils.updateStringBuilder(tSB, testCov > 0 ? testCov + "" : "0", Constants.COLON); //DP + failedFilter = getFailedFilterCS(p.right()); + StringUtils.updateStringBuilder(tSB, failedFilter, Constants.COLON); // FF (failed filter) /* * filters are applied in qannotate now */ - StringUtils.updateStringBuilder(tSB, Constants.MISSING_DATA_STRING, Constants.COLON); + StringUtils.updateStringBuilder(tSB, Constants.MISSING_DATA_STRING, Constants.COLON); // FT field StringUtils.updateStringBuilder(tSB, (c == Classification.SOMATIC ? VcfHeaderUtils.INFO_SOMATIC : Constants.MISSING_DATA_STRING), Constants.COLON); // INF field mrNNS = getMR(tBasesCountsNNS,aAlts, testFirstG, testSecondG); - StringUtils.updateStringBuilder(tSB, mrNNS[1], Constants.COLON); - StringUtils.updateStringBuilder(tSB, oabs, Constants.COLON); + StringUtils.updateStringBuilder(tSB, mrNNS[1], Constants.COLON); // NNS field + StringUtils.updateStringBuilder(tSB, oabs, Constants.COLON); // OABS field - v.setFormatFields(Arrays.asList("GT:AD:DP:FT:INF:NNS:OABS", cSB.toString(), tSB.toString())); + v.setFormatFields(Arrays.asList("GT:AD:DP:FF:FT:INF:NNS:OABS", cSB.toString(), tSB.toString())); - return Optional.ofNullable(v); + return Optional.of(v); + } + + public static String getFailedFilterCS(List accumulators) { + if (null == accumulators || accumulators.isEmpty()) { + return Constants.MISSING_DATA_STRING; + } + Map failedFilters = new THashMap<>(); + int x = 0; + int runningTally = 0; + for (Accumulator acc : accumulators) { + if (null != acc) { + String padding = "_".repeat(x); + TLongList count = acc.getFailedFilterACount(); + if (null != count && ! 
count.isEmpty()) { + TLongHashSet failedFilterSet = new TLongHashSet(count); + long[] array = failedFilterSet.toArray(); + for (long l : array) { + failedFilters.computeIfAbsent(l, k -> new StringBuilder(padding)).append("A"); + } + } + count = acc.getFailedFilterCCount(); + if (null != count && ! count.isEmpty()) { + TLongHashSet failedFilterSet = new TLongHashSet(count); + long[] array = failedFilterSet.toArray(); + for (long l : array) { + failedFilters.computeIfAbsent(l, k -> new StringBuilder(padding)).append("C"); + } + } + count = acc.getFailedFilterGCount(); + if (null != count && ! count.isEmpty()) { + TLongHashSet failedFilterSet = new TLongHashSet(count); + long[] array = failedFilterSet.toArray(); + for (long l : array) { + failedFilters.computeIfAbsent(l, k -> new StringBuilder(padding)).append("G"); + } + } + count = acc.getFailedFilterTCount(); + if (null != count && ! count.isEmpty()) { + TLongHashSet failedFilterSet = new TLongHashSet(count); + long[] array = failedFilterSet.toArray(); + for (long l : array) { + failedFilters.computeIfAbsent(l, k -> new StringBuilder(padding)).append("T"); + } + } + } + if (x >= 1) { + /* + add padding to any entry in the map that has length less than x + */ + for (Map.Entry e : failedFilters.entrySet()) { + if (e.getValue().length() < (x + 1)) { + e.getValue().append("_".repeat((x + 1) - e.getValue().length())); + } + } + } + x++; +// logger.info("runningTally: " + runningTally + ", failedFilters.size(): " + failedFilters.size()); + + /* + purge any entries in the map that have a value greater than length (x + 1) + */ + Iterator> iter = failedFilters.entrySet().iterator(); + while (iter.hasNext()) { + Map.Entry entry = iter.next(); + if (entry.getValue().length() >= (x + 1)) { + iter.remove(); + } + } + + } + for (Map.Entry e : failedFilters.entrySet()) { + if (e.getValue().length() < x) { + e.getValue().append("_".repeat(x - e.getValue().length())); + } + } + + /* + * now we have a map of readIds and failed 
filters, we need to turn this into a string + */ + Map failedFilterCounts = new HashMap<>(); + failedFilters.forEach((k,v) -> failedFilterCounts.computeIfAbsent(v.toString(), k1 -> new AtomicInteger()).incrementAndGet()); + + StringBuilder outputSB = new StringBuilder(); + failedFilterCounts.entrySet().stream() .sorted(Map.Entry.comparingByKey()) + .forEach(e -> StringUtils.updateStringBuilder(outputSB, e.getKey() + e.getValue().get(), ';')); + + return outputSB.isEmpty() ? Constants.MISSING_DATA_STRING : outputSB.toString(); } } diff --git a/qsnp/test/org/qcmg/snp/PipelineTest.java b/qsnp/test/org/qcmg/snp/PipelineTest.java index cfe168dc2..a4e79d91b 100644 --- a/qsnp/test/org/qcmg/snp/PipelineTest.java +++ b/qsnp/test/org/qcmg/snp/PipelineTest.java @@ -3,10 +3,7 @@ import static org.junit.Assert.assertEquals; import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.*; import java.util.concurrent.CountDownLatch; import java.util.concurrent.atomic.AtomicInteger; @@ -122,4 +119,45 @@ private static SAMRecord createSamRec(String s, SAMFileHeader header) { return rec; } + + @Test + public void testNameHashing() { + List names = Arrays.asList( "HGMY7DSX5230415:3:1538:31602:12446", + "HGN5JDSX5230415:4:1456:2709:30452", + "HGNCTDSX5230415:3:2520:4191:1266", + "HGNCTDSX5230415:3:1162:4490:7044", + "HGN5JDSX5230415:4:1148:16197:31015", + "HGNCTDSX5230415:3:2431:27805:36338", + "HGMY7DSX5230415:3:1318:28899:23234", + "HGN5JDSX5230415:4:1547:1759:7075", + "HGMWYDSX5230415:3:1236:30590:22561", + "HGMWYDSX5230415:3:2639:10818:19727", + "HGN5JDSX5230415:4:2371:13114:3474", + "HGMY7DSX5230415:3:1555:25482:29653", + "HGMY7DSX5230415:3:1644:20365:15029", + "HGMY7DSX5230415:3:1561:27091:11741", + "HGNCTDSX5230415:3:2515:16857:21919", + "HGMWYDSX5230415:3:1126:2166:31610", + "HGN5JDSX5230415:4:1456:2709:30452", + "HGNCTDSX5230415:3:1423:11342:20603", + "HGMWYDSX5230415:3:1555:18195:3411", + 
"HGMWYDSX5230415:3:2573:27751:31422", + "HGN5JDSX5230415:4:2208:8232:13088", + "HGNCTDSX5230415:3:1262:31783:25034", + "HGNCTDSX5230415:3:1262:31783:25034", + "HGMWYDSX5230415:3:2671:30002:6668", + "HGMWYDSX5230415:3:2344:28357:27524", + "HGMY7DSX5230415:3:2623:25364:17268", + "HGN5JDSX5230415:4:2368:32660:33818"); + assertEquals(27, names.size()); + /* + put into a set to check for uniqueness + */ + Set nameSet = new HashSet<>(names); + assertEquals(25, nameSet.size()); + /* + now hash them and check we get the same amount of unique hashes + */ + + } } diff --git a/qsnp/test/org/qcmg/snp/ProcessBamRecordTest.java b/qsnp/test/org/qcmg/snp/ProcessBamRecordTest.java index 6bebb2b4b..f6533691a 100644 --- a/qsnp/test/org/qcmg/snp/ProcessBamRecordTest.java +++ b/qsnp/test/org/qcmg/snp/ProcessBamRecordTest.java @@ -166,7 +166,7 @@ public void updateMapWithAccums(int startPosition, final byte[] bases, final byt if (passesFilter) acc.addBase(bases[i + offset], qualities[i + offset], forwardStrand, startPosition, startPosition + i + referenceOffset, endPosition, 1); else - acc.addFailedFilterBase(bases[i + offset]); + acc.addFailedFilterBase(bases[i + offset], 2); } } diff --git a/qsnp/test/org/qcmg/snp/util/GenotypeUtilTest.java b/qsnp/test/org/qcmg/snp/util/GenotypeUtilTest.java index 140eda41c..be45f6f00 100644 --- a/qsnp/test/org/qcmg/snp/util/GenotypeUtilTest.java +++ b/qsnp/test/org/qcmg/snp/util/GenotypeUtilTest.java @@ -78,7 +78,7 @@ public void isSomatic3() { /* * chr10 54817257 rs386743785 AG GA . . 
IN=1,2;DB;HOM=0,TTTAACCTTCgaCTTGCCCACA;EFF=intergenic_region(MODIFIER||||||||||1) GT:AD:CCC:CCM:DP:FT:INF:MR:NNS:OABS 1/1:0,19:Germline:32:34:PASS:.:19:19:AA8[]6[];GA8[]11[];TA0[]1[] 0/0:2,0:ReferenceNoVariant:32:55:PASS:SOMATIC:.:.:AA33[]20[];AG1[]1[];A_2[]0[] 1/1:0,19:Germline:32:34:PASS:.:19:19:AA8[]6[];GA8[]11[];TA0[]1[] 0/0:2,0:ReferenceNoVariant:32:55:PASS:SOMATIC:.:.:AA33[]20[];AG1[]1[];A_2[]0[] * - * need to decomose this cs to see if the individual snps are somatic - should be germline + * need to decompose this cs to see if the individual snps are somatic - should be germline */ //AA8[]6[];GA8[]11[];TA0[]1[] @@ -126,8 +126,8 @@ public void getFilters() { control = AccumulatorUtils.createFromOABS(cOABS, 13118); test = AccumulatorUtils.createFromOABS(tOABS, 13118); gt = "0/1"; - assertEquals("0/1:10,6:"+control.getCoverage()+":.:.:.:.:2:"+cOABS, GenotypeUtil.getFormatValues(control, gt, "T", 'C', true, 5, 5,Classification.GERMLINE, true)); - assertEquals("0/1:10,4:"+test.getCoverage()+":.:.:.:.:1:"+tOABS, GenotypeUtil.getFormatValues(test, gt, "T", 'C', true, 5, 5,Classification.GERMLINE, false)); + assertEquals("0/1:10,6:" + control.getCoverage() + ":.:.:.:.:2:" + cOABS, GenotypeUtil.getFormatValues(control, gt, "T", 'C', true, 5, 5,Classification.GERMLINE, true)); + assertEquals("0/1:10,4:" + test.getCoverage() + ":.:.:.:.:1:" + tOABS, GenotypeUtil.getFormatValues(test, gt, "T", 'C', true, 5, 5,Classification.GERMLINE, false)); } @@ -143,9 +143,9 @@ public void mutationEqualsReference() { Accumulator control = AccumulatorUtils.createFromOABS(cOABS, 16534); Accumulator test = AccumulatorUtils.createFromOABS(tOABS, 16534); String gt = "1/1"; - assertEquals(gt + ":0,3:"+control.getCoverage()+":.:.:.:.:2:"+cOABS, GenotypeUtil.getFormatValues(control, gt, "T", 'C', true, 5, 5,Classification.SOMATIC, true)); + assertEquals(gt + ":0,3:" + control.getCoverage()+":.:.:.:.:2:" + cOABS, GenotypeUtil.getFormatValues(control, gt, "T", 'C', true, 5, 
5,Classification.SOMATIC, true)); gt = "0/0"; - assertEquals(gt + ":3,2:"+test.getCoverage()+":.:.:.:SOMATIC:.:"+tOABS, GenotypeUtil.getFormatValues(test, gt, "T", 'C', true, 5, 5,Classification.SOMATIC, false)); + assertEquals(gt + ":3,2:" + test.getCoverage()+":.:.:.:SOMATIC:.:" + tOABS, GenotypeUtil.getFormatValues(test, gt, "T", 'C', true, 5, 5,Classification.SOMATIC, false)); } @Test @@ -159,16 +159,16 @@ public void san3() { Accumulator test = AccumulatorUtils.createFromOABS(tOABS, 16571); assertEquals(Classification.GERMLINE, GenotypeUtil.getClassification("ACG", ".", "1/1","A")); String gt = "1/1"; - assertEquals("./.:2,1:"+control.getCoverage()+":.:.:.:.:.:"+cOABS, GenotypeUtil.getFormatValues(control, null, "A", 'G', true, 5, 5,Classification.GERMLINE, true)); - assertEquals(gt + ":2,3:"+test.getCoverage()+":.:.:.:.:1:"+tOABS, GenotypeUtil.getFormatValues(test, gt, "A", 'G', true, 5, 5,Classification.GERMLINE, false)); + assertEquals("./.:2,1:" + control.getCoverage() + ":.:.:.:.:.:" + cOABS, GenotypeUtil.getFormatValues(control, null, "A", 'G', true, 5, 5,Classification.GERMLINE, true)); + assertEquals(gt + ":2,3:" + test.getCoverage() + ":.:.:.:.:1:" + tOABS, GenotypeUtil.getFormatValues(test, gt, "A", 'G', true, 5, 5,Classification.GERMLINE, false)); cOABS = "A0[0]1[34];C0[0]1[1]"; tOABS = "A0[0]3[18];G0[0]2[30]"; control = AccumulatorUtils.createFromOABS(cOABS, 16571); test = AccumulatorUtils.createFromOABS(tOABS, 16571); assertEquals(Classification.GERMLINE, GenotypeUtil.getClassification("ACG", ".", "1/1","A")); - assertEquals("./.:0,1:"+control.getCoverage()+":.:.:.:.:.:"+cOABS, GenotypeUtil.getFormatValues(control, null, "A", 'G', true, 5, 5,Classification.GERMLINE, true)); - assertEquals(gt + ":2,3:"+test.getCoverage()+":.:.:.:.:1:"+tOABS, GenotypeUtil.getFormatValues(test, gt, "A", 'G', true, 5, 5,Classification.GERMLINE, false)); + assertEquals("./.:0,1:" + control.getCoverage()+":.:.:.:.:.:" + cOABS, GenotypeUtil.getFormatValues(control, null, 
"A", 'G', true, 5, 5,Classification.GERMLINE, true)); + assertEquals(gt + ":2,3:" + test.getCoverage()+":.:.:.:.:1:" + tOABS, GenotypeUtil.getFormatValues(test, gt, "A", 'G', true, 5, 5,Classification.GERMLINE, false)); } @Test @@ -182,16 +182,16 @@ public void sat3() { Accumulator test = AccumulatorUtils.createFromOABS(tOABS, 133129); assertEquals(Classification.GERMLINE, GenotypeUtil.getClassification("AG", "1/1", ".","A")); String gt = "1/1"; - assertEquals(gt + ":1,3:"+control.getCoverage()+":.:.:.:.:2:"+cOABS, GenotypeUtil.getFormatValues(control, gt, "A", 'G', true, 5, 5,Classification.GERMLINE, true)); - assertEquals("./.:2,1:"+test.getCoverage()+":.:.:.:.:.:"+tOABS, GenotypeUtil.getFormatValues(test, null, "A", 'G', true, 5, 5,Classification.GERMLINE, false)); + assertEquals(gt + ":1,3:" + control.getCoverage()+":.:.:.:.:2:" + cOABS, GenotypeUtil.getFormatValues(control, gt, "A", 'G', true, 5, 5,Classification.GERMLINE, true)); + assertEquals("./.:2,1:" + test.getCoverage()+":.:.:.:.:.:" + tOABS, GenotypeUtil.getFormatValues(test, null, "A", 'G', true, 5, 5,Classification.GERMLINE, false)); cOABS = "A2[30]1[2];G0[0]1[36]"; tOABS = "A1[21]0[0];G0[0]1[23]"; control = AccumulatorUtils.createFromOABS(cOABS, 133129); test = AccumulatorUtils.createFromOABS(tOABS, 133129); assertEquals(Classification.GERMLINE, GenotypeUtil.getClassification("AG", "1/1", ".","A")); - assertEquals(gt + ":1,3:"+control.getCoverage()+":.:.:.:.:2:"+cOABS, GenotypeUtil.getFormatValues(control, gt, "A", 'G', true, 5, 5,Classification.GERMLINE, true)); - assertEquals("./.:1,1:"+test.getCoverage()+":.:.:.:.:.:"+tOABS, GenotypeUtil.getFormatValues(test, null, "A", 'G', true, 5, 5,Classification.GERMLINE, false)); + assertEquals(gt + ":1,3:" + control.getCoverage()+":.:.:.:.:2:" + cOABS, GenotypeUtil.getFormatValues(control, gt, "A", 'G', true, 5, 5,Classification.GERMLINE, true)); + assertEquals("./.:1,1:" + test.getCoverage()+":.:.:.:.:.:" + tOABS, GenotypeUtil.getFormatValues(test, null, 
"A", 'G', true, 5, 5,Classification.GERMLINE, false)); } @Test @@ -205,9 +205,9 @@ public void doubleMIN() { Accumulator test = AccumulatorUtils.createFromOABS(tOABS, 15274); assertEquals(Classification.SOMATIC, GenotypeUtil.getClassification("GT", "2/2", "1/2","G,T")); String gt = "2/2"; - assertEquals(gt + ":0,4,61:"+control.getCoverage()+":.:.:.:.:2:"+cOABS, GenotypeUtil.getFormatValues(control, gt, "G,T", 'A', true, 5, 5,Classification.SOMATIC, true)); + assertEquals(gt + ":0,4,61:" + control.getCoverage()+":.:.:.:.:2:" + cOABS, GenotypeUtil.getFormatValues(control, gt, "G,T", 'A', true, 5, 5,Classification.SOMATIC, true)); gt = "1/2"; - assertEquals(gt + ":0,9,47:"+test.getCoverage()+":.:.:.:SOMATIC:1,2:"+tOABS, GenotypeUtil.getFormatValues(test, gt, "G,T", 'A', true, 5, 5,Classification.SOMATIC, false)); + assertEquals(gt + ":0,9,47:" + test.getCoverage()+":.:.:.:SOMATIC:1,2:" + tOABS, GenotypeUtil.getFormatValues(test, gt, "G,T", 'A', true, 5, 5,Classification.SOMATIC, false)); } @Test @@ -221,9 +221,9 @@ public void getAD() { Accumulator test = AccumulatorUtils.createFromOABS(tOABS, 15274); assertEquals(Classification.SOMATIC, GenotypeUtil.getClassification("GT", "2/2", "1/2","G,T")); String gt = "2/2"; - assertEquals(gt + ":0,4,61:"+control.getCoverage()+":.:.:.:.:2:"+cOABS, GenotypeUtil.getFormatValues(control, gt, "G,T", 'A', true, 5, 5,Classification.SOMATIC, true)); + assertEquals(gt + ":0,4,61:" + control.getCoverage() + ":.:.:.:.:2:" + cOABS, GenotypeUtil.getFormatValues(control, gt, "G,T", 'A', true, 5, 5,Classification.SOMATIC, true)); gt = "1/2"; - assertEquals(gt + ":0,9,47:"+test.getCoverage()+":.:.:.:SOMATIC:1,2:"+tOABS, GenotypeUtil.getFormatValues(test, gt, "G,T", 'A', true, 5, 5,Classification.SOMATIC, false)); + assertEquals(gt + ":0,9,47:" + test.getCoverage() + ":.:.:.:SOMATIC:1,2:" + tOABS, GenotypeUtil.getFormatValues(test, gt, "G,T", 'A', true, 5, 5,Classification.SOMATIC, false)); } @Test @@ -238,30 +238,31 @@ public void miun() { 
Classification c = Classification.SOMATIC; assertEquals(Classification.SOMATIC, GenotypeUtil.getClassification("GT", "2/2", "1/2","G,T")); String gt = "0/0"; - assertEquals(gt + ":356,6:"+control.getCoverage()+":.:.:.:.:.:"+cOABS, GenotypeUtil.getFormatValues(control, gt, "A", 'G', true, 5, 5,c, true)); + assertEquals(gt + ":356,6:" + control.getCoverage()+":.:.:.:.:.:" + cOABS, GenotypeUtil.getFormatValues(control, gt, "A", 'G', true, 5, 5,c, true)); gt = "0/1"; - assertEquals(gt + ":234,16:"+test.getCoverage()+":.:.:.:SOMATIC:2:"+tOABS, GenotypeUtil.getFormatValues(test, gt, "A", 'G', true, 5, 5,c, false)); + assertEquals(gt + ":234,16:" + test.getCoverage()+":.:.:.:SOMATIC:2:" + tOABS, GenotypeUtil.getFormatValues(test, gt, "A", 'G', true, 5, 5,c, false)); /* * add in the unfiltered alt - need 3% of these to trigger (which is 11 in this case */ - control.addFailedFilterBase((byte)'A'); - control.addFailedFilterBase((byte)'A'); + long readNameHash = 1; + control.addFailedFilterBase((byte)'A', readNameHash++); + control.addFailedFilterBase((byte)'A', readNameHash++); gt = "0/0"; - assertEquals(gt + ":356,6:"+control.getCoverage()+":.:A2:.:.:.:"+cOABS, GenotypeUtil.getFormatValues(control, gt, "A", 'G', true, 5, 5,c, true)); + assertEquals(gt + ":356,6:" + control.getCoverage() + ":.:A2:.:.:.:" + cOABS, GenotypeUtil.getFormatValues(control, gt, "A", 'G', true, 5, 5,c, true)); gt = "0/1"; - assertEquals(gt + ":234,16:"+test.getCoverage()+":.:.:.:SOMATIC:2:"+tOABS, GenotypeUtil.getFormatValues(test, gt, "A", 'G', true, 5, 5,c, false)); + assertEquals(gt + ":234,16:" + test.getCoverage() + ":.:.:.:SOMATIC:2:" + tOABS, GenotypeUtil.getFormatValues(test, gt, "A", 'G', true, 5, 5,c, false)); /* * add 9 more */ for (int i = 0 ; i < 9 ; i++) { - control.addFailedFilterBase((byte)'A'); + control.addFailedFilterBase((byte)'A', readNameHash++); } gt = "0/0"; - assertEquals(gt + ":356,6:"+control.getCoverage()+":.:A11:.:.:.:"+cOABS, GenotypeUtil.getFormatValues(control, gt, 
"A", 'G', true, 5, 5,c, true)); + assertEquals(gt + ":356,6:"+control.getCoverage() + ":.:A11:.:.:.:" + cOABS, GenotypeUtil.getFormatValues(control, gt, "A", 'G', true, 5, 5,c, true)); gt = "0/1"; - assertEquals(gt + ":234,16:"+test.getCoverage()+":.:.:.:SOMATIC:2:"+tOABS, GenotypeUtil.getFormatValues(test, gt, "A", 'G', true, 5, 5,c, false)); + assertEquals(gt + ":234,16:"+test.getCoverage() + ":.:.:.:SOMATIC:2:" + tOABS, GenotypeUtil.getFormatValues(test, gt, "A", 'G', true, 5, 5,c, false)); } @Test diff --git a/qsnp/test/org/qcmg/snp/util/PipelineUtilTest.java b/qsnp/test/org/qcmg/snp/util/PipelineUtilTest.java index de957f733..f28b30d13 100644 --- a/qsnp/test/org/qcmg/snp/util/PipelineUtilTest.java +++ b/qsnp/test/org/qcmg/snp/util/PipelineUtilTest.java @@ -1,16 +1,9 @@ package org.qcmg.snp.util; -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Optional; +import java.util.*; import org.junit.Assert; +import org.junit.Ignore; import org.junit.Test; import org.qcmg.common.model.Accumulator; import org.qcmg.common.model.ChrPointPosition; @@ -33,6 +26,8 @@ import gnu.trove.map.hash.TIntIntHashMap; import gnu.trove.map.hash.TLongIntHashMap; +import static org.junit.Assert.*; + public class PipelineUtilTest { public static final List cRules = Arrays.asList(new Rule(0,20,3), new Rule(21,50,4), new Rule(51,Integer.MAX_VALUE,10)); @@ -41,8 +36,8 @@ public class PipelineUtilTest { @Test public void getLoLoRecs() { - assertEquals(null, PipelineUtil.listOfListOfAdjacentVcfs(null)); - assertEquals(true, PipelineUtil.listOfListOfAdjacentVcfs(new ArrayList()).isEmpty()); + assertNull(PipelineUtil.listOfListOfAdjacentVcfs(null)); + assertTrue(PipelineUtil.listOfListOfAdjacentVcfs(new ArrayList<>()).isEmpty()); } @@ -111,8 +106,8 @@ public void getLoLoRecsDiffChr() { 
snps.add(VcfUtils.createVcfRecord("1", 100)); snps.add(VcfUtils.createVcfRecord("2", 100)); snps.add(VcfUtils.createVcfRecord("3", 100)); - - assertEquals(true, PipelineUtil.listOfListOfAdjacentVcfs(snps).isEmpty()); + + assertTrue(PipelineUtil.listOfListOfAdjacentVcfs(snps).isEmpty()); } @Test @@ -127,8 +122,8 @@ public void getLoLoRecsDiffClassification() { List snps = new ArrayList<>(); snps.add(v1); snps.add(v2); - - assertEquals(true, PipelineUtil.listOfListOfAdjacentVcfs(snps).isEmpty()); + + assertTrue(PipelineUtil.listOfListOfAdjacentVcfs(snps).isEmpty()); /* * both som */ @@ -147,7 +142,7 @@ public void getLoLoRecsDiffClassification() { * 1 germ 1 som */ v1.setInfo(VcfHeaderUtils.INFO_SOMATIC); - assertEquals(true, PipelineUtil.listOfListOfAdjacentVcfs(snps).isEmpty()); + assertTrue(PipelineUtil.listOfListOfAdjacentVcfs(snps).isEmpty()); } @Test @@ -196,8 +191,8 @@ public void getLoLoRecsSameChrLongWayAway() { snps.add(VcfUtils.createVcfRecord("1", 100)); snps.add(VcfUtils.createVcfRecord("1", 200)); snps.add(VcfUtils.createVcfRecord("1", 300)); - - assertEquals(true, PipelineUtil.listOfListOfAdjacentVcfs(snps).isEmpty()); + + assertTrue(PipelineUtil.listOfListOfAdjacentVcfs(snps).isEmpty()); } @Test public void getLoLoRecsSameChrShortWayAway() { @@ -207,8 +202,8 @@ public void getLoLoRecsSameChrShortWayAway() { snps.add(VcfUtils.createVcfRecord("1", 104)); snps.add(VcfUtils.createVcfRecord("1", 106)); snps.add(VcfUtils.createVcfRecord("1", 108)); - - assertEquals(true, PipelineUtil.listOfListOfAdjacentVcfs(snps).isEmpty()); + + assertTrue(PipelineUtil.listOfListOfAdjacentVcfs(snps).isEmpty()); } @Test @@ -219,7 +214,7 @@ public void getLoLoRecsSameChr() { snps.add(VcfUtils.createVcfRecord("1", 300)); assertEquals(1, PipelineUtil.listOfListOfAdjacentVcfs(snps).size()); - assertEquals(2, PipelineUtil.listOfListOfAdjacentVcfs(snps).get(0).size()); + assertEquals(2, PipelineUtil.listOfListOfAdjacentVcfs(snps).getFirst().size()); } @Test public void 
getLoLoRecsSameChr2() { @@ -232,7 +227,7 @@ public void getLoLoRecsSameChr2() { snps.add(VcfUtils.createVcfRecord("1", 300)); assertEquals(1, PipelineUtil.listOfListOfAdjacentVcfs(snps).size()); - assertEquals(5, PipelineUtil.listOfListOfAdjacentVcfs(snps).get(0).size()); + assertEquals(5, PipelineUtil.listOfListOfAdjacentVcfs(snps).getFirst().size()); } @Test @@ -246,7 +241,7 @@ public void getLoLoRecsSameChr3() { snps.add(VcfUtils.createVcfRecord("1", 105)); assertEquals(1, PipelineUtil.listOfListOfAdjacentVcfs(snps).size()); - assertEquals(6, PipelineUtil.listOfListOfAdjacentVcfs(snps).get(0).size()); + assertEquals(6, PipelineUtil.listOfListOfAdjacentVcfs(snps).getFirst().size()); } @Test @@ -260,7 +255,7 @@ public void getLoLoRecsSameChr4() { snps.add(VcfUtils.createVcfRecord("1", 106)); assertEquals(1, PipelineUtil.listOfListOfAdjacentVcfs(snps).size()); - assertEquals(4, PipelineUtil.listOfListOfAdjacentVcfs(snps).get(0).size()); + assertEquals(4, PipelineUtil.listOfListOfAdjacentVcfs(snps).getFirst().size()); } @Test @@ -311,17 +306,17 @@ public void getRef() { @Test public void doesStringContainLC() { - assertEquals(false, PipelineUtil.isStringLowerCase(null)); - assertEquals(false, PipelineUtil.isStringLowerCase("")); - assertEquals(false, PipelineUtil.isStringLowerCase(".")); - assertEquals(false, PipelineUtil.isStringLowerCase("_")); - assertEquals(false, PipelineUtil.isStringLowerCase("_A")); - assertEquals(false, PipelineUtil.isStringLowerCase("A_")); - assertEquals(false, PipelineUtil.isStringLowerCase("A_B")); - assertEquals(true, PipelineUtil.isStringLowerCase("a")); - assertEquals(true, PipelineUtil.isStringLowerCase("_a")); - assertEquals(true, PipelineUtil.isStringLowerCase("_a_")); - assertEquals(true, PipelineUtil.isStringLowerCase("__x")); + assertFalse(PipelineUtil.isStringLowerCase(null)); + assertFalse(PipelineUtil.isStringLowerCase("")); + assertFalse(PipelineUtil.isStringLowerCase(".")); + 
assertFalse(PipelineUtil.isStringLowerCase("_")); + assertFalse(PipelineUtil.isStringLowerCase("_A")); + assertFalse(PipelineUtil.isStringLowerCase("A_")); + assertFalse(PipelineUtil.isStringLowerCase("A_B")); + assertTrue(PipelineUtil.isStringLowerCase("a")); + assertTrue(PipelineUtil.isStringLowerCase("_a")); + assertTrue(PipelineUtil.isStringLowerCase("_a_")); + assertTrue(PipelineUtil.isStringLowerCase("__x")); } @Test @@ -330,7 +325,7 @@ public void getMR() { acc1.addBase((byte)'G', (byte) 1, true, 100, 150, 200, 1); acc1.addBase((byte)'G', (byte) 1, true, 100, 150, 200, 2); acc1.addBase((byte)'G', (byte) 1, false, 100, 150, 200, 3); - List accs = Arrays.asList(acc1); + List accs = List.of(acc1); Map basesAndCounts = PipelineUtil.getBasesFromAccumulators(accs); /* @@ -359,7 +354,7 @@ public void getCount() { acc1.addBase((byte)'G', (byte) 1, true, 1, 1, 2, 1); acc1.addBase((byte)'G', (byte) 1, true, 1, 1, 2, 2); acc1.addBase((byte)'G', (byte) 1, false, 1, 1, 2, 3); - List accs = Arrays.asList(acc1); + List accs = List.of(acc1); Map basesAndCounts = PipelineUtil.getBasesFromAccumulators(accs); assertEquals(3, PipelineUtil.getCount(basesAndCounts, "G", 0)); @@ -382,10 +377,10 @@ public void nonAdjacentAccums() { try { PipelineUtil.getBasesFromAccumulators(accs); Assert.fail("Should have thrown an IAE"); - } catch (IllegalArgumentException iae){}; - - assertEquals(true, PipelineUtil.getBasesFromAccumulators(null).isEmpty()); - assertEquals(true, PipelineUtil.getBasesFromAccumulators(new ArrayList<>()).isEmpty()); + } catch (IllegalArgumentException iae){} + + assertTrue(PipelineUtil.getBasesFromAccumulators(null).isEmpty()); + assertTrue(PipelineUtil.getBasesFromAccumulators(new ArrayList<>()).isEmpty()); } @Test @@ -405,7 +400,7 @@ public void getAltsAndGTsNullInputs() { assertEquals(Constants.MISSING_GT, altsGTs.get(1)); assertEquals(Constants.MISSING_GT, altsGTs.get(2)); - List control = Arrays.asList("ABC"); + List control = List.of("ABC"); altsGTs = 
PipelineUtil.getAltStringAndGenotypes(control, null, "XYZ"); assertEquals(3, altsGTs.size()); @@ -413,7 +408,7 @@ public void getAltsAndGTsNullInputs() { assertEquals("1/1", altsGTs.get(1)); assertEquals(Constants.MISSING_GT, altsGTs.get(2)); - List test = Arrays.asList("123"); + List test = List.of("123"); altsGTs = PipelineUtil.getAltStringAndGenotypes(null, test, "XYZ"); assertEquals(3, altsGTs.size()); @@ -424,8 +419,8 @@ public void getAltsAndGTsNullInputs() { @Test public void getAltsAndGTs() { - List control = Arrays.asList("ABC"); - List test = Arrays.asList("ABC"); + List control = List.of("ABC"); + List test = List.of("ABC"); List altsGTs = PipelineUtil.getAltStringAndGenotypes(control, test, "XYZ"); assertEquals(3, altsGTs.size()); @@ -433,16 +428,16 @@ public void getAltsAndGTs() { assertEquals("1/1", altsGTs.get(1)); assertEquals("1/1", altsGTs.get(2)); - control = Arrays.asList("XYZ"); - test = Arrays.asList("ABC"); + control = List.of("XYZ"); + test = List.of("ABC"); altsGTs = PipelineUtil.getAltStringAndGenotypes(control, test, "XYZ"); assertEquals(3, altsGTs.size()); assertEquals("ABC", altsGTs.get(0)); assertEquals("0/0", altsGTs.get(1)); assertEquals("1/1", altsGTs.get(2)); - control = Arrays.asList("XYZ"); - test = Arrays.asList("XYZ"); + control = List.of("XYZ"); + test = List.of("XYZ"); altsGTs = PipelineUtil.getAltStringAndGenotypes(control, test, "XYZ"); assertEquals(3, altsGTs.size()); assertEquals(Constants.MISSING_DATA_STRING, altsGTs.get(0)); @@ -496,8 +491,8 @@ public void csAltsCantContainRef() { * The following cs is not a cs! * chr1 985449 . GG AG . . . 
GT:DP:MR:OABS 1/1:12:9:AG1[]8[];GA2[]0[];_G1[]0[] 1/1:10:6:AG3[]3[];GA2[]1[];_G1[]0[] */ - List control = Arrays.asList("AG"); - List test = Arrays.asList("AG"); + List control = List.of("AG"); + List test = List.of("AG"); List altsGTs = PipelineUtil.getAltStringAndGenotypes(control, test, "GG"); assertEquals(3, altsGTs.size()); @@ -508,15 +503,15 @@ public void csAltsCantContainRef() { @Test public void getEmptyOABS() { - assertEquals(false, PipelineUtil.getOABS(null).isPresent()); + assertFalse(PipelineUtil.getOABS(null).isPresent()); Map basesAndCounts = new HashMap<>(); - assertEquals(false, PipelineUtil.getOABS(basesAndCounts).isPresent()); + assertFalse(PipelineUtil.getOABS(basesAndCounts).isPresent()); basesAndCounts.put("XYZ", new short[]{}); - assertEquals(false, PipelineUtil.getOABS(basesAndCounts).isPresent()); + assertFalse(PipelineUtil.getOABS(basesAndCounts).isPresent()); basesAndCounts.put("XYZ", new short[4]); - assertEquals(true, PipelineUtil.getOABS(basesAndCounts).isPresent()); + assertTrue(PipelineUtil.getOABS(basesAndCounts).isPresent()); assertEquals("XYZ0[]0[]", PipelineUtil.getOABS(basesAndCounts).get()); basesAndCounts.get("XYZ")[0] = 1; @@ -535,14 +530,14 @@ public void getOABS() { } @Test - public void getBasesForGentype() { + public void getBasesForGenotype() { Map basesAndCounts = new HashMap<>(); basesAndCounts.put("XYZ", new short[]{10,3,11,10}); assertEquals(1, PipelineUtil.getBasesForGenotype(basesAndCounts, 10,"AAA").size()); - assertEquals("XYZ", PipelineUtil.getBasesForGenotype(basesAndCounts, 10,"AAA").get(0)); + assertEquals("XYZ", PipelineUtil.getBasesForGenotype(basesAndCounts, 10,"AAA").getFirst()); basesAndCounts.put("ABC", new short[]{5,2,11,10}); assertEquals(1, PipelineUtil.getBasesForGenotype(basesAndCounts, 10,"AAA").size()); - assertEquals("XYZ", PipelineUtil.getBasesForGenotype(basesAndCounts, 10,"AAA").get(0)); + assertEquals("XYZ", PipelineUtil.getBasesForGenotype(basesAndCounts, 10,"AAA").getFirst()); 
assertEquals(2, PipelineUtil.getBasesForGenotype(basesAndCounts, 10,"DDD").size()); assertEquals("XYZ", PipelineUtil.getBasesForGenotype(basesAndCounts, 10,"DDD").get(0)); assertEquals("ABC", PipelineUtil.getBasesForGenotype(basesAndCounts, 10,"DDD").get(1)); @@ -551,6 +546,16 @@ public void getBasesForGentype() { assertEquals("XYZ", PipelineUtil.getBasesForGenotype(basesAndCounts, 10,"AAA").get(0)); assertEquals("HBH", PipelineUtil.getBasesForGenotype(basesAndCounts, 10,"AAA").get(1)); } + + @Test + public void getBasesForGenotype2() { + Map basesAndCounts = new HashMap<>(); + basesAndCounts.put("CG", new short[]{11,11,0,0}); + basesAndCounts.put("TG", new short[]{1,1,0,0}); + basesAndCounts.put("CA", new short[]{6,6,0,0}); + assertEquals(1, PipelineUtil.getBasesForGenotype(basesAndCounts, 3,"TG").size()); + assertEquals("CA", PipelineUtil.getBasesForGenotype(basesAndCounts, 3,"TG").getFirst()); + } @Test public void getBasesFromAccs() { @@ -618,12 +623,13 @@ public void createCSSinglePos() { map.put(origV, new Pair<>(controlAcc100, testAcc100)); VcfRecord v = PipelineUtil.createCompoundSnp(map, cRules,tRules, true, 3, 3).orElse(null); - assertEquals(origV.getChrPosition(), v.getChrPosition()); + assert v != null; + assertEquals(origV.getChrPosition(), v.getChrPosition()); assertEquals("C", v.getAlt()); assertEquals("A", v.getRef()); - assertEquals("GT:AD:DP:FT:INF:NNS:OABS", v.getFormatFields().get(0)); - assertEquals("0/0:3,0:3:.:.:.:A2[]1[]", v.getFormatFields().get(1)); - assertEquals("1/1:0,3:3:.:SOMATIC:2:C2[]1[]", v.getFormatFields().get(2)); + assertEquals("GT:AD:DP:FF:FT:INF:NNS:OABS", v.getFormatFields().get(0)); + assertEquals("0/0:3,0:3:.:.:.:.:A2[]1[]", v.getFormatFields().get(1)); + assertEquals("1/1:0,3:3:.:.:SOMATIC:2:C2[]1[]", v.getFormatFields().get(2)); } @Test @@ -656,12 +662,13 @@ public void createCS2Pos() { map.put(origV2, new Pair<>(controlAcc101, testAcc101)); VcfRecord v = PipelineUtil.createCompoundSnp(map, cRules,tRules, true, 3, 
3).orElse(null); - assertEquals(2, v.getChrPosition().getLength()); + assert v != null; + assertEquals(2, v.getChrPosition().getLength()); assertEquals("GT", v.getAlt()); assertEquals("AC", v.getRef()); - assertEquals("GT:AD:DP:FT:INF:NNS:OABS", v.getFormatFields().get(0)); - assertEquals("0/0:3,0:3:.:.:.:AC2[]1[]", v.getFormatFields().get(1)); - assertEquals("1/1:0,3:3:.:SOMATIC:2:GT2[]1[]", v.getFormatFields().get(2)); + assertEquals("GT:AD:DP:FF:FT:INF:NNS:OABS", v.getFormatFields().get(0)); + assertEquals("0/0:3,0:3:.:.:.:.:AC2[]1[]", v.getFormatFields().get(1)); + assertEquals("1/1:0,3:3:.:.:SOMATIC:2:GT2[]1[]", v.getFormatFields().get(2)); /* * add some noise @@ -671,19 +678,20 @@ public void createCS2Pos() { controlAcc100.addBase((byte)'T', (byte) 1, false, 100, 100, 200, 6); v = PipelineUtil.createCompoundSnp(map, cRules,tRules, true, 3, 3).orElse(null); - assertEquals(2, v.getChrPosition().getLength()); + assert v != null; + assertEquals(2, v.getChrPosition().getLength()); assertEquals("GT", v.getAlt()); // assertEquals("T_,GT", v.getAlt()); assertEquals("AC", v.getRef()); - assertEquals("GT:AD:DP:FT:INF:NNS:OABS", v.getFormatFields().get(0)); - assertEquals("0/0:3,0:3:.:.:.:AC2[]1[];T_0[]3[]", v.getFormatFields().get(1)); - assertEquals("1/1:0,3:3:.:SOMATIC:2:GT2[]1[]", v.getFormatFields().get(2)); + assertEquals("GT:AD:DP:FF:FT:INF:NNS:OABS", v.getFormatFields().get(0)); + assertEquals("0/0:3,0:3:.:.:.:.:AC2[]1[];T_0[]3[]", v.getFormatFields().get(1)); + assertEquals("1/1:0,3:3:.:.:SOMATIC:2:GT2[]1[]", v.getFormatFields().get(2)); } @Test public void containsRef() { /* - * chr1 16862501 . TA CG . PASS . ACCS CG,28,7,C_,1,0,TA,48,15,TG,65,38,_G,1,2,_A,0,1 CG,22,4,TA,37,14,TG,52,18,_G,1,1,C_,0,1 + * chr1 16862501 . TA CG . PASS . ACCS CG,28,7,C_,1,0,TA,48,15,TG,65,38,_G,1,2,_A,0,1 CG,22,4,TA,37,14,TG,52,18,_G,1,1,C_,0,1L); * * Largest 2 genotypes are TA (ref) and TG, but TG won't make it as it contains the ref! 
*/ @@ -726,27 +734,25 @@ public void underscoresInAlt() { basesAndCounts.put("__G", new short[]{5,1,0,0}); basesForGenotype = PipelineUtil.getBasesForGenotype(basesAndCounts, 4,"NNN"); assertEquals(1, basesForGenotype.size()); - assertEquals("TAG", basesForGenotype.get(0)); + assertEquals("TAG", basesForGenotype.getFirst()); } @Test - public void compoundSnp() throws Exception { + public void compoundSnp() { VcfRecord v1 = VcfUtils.createVcfRecord(new ChrPointPosition("1", 100),null,"A","C"); v1.setInfo(VcfHeaderUtils.INFO_SOMATIC); VcfRecord v2 = VcfUtils.createVcfRecord(new ChrPointPosition("1", 101),null,"C","G"); v2.setInfo(VcfHeaderUtils.INFO_SOMATIC); - final Accumulator tumour100 = new Accumulator(100); tumour100.addBase((byte)'C', (byte)30, true, 100, 100, 200, 1); final Accumulator tumour101 = new Accumulator(101); tumour101.addBase((byte)'G', (byte)30, true, 101, 101, 200, 1); - Map> map = new HashMap<>(); map.put(v1, new Pair<>(null, tumour100)); map.put(v2, new Pair<>(null, tumour101)); - assertEquals(false, PipelineUtil.createCompoundSnp(map, cRules,tRules, true, 3, 3).isPresent()); + assertFalse(PipelineUtil.createCompoundSnp(map, cRules, tRules, true, 3, 3).isPresent()); // need 4 reads with the cs to register tumour100.addBase((byte)'C', (byte)30, true, 100, 100, 200, 2); @@ -755,17 +761,17 @@ public void compoundSnp() throws Exception { tumour101.addBase((byte)'G', (byte)30, true, 101, 101, 200, 3); tumour100.addBase((byte)'C', (byte)30, true, 100, 100, 200, 4); tumour101.addBase((byte)'G', (byte)30, true, 101, 101, 200, 4); - Optional ov = PipelineUtil.createCompoundSnp(map, cRules,tRules, true, 3, 3); - assertEquals(true, ov.isPresent()); + Optional ov = PipelineUtil.createCompoundSnp(map, cRules,tRules, true, 3, 3); + assertTrue(ov.isPresent()); VcfRecord v = ov.get(); List ff = v.getFormatFields(); - assertEquals(true, ff.get(2).contains("CG4[]0[]")); // tumour - assertEquals("./.:.:0:.:.:.:.", ff.get(1)); // control + 
assertTrue(ff.get(2).contains("CG4[]0[]")); // tumour + assertEquals("./.:.:0:.:.:.:.:.", ff.get(1)); // control } @Test - public void compoundSnpOneGermlineOneSomatic() throws Exception { + public void compoundSnpOneGermlineOneSomatic() { VcfRecord v1 = VcfUtils.createVcfRecord(new ChrPointPosition("1", 100),null,"A","C"); VcfRecord v2 = VcfUtils.createVcfRecord(new ChrPointPosition("1", 101),null,"C","G"); v2.setInfo(VcfHeaderUtils.INFO_SOMATIC); @@ -780,7 +786,7 @@ public void compoundSnpOneGermlineOneSomatic() throws Exception { Map> map = new HashMap<>(); map.put(v1, new Pair<>(null, tumour100)); map.put(v2, new Pair<>(null, tumour101)); - assertEquals(false, PipelineUtil.createCompoundSnp(map, cRules,tRules, true, 3, 3).isPresent()); + assertFalse(PipelineUtil.createCompoundSnp(map, cRules, tRules, true, 3, 3).isPresent()); // need 4 reads with the cs to register tumour100.addBase((byte)'C', (byte)30, true, 100, 100, 200, 2); @@ -789,17 +795,17 @@ public void compoundSnpOneGermlineOneSomatic() throws Exception { tumour101.addBase((byte)'G', (byte)30, true, 101, 101, 200, 3); tumour100.addBase((byte)'C', (byte)30, true, 100, 100, 200, 4); tumour101.addBase((byte)'G', (byte)30, true, 101, 101, 200, 4); - Optional ov = PipelineUtil.createCompoundSnp(map, cRules,tRules, true, 3, 3); - assertEquals(true, ov.isPresent()); + Optional ov = PipelineUtil.createCompoundSnp(map, cRules,tRules, true, 3, 3); + assertTrue(ov.isPresent()); VcfRecord v = ov.get(); List ff = v.getFormatFields(); - assertEquals(true, ff.get(2).contains("CG4[]0[]")); // tumour - assertEquals("./.:.:0:.:.:.:.", ff.get(1)); // control + assertTrue(ff.get(2).contains("CG4[]0[]")); // tumour + assertEquals("./.:.:0:.:.:.:.:.", ff.get(1)); // control } @Test - public void noCompoundSnpMissingAccs() throws Exception { + public void noCompoundSnpMissingAccs() { /* * chr4 8046419 . G T . . 
BaseQRankSum=0.694;ClippingRankSum=1.157;DP=24;FS=5.815;MQ=60.00;MQRankSum=-0.602;QD=15.87;ReadPosRankSum=0.787;SOR=2.258 GT:AD:DP:GQ:FT:INF:MR:NNS:OABS 0/1:5,14:19:99:SAN3:.:.:.:. 0/1:8,32:40:99:SBIASCOV;5BP=1;SAT3:.:1:1:T1[22]0[0] chr4 8046420 . A C . . BaseQRankSum=1.528;ClippingRankSum=0.787;DP=24;FS=5.815;MQ=60.00;MQRankSum=-1.713;QD=15.87;ReadPosRankSum=0.787;SOR=2.258 GT:AD:DP:GQ:FT:INF:MR:NNS:OABS 0/1:5,14:19:99:SAN3:.:.:.:. 0/1:10,28:38:99:SAT3:.:.:.:. @@ -826,13 +832,13 @@ public void noCompoundSnpMissingAccs() throws Exception { } @Test - public void noCompoundSnpRefInAlt() throws Exception { + public void noCompoundSnpRefInAlt() { /* * Don't want this happening * chr1 985449 . GG AG . . . GT:DP:MR:OABS 1/1:12:9:AG1[]8[];GA2[]0[];_G1[]0[] 1/1:10:6:AG3[]3[];GA2[]1[];_G1[]0[] * which was made up from - * chr1 985449 rs56255212 G A 421.77 PASS AC=2;AF=1.00;AN=2;DB;DP=10;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;MQ0=0;QD=31.82;SOR=2.303 GT:AD:DP:GQ:PL:GD:AC:OABS:MR:NNS 1/1:0,10:10:30:450,30,0:A/A:A1[37],8[35.12],G2[38],0[0]:.:9:9 0/1:3,6:9:99:208,0,138:A/G:A3[34],3[38.67],G2[37],1[35]:.:6:5 - * chr1 985450 . G A 67.77 MIN;MR;NNS SOMATIC GT:AD:DP:GQ:PL:GD:AC:OABS:MR:NNS .:.:.:.:.:G/G:A2[37],0[0],G2[37],8[36.5]:.:2:2 0/1:7,3:10:96:96,0,236:A/G:A2[35],1[35],G4[36.5],3[39]:.:3:3 + * chr1 985449 rs56255212 G A 421.77 PASS AC=2;AF=1.00;AN=2;DB;DP=10;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;MQ0=0;QD=31.82;SOR=2.303 GT:AD:DP:GQ:PL:GD:AC:OABS:MR:NNS 1/1:0,10:10:30:450,30,0:A/A:A1[37],8[35.12],G2[38],0[0]:.:9:9 0/1:3,6:9:99:208,0,138:A/G:A3[34],3[38.67],G2[37],1[35]:.:6:5L); + * chr1 985450 . 
G A 67.77 MIN;MR;NNS SOMATIC GT:AD:DP:GQ:PL:GD:AC:OABS:MR:NNS .:.:.:.:.:G/G:A2[37],0[0],G2[37],8[36.5]:.:2:2 0/1:7,3:10:96:96,0,236:A/G:A2[35],1[35],G4[36.5],3[39]:.:3:3L); */ VcfRecord v1 = VcfUtils.createVcfRecord(new ChrPointPosition("1", 100),null,"G","A"); v1.setInfo(VcfHeaderUtils.INFO_SOMATIC); @@ -903,12 +909,12 @@ public void noCompoundSnpRefInAlt() throws Exception { Map> map = new HashMap<>(); map.put(v1, new Pair<>(control100, tumour100)); map.put(v2, new Pair<>(control101, tumour101)); - assertEquals(false, PipelineUtil.createCompoundSnp(map, cRules,tRules, true, 3, 3).isPresent()); + assertFalse(PipelineUtil.createCompoundSnp(map, cRules, tRules, true, 3, 3).isPresent()); } @Test - public void compoundSnpReverseStrand() throws Exception { + public void compoundSnpReverseStrand() { VcfRecord v1 = VcfUtils.createVcfRecord(new ChrPointPosition("1", 100),null,"A","C"); v1.setInfo(VcfHeaderUtils.INFO_SOMATIC); VcfRecord v2 = VcfUtils.createVcfRecord(new ChrPointPosition("1", 101),null,"C","G"); @@ -922,7 +928,7 @@ public void compoundSnpReverseStrand() throws Exception { Map> map = new HashMap<>(); map.put(v1, new Pair<>(null, tumour100)); map.put(v2, new Pair<>(null, tumour101)); - assertEquals(false, PipelineUtil.createCompoundSnp(map, cRules,tRules, true, 3, 3).isPresent()); + assertFalse(PipelineUtil.createCompoundSnp(map, cRules, tRules, true, 3, 3).isPresent()); // need 4 reads with the cs to register tumour100.addBase((byte)'C', (byte)30, false, 100, 100, 200, 2); @@ -931,17 +937,17 @@ public void compoundSnpReverseStrand() throws Exception { tumour101.addBase((byte)'G', (byte)30, false, 101, 101, 200, 3); tumour100.addBase((byte)'C', (byte)30, false, 100, 100, 200, 4); tumour101.addBase((byte)'G', (byte)30, false, 101, 101, 200, 4); - Optional ov = PipelineUtil.createCompoundSnp(map, cRules,tRules, true, 3, 3); - assertEquals(true, ov.isPresent()); + Optional ov = PipelineUtil.createCompoundSnp(map, cRules,tRules, true, 3, 3); + 
assertTrue(ov.isPresent()); VcfRecord v = ov.get(); List ff = v.getFormatFields(); - assertEquals(true, ff.get(2).contains("CG0[]4[]")); // tumour - assertEquals("./.:.:0:.:.:.:.", ff.get(1)); // control + assertTrue(ff.get(2).contains("CG0[]4[]")); // tumour + assertEquals("./.:.:0:.:.:.:.:.", ff.get(1)); // control } @Test - public void compoundSnpBothStrands() throws Exception { + public void compoundSnpBothStrands() { VcfRecord v1 = VcfUtils.createVcfRecord(new ChrPointPosition("1", 100),null,"A","C"); v1.setInfo(VcfHeaderUtils.INFO_SOMATIC); VcfRecord v2 = VcfUtils.createVcfRecord(new ChrPointPosition("1", 101),null,"C","G"); @@ -955,7 +961,7 @@ public void compoundSnpBothStrands() throws Exception { Map> map = new HashMap<>(); map.put(v1, new Pair<>(null, tumour100)); map.put(v2, new Pair<>(null, tumour101)); - assertEquals(false, PipelineUtil.createCompoundSnp(map, cRules,tRules, true, 3, 3).isPresent()); + assertFalse(PipelineUtil.createCompoundSnp(map, cRules, tRules, true, 3, 3).isPresent()); // need 4 reads with the cs to register tumour100.addBase((byte)'C', (byte)30, true, 100, 100, 200, 2); @@ -965,12 +971,12 @@ public void compoundSnpBothStrands() throws Exception { tumour100.addBase((byte)'C', (byte)30, true, 100, 100, 200, 4); tumour101.addBase((byte)'G', (byte)30, true, 101, 101, 200, 4); - Optional ov = PipelineUtil.createCompoundSnp(map, cRules,tRules, true, 3, 3); - assertEquals(true, ov.isPresent()); + Optional ov = PipelineUtil.createCompoundSnp(map, cRules,tRules, true, 3, 3); + assertTrue(ov.isPresent()); VcfRecord v = ov.get(); List ff = v.getFormatFields(); - assertEquals(true, ff.get(2).contains("CG2[]2[]")); // tumour - assertEquals("./.:.:0:.:.:.:.", ff.get(1)); // control + assertTrue(ff.get(2).contains("CG2[]2[]")); // tumour + assertEquals("./.:.:0:.:.:.:.:.", ff.get(1)); // control } @Test @@ -988,7 +994,7 @@ public void compoundSnpWithOverlappingReads() { Map> map = new HashMap<>(); map.put(v1, new Pair<>(null, tumour100)); 
map.put(v2, new Pair<>(null, tumour101)); - assertEquals(false, PipelineUtil.createCompoundSnp(map, cRules,tRules, true, 3, 3).isPresent()); + assertFalse(PipelineUtil.createCompoundSnp(map, cRules, tRules, true, 3, 3).isPresent()); // need 4 reads with the cs to register tumour100.addBase((byte)'C', (byte)30, true, 100, 100, 200, 2); @@ -999,12 +1005,12 @@ public void compoundSnpWithOverlappingReads() { tumour101.addBase((byte)'G', (byte)30, true, 101, 101, 200, 4); tumour101.addBase((byte)'G', (byte)30, true, 101, 101, 200, 5); - Optional ov = PipelineUtil.createCompoundSnp(map, cRules,tRules, true, 3, 3); - assertEquals(true, ov.isPresent()); + Optional ov = PipelineUtil.createCompoundSnp(map, cRules,tRules, true, 3, 3); + assertTrue(ov.isPresent()); VcfRecord v = ov.get(); List ff = v.getFormatFields(); - assertEquals("./.:.:0:.:.:.:.", ff.get(1)); // control - assertEquals(true, ff.get(2).contains("CG4[]0[];_G1[]0[]")); // tumour + assertEquals("./.:.:0:.:.:.:.:.", ff.get(1)); // control + assertTrue(ff.get(2).contains("CG4[]0[];_G1[]0[]")); // tumour } @Test @@ -1022,7 +1028,7 @@ public void compoundSnpWithOverlappingReadsOtherEnd() { Map> map = new HashMap<>(); map.put(v1, new Pair<>(null, tumour100)); map.put(v2, new Pair<>(null, tumour101)); - assertEquals(false, PipelineUtil.createCompoundSnp(map, cRules,tRules, true, 3, 3).isPresent()); + assertFalse(PipelineUtil.createCompoundSnp(map, cRules, tRules, true, 3, 3).isPresent()); // need 4 reads with the cs to register tumour100.addBase((byte)'C', (byte)30, true, 100, 100, 200, 2); @@ -1033,17 +1039,17 @@ public void compoundSnpWithOverlappingReadsOtherEnd() { tumour101.addBase((byte)'G', (byte)30, true, 100, 101, 200, 4); tumour100.addBase((byte)'C', (byte)30, true, 100, 100, 200, 5); - Optional ov = PipelineUtil.createCompoundSnp(map, cRules,tRules, true, 3, 3); - assertEquals(true, ov.isPresent()); + Optional ov = PipelineUtil.createCompoundSnp(map, cRules,tRules, true, 3, 3); + 
assertTrue(ov.isPresent()); VcfRecord v = ov.get(); assertEquals("AC", v.getRef()); assertEquals("CG", v.getAlt()); List ff = v.getFormatFields(); - assertEquals("./.:.:0:.:.:.:.", ff.get(1)); // control + assertEquals("./.:.:0:.:.:.:.:.", ff.get(1)); // control /* * filters are now applied in qannotate */ - assertEquals("1/1:0,4:4:.:SOMATIC:1:CG4[]0[];C_1[]0[]", ff.get(2)); // tumour + assertEquals("1/1:0,4:4:.:.:SOMATIC:1:CG4[]0[];C_1[]0[]", ff.get(2)); // tumour } @Test @@ -1082,13 +1088,13 @@ public void csRealLife() { map.put(v1, new Pair<>(null, tumour100)); map.put(v2, new Pair<>(null, tumour101)); - Optional ov = PipelineUtil.createCompoundSnp(map, cRules,tRules, true, 3, 3); - assertEquals(true, ov.isPresent()); + Optional ov = PipelineUtil.createCompoundSnp(map, cRules,tRules, true, 3, 3); + assertTrue(ov.isPresent()); VcfRecord v = ov.get(); assertEquals("AA", v.getAlt()); assertEquals("GG", v.getRef()); List ff = v.getFormatFields(); - assertEquals("1/1:0,65:67:.:SOMATIC:65:AA34[]31[];AC1[]0[];CA1[]0[]", ff.get(2)); // tumour + assertEquals("1/1:0,65:67:.:.:SOMATIC:65:AA34[]31[];AC1[]0[];CA1[]0[]", ff.get(2)); // tumour } @Test @@ -1123,8 +1129,8 @@ public void csRealLife2() { map.put(v1, new Pair<>(null, tumour100)); map.put(v2, new Pair<>(null, tumour101)); - Optional ov = PipelineUtil.createCompoundSnp(map, cRules,tRules, true, 3, 3); - assertEquals(true, ov.isPresent()); + Optional ov = PipelineUtil.createCompoundSnp(map, cRules,tRules, true, 3, 3); + assertTrue(ov.isPresent()); VcfRecord v = ov.get(); assertEquals("CA", v.getAlt()); assertEquals("TG", v.getRef()); @@ -1138,7 +1144,7 @@ public void csGATK() { VcfRecord v2 = new VcfRecord(new 
String[]{"chr1","39592385",".","G","A",".",".","BaseQRankSum=0.767;ClippingRankSum=-0.266;DP=46;FS=14.005;MQ=60.00;MQRankSum=-0.423;QD=31.52;ReadPosRankSum=-0.611;SOR=0.028;IN=2;HOM=0,CTTGAGCTTGaGAGGCAGAGA;","GT:AD:CCC:CCM:DP:FT:GQ:INF:QL","./.:.:Reference:13:.:PASS:.:NCIG:.","0/1:7,38:Somatic:13:45:PASS:99:SOMATIC:1449.77"}); List vcfs = Arrays.asList(v1,v2); Optional oVcf = PipelineUtil.createCompoundSnpGATK(vcfs); - assertEquals(true, oVcf.isPresent()); + assertTrue(oVcf.isPresent()); VcfRecord v = oVcf.get(); assertEquals("GG", v.getRef()); assertEquals("TA", v.getAlt()); @@ -1169,7 +1175,7 @@ public void csGATK2() { VcfRecord v2 = new VcfRecord(new String[]{"chr1","40615302",".","C","T",".",".","BaseQRankSum=-0.104;ClippingRankSum=0.439;DP=69;FS=2.380;MQ=60.00;MQRankSum=-0.369;QD=30.87;ReadPosRankSum=-0.717;SOR=0.524;IN=2;HOM=0,ACCTGTAATCtCAGCTACTCG;EFF=intergenic_region(MODIFIER||||||||||1)","GT:AD:CCC:CCM:DP:FT:GQ:INF:QL","./.:.:Reference:13:.:PASS:.:NCIG:.","0/1:17,52:Somatic:13:69:PASS:99:SOMATIC:2129.77"}); List vcfs = Arrays.asList(v1,v2); Optional oVcf = PipelineUtil.createCompoundSnpGATK(vcfs); - assertEquals(true, oVcf.isPresent()); + assertTrue(oVcf.isPresent()); VcfRecord v = oVcf.get(); assertEquals("CC", v.getRef()); assertEquals("AT", v.getAlt()); @@ -1200,7 +1206,7 @@ public void csGATK3() { VcfRecord v2 = new VcfRecord(new String[]{"chr1","47083666",".","A","T",".",".","BaseQRankSum=0.203;ClippingRankSum=0.452;DP=69;FS=1.235;MQ=60.00;MQRankSum=-0.717;QD=33.21;ReadPosRankSum=-0.733;SOR=0.436;IN=2;HOM=2,GAATACATAGtTACTAGGAGG","GT:AD:CCC:CCM:DP:FT:GQ:INF:QL","./.:.:Reference:13:.:PASS:.:NCIG:.","0/1:13,55:Somatic:13:68:PASS:99:SOMATIC:2291.77"}); List vcfs = Arrays.asList(v1,v2); Optional oVcf = PipelineUtil.createCompoundSnpGATK(vcfs); - assertEquals(true, oVcf.isPresent()); + assertTrue(oVcf.isPresent()); VcfRecord v = oVcf.get(); assertEquals("GA", v.getRef()); assertEquals("TT", v.getAlt()); @@ -1231,7 +1237,7 @@ public void csGATK4() { 
VcfRecord v2 = new VcfRecord(new String[]{"chr1","169423270",".","G","A",".",".","DP=121;FS=0.000;MQ=60.00;QD=29.54;SOR=1.096;IN=2;HOM=2,CCTTCTTCAGaACCAAATAGA","GT:AD:CCC:CCM:DP:FT:GQ:INF:QL","./.:.:Reference:14:.:PASS:.:NCIG:.","1/1:0,121:SomaticNoReference:14:121:PASS:99:SOMATIC:5348.77"}); List vcfs = Arrays.asList(v1,v2); Optional oVcf = PipelineUtil.createCompoundSnpGATK(vcfs); - assertEquals(true, oVcf.isPresent()); + assertTrue(oVcf.isPresent()); VcfRecord v = oVcf.get(); assertEquals("GG", v.getRef()); assertEquals("AA", v.getAlt()); @@ -1262,7 +1268,7 @@ public void csGATK5() { VcfRecord v2 = new VcfRecord(new String[]{"chr2","65487955",".","C","T",".",".","BaseQRankSum=2.173;ClippingRankSum=-0.100;DP=65;FS=2.303;MQ=59.61;MQRankSum=-0.186;QD=10.60;ReadPosRankSum=1.101;SOR=0.364;IN=2;HOM=0,GCTCTGCCTCtCGGGTTCACG","GT:AD:CCC:CCM:DP:FT:GQ:INF:QL","./.:.:Reference:13:.:PASS:.:NCIG:.","0/1:43,21:Somatic:13:64:PASS:99:SOMATIC:688.77"}); List vcfs = Arrays.asList(v1,v2); Optional oVcf = PipelineUtil.createCompoundSnpGATK(vcfs); - assertEquals(true, oVcf.isPresent()); + assertTrue(oVcf.isPresent()); VcfRecord v = oVcf.get(); assertEquals("CC", v.getRef()); assertEquals("TT", v.getAlt()); @@ -1293,7 +1299,7 @@ public void csGATK6() { VcfRecord v2 = new VcfRecord(new String[]{"chr6","32495872",".","G","T","107.28",".","AC=2;AF=1.00;AN=2;DP=0;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=0.00;SOR=0.693","GT:AD:DP:GQ:INF:QL","./.:.:.:.:NCIG:.","1/1:.:.:9:.:."}); List vcfs = Arrays.asList(v1,v2); Optional oVcf = PipelineUtil.createCompoundSnpGATK(vcfs); - assertEquals(true, oVcf.isPresent()); + assertTrue(oVcf.isPresent()); VcfRecord v = oVcf.get(); assertEquals("CG", v.getRef()); assertEquals("TT", v.getAlt()); @@ -1324,7 +1330,7 @@ public void csGATK6SingleSample() { VcfRecord v2 = new VcfRecord(new String[]{"chr6","32495872",".","G","T","107.28",".","AC=2;AF=1.00;AN=2;DP=0;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=0.00;SOR=0.693","GT:AD:DP:GQ:INF:QL","1/1:.:.:9:.:."}); List vcfs = 
Arrays.asList(v1,v2); Optional oVcf = PipelineUtil.createCompoundSnpGATK(vcfs, true); - assertEquals(true, oVcf.isPresent()); + assertTrue(oVcf.isPresent()); VcfRecord v = oVcf.get(); assertEquals("CG", v.getRef()); assertEquals("TT", v.getAlt()); @@ -1349,7 +1355,7 @@ public void csGATK7() { VcfRecord v2 = new VcfRecord(new String[]{"chrY","13487854",".","C","A,T","107.28",".","AC=2;AF=1.00;AN=2;DP=0;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=0.00;SOR=0.693","GT:AD:DP:GQ:INF:QL","0/1:.:.:.:NCIG:.","0/2:.:.:9:.:."}); List vcfs = Arrays.asList(v1,v2); Optional oVcf = PipelineUtil.createCompoundSnpGATK(vcfs); - assertEquals(false, oVcf.isPresent()); + assertFalse(oVcf.isPresent()); } @Test @@ -1358,7 +1364,7 @@ public void csGATKSingleSample() { VcfRecord v2 = new VcfRecord(new String[]{"chrY","13487854",".","C","A,T","107.28",".","AC=2;AF=1.00;AN=2;DP=0;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=0.00;SOR=0.693","GT:AD:DP:GQ:INF:QL","0/2:.:.:9:.:."}); List vcfs = Arrays.asList(v1,v2); Optional oVcf = PipelineUtil.createCompoundSnpGATK(vcfs, true); - assertEquals(false, oVcf.isPresent()); + assertFalse(oVcf.isPresent()); } @Test @@ -1367,65 +1373,712 @@ public void csGATKOneSomaticOneGermline() { VcfRecord v2 = new VcfRecord(new String[]{"chr1","39592385",".","G","A",".",".","BaseQRankSum=0.767;ClippingRankSum=-0.266;DP=46;FS=14.005;MQ=60.00;MQRankSum=-0.423;QD=31.52;ReadPosRankSum=-0.611;SOR=0.028;IN=2;HOM=0,CTTGAGCTTGaGAGGCAGAGA;","GT:AD:CCC:CCM:DP:FT:GQ:INF:QL","0/0:.:Reference:13:.:PASS:.:NCIG:.","0/1:7,38:Somatic:13:45:PASS:99:SOMATIC:1449.77"}); List vcfs = Arrays.asList(v1,v2); Optional oVcf = PipelineUtil.createCompoundSnpGATK(vcfs); - assertEquals(false, oVcf.isPresent()); + assertFalse(oVcf.isPresent()); + } + + @Test + public void testGetFailedFilterCS_NullAccumulators() { + // When input is null + String result = PipelineUtil.getFailedFilterCS(null); + + // Expecting MISSING_DATA_STRING + Assert.assertEquals(Constants.MISSING_DATA_STRING, result); + } + + @Test + public void 
testGetFailedFilterCS_EmptyAccumulators() { + // When input is an empty list + List accumulators = Collections.emptyList(); + String result = PipelineUtil.getFailedFilterCS(accumulators); + + // Expecting the "." placeholder rather than an empty string + Assert.assertEquals(".", result); + } + + @Test + public void testGetFailedFilterCS_SingleAccumulator() { + // Mocking a single Accumulator + Accumulator accumulator = new Accumulator(12345); + for (int i = 1 ; i < 4 ; i++) { + accumulator.addFailedFilterBase((byte) 'A', i); + } + for (int i = 4 ; i < 6 ; i++) { + accumulator.addFailedFilterBase((byte) 'C', i); + } + for (int i = 6 ; i < 7 ; i++) { + accumulator.addFailedFilterBase((byte) 'G', i); + } + for (int i = 7 ; i < 8 ; i++) { + accumulator.addFailedFilterBase((byte) 'T', i); + } + assertEquals("A3;C2;G1;T1", PipelineUtil.getFailedFilterCS(Collections.singletonList(accumulator))); + } + + @Test + public void testGetFailedFilterCS_MultipleAccumulators() { + // Mocking two Accumulators + Accumulator acc1 = new Accumulator(12345); + for (int i = 1 ; i < 4 ; i++) { + acc1.addFailedFilterBase((byte) 'A', i); + } + for (int i = 4 ; i < 6 ; i++) { + acc1.addFailedFilterBase((byte) 'C', i); + } + Accumulator acc2 = new Accumulator(12346); + for (int i = 1 ; i < 4 ; i++) { + acc2.addFailedFilterBase((byte) 'G', i); + } + for (int i = 4 ; i < 6 ; i++) { + acc2.addFailedFilterBase((byte) 'T', i); + } + List accumulators = Arrays.asList(acc1, acc2); + assertEquals("AG3;CT2", PipelineUtil.getFailedFilterCS(accumulators)); + + for (int i = 6 ; i < 10 ; i++) { + acc1.addFailedFilterBase((byte) 'T', i); + } + for (int i = 10 ; i < 11 ; i++) { + acc2 .addFailedFilterBase((byte) 'T', i); + } + assertEquals("AG3;CT2;T_4;_T1", PipelineUtil.getFailedFilterCS(accumulators)); + + } + + @Test + public void testGetFailedFilterCS_Triple() { + // Mocking multiple Accumulators + Accumulator acc1 = new Accumulator(12345); + acc1.addFailedFilterBase((byte) 'A', 1); + Accumulator acc2 = new Accumulator(12346); + 
acc2.addFailedFilterBase((byte) 'C', 1); + Accumulator acc3 = new Accumulator(12347); + acc3.addFailedFilterBase((byte) 'G', 1); + + List accumulators = Arrays.asList(acc1, acc2, acc3); + assertEquals("ACG1", PipelineUtil.getFailedFilterCS(accumulators)); + + acc1.addFailedFilterBase((byte) 'A', 2); + assertEquals("ACG1;A__1", PipelineUtil.getFailedFilterCS(accumulators)); + acc2.addFailedFilterBase((byte) 'C', 3); + assertEquals("ACG1;A__1;_C_1", PipelineUtil.getFailedFilterCS(accumulators)); + acc3.addFailedFilterBase((byte) 'G', 4); + assertEquals("ACG1;A__1;_C_1;__G1", PipelineUtil.getFailedFilterCS(accumulators)); + acc3.addFailedFilterBase((byte) 'G', 2); + assertEquals("ACG1;A_G1;_C_1;__G1", PipelineUtil.getFailedFilterCS(accumulators)); + } + + @Test + public void testGetFailedFilterCS_RealLife() { + // Mocking two Accumulators + Accumulator acc1 = new Accumulator(12345); + + acc1.addFailedFilterBase((byte) 'C', 391785L); + acc1.addFailedFilterBase((byte) 'C', 391815L); + acc1.addFailedFilterBase((byte) 'C', 391817L); + acc1.addFailedFilterBase((byte) 'C', 391832L); + acc1.addFailedFilterBase((byte) 'C', 391843L); + acc1.addFailedFilterBase((byte) 'C', 391867L); + acc1.addFailedFilterBase((byte) 'T', 391748L); + acc1.addFailedFilterBase((byte) 'T', 391752L); + acc1.addFailedFilterBase((byte) 'T', 391754L); + acc1.addFailedFilterBase((byte) 'T', 391755L); + acc1.addFailedFilterBase((byte) 'T', 391756L); + acc1.addFailedFilterBase((byte) 'T', 391757L); + acc1.addFailedFilterBase((byte) 'T', 391760L); + acc1.addFailedFilterBase((byte) 'T', 391761L); + acc1.addFailedFilterBase((byte) 'T', 391762L); + acc1.addFailedFilterBase((byte) 'T', 391763L); + acc1.addFailedFilterBase((byte) 'T', 391765L); + acc1.addFailedFilterBase((byte) 'T', 391766L); + acc1.addFailedFilterBase((byte) 'T', 391768L); + acc1.addFailedFilterBase((byte) 'T', 391769L); + acc1.addFailedFilterBase((byte) 'T', 391770L); + acc1.addFailedFilterBase((byte) 'T', 391771L); + 
acc1.addFailedFilterBase((byte) 'T', 391773L); + acc1.addFailedFilterBase((byte) 'T', 391776L); + acc1.addFailedFilterBase((byte) 'T', 391777L); + acc1.addFailedFilterBase((byte) 'T', 391778L); + acc1.addFailedFilterBase((byte) 'T', 391779L); + acc1.addFailedFilterBase((byte) 'T', 391780L); + acc1.addFailedFilterBase((byte) 'T', 391782L); + acc1.addFailedFilterBase((byte) 'T', 391783L); + acc1.addFailedFilterBase((byte) 'T', 391787L); + acc1.addFailedFilterBase((byte) 'T', 391788L); + acc1.addFailedFilterBase((byte) 'T', 391789L); + acc1.addFailedFilterBase((byte) 'T', 391790L); + acc1.addFailedFilterBase((byte) 'T', 391791L); + acc1.addFailedFilterBase((byte) 'T', 391792L); + acc1.addFailedFilterBase((byte) 'T', 391795L); + acc1.addFailedFilterBase((byte) 'T', 391796L); + acc1.addFailedFilterBase((byte) 'T', 391797L); + acc1.addFailedFilterBase((byte) 'T', 391799L); + acc1.addFailedFilterBase((byte) 'T', 391800L); + acc1.addFailedFilterBase((byte) 'T', 391802L); + acc1.addFailedFilterBase((byte) 'T', 391803L); + acc1.addFailedFilterBase((byte) 'T', 391806L); + acc1.addFailedFilterBase((byte) 'T', 391809L); + acc1.addFailedFilterBase((byte) 'T', 391810L); + acc1.addFailedFilterBase((byte) 'T', 391811L); + acc1.addFailedFilterBase((byte) 'T', 391812L); + acc1.addFailedFilterBase((byte) 'T', 391813L); + acc1.addFailedFilterBase((byte) 'T', 391814L); + acc1.addFailedFilterBase((byte) 'T', 391816L); + acc1.addFailedFilterBase((byte) 'T', 391818L); + acc1.addFailedFilterBase((byte) 'T', 391819L); + acc1.addFailedFilterBase((byte) 'T', 391821L); + acc1.addFailedFilterBase((byte) 'T', 391822L); + acc1.addFailedFilterBase((byte) 'T', 391823L); + acc1.addFailedFilterBase((byte) 'T', 391826L); + acc1.addFailedFilterBase((byte) 'T', 391827L); + acc1.addFailedFilterBase((byte) 'T', 391828L); + acc1.addFailedFilterBase((byte) 'T', 391831L); + acc1.addFailedFilterBase((byte) 'T', 391834L); + acc1.addFailedFilterBase((byte) 'T', 391835L); + acc1.addFailedFilterBase((byte) 'T', 
391836L); + acc1.addFailedFilterBase((byte) 'T', 391838L); + acc1.addFailedFilterBase((byte) 'T', 391839L); + acc1.addFailedFilterBase((byte) 'T', 391840L); + acc1.addFailedFilterBase((byte) 'T', 391841L); + acc1.addFailedFilterBase((byte) 'T', 391844L); + acc1.addFailedFilterBase((byte) 'T', 391845L); + acc1.addFailedFilterBase((byte) 'T', 391846L); + acc1.addFailedFilterBase((byte) 'T', 391847L); + acc1.addFailedFilterBase((byte) 'T', 391848L); + acc1.addFailedFilterBase((byte) 'T', 391850L); + acc1.addFailedFilterBase((byte) 'T', 391851L); + acc1.addFailedFilterBase((byte) 'T', 391853L); + acc1.addFailedFilterBase((byte) 'T', 391854L); + acc1.addFailedFilterBase((byte) 'T', 391855L); + acc1.addFailedFilterBase((byte) 'T', 391857L); + acc1.addFailedFilterBase((byte) 'T', 391859L); + acc1.addFailedFilterBase((byte) 'T', 391860L); + acc1.addFailedFilterBase((byte) 'T', 391862L); + acc1.addFailedFilterBase((byte) 'T', 391863L); + acc1.addFailedFilterBase((byte) 'T', 391865L); + acc1.addFailedFilterBase((byte) 'T', 391866L); + acc1.addFailedFilterBase((byte) 'T', 391868L); + acc1.addFailedFilterBase((byte) 'T', 391869L); + acc1.addFailedFilterBase((byte) 'T', 391870L); + acc1.addFailedFilterBase((byte) 'T', 391871L); + acc1.addFailedFilterBase((byte) 'T', 391872L); + acc1.addFailedFilterBase((byte) 'T', 391873L); + acc1.addFailedFilterBase((byte) 'T', 391874L); + acc1.addFailedFilterBase((byte) 'T', 391875L); + acc1.addFailedFilterBase((byte) 'T', 391877L); + acc1.addFailedFilterBase((byte) 'T', 391878L); + acc1.addFailedFilterBase((byte) 'T', 391879L); + acc1.addFailedFilterBase((byte) 'T', 391880L); + acc1.addFailedFilterBase((byte) 'T', 391881L); + acc1.addFailedFilterBase((byte) 'T', 391882L); + acc1.addFailedFilterBase((byte) 'T', 391884L); + acc1.addFailedFilterBase((byte) 'T', 391885L); + acc1.addFailedFilterBase((byte) 'T', 391886L); + acc1.addFailedFilterBase((byte) 'T', 391887L); + acc1.addFailedFilterBase((byte) 'T', 391889L); + + Accumulator acc2 = new 
Accumulator(12346); + acc2.addFailedFilterBase((byte) 'A', 391815L); + acc2.addFailedFilterBase((byte) 'A', 391817L); + acc2.addFailedFilterBase((byte) 'G', 391748L); + acc2.addFailedFilterBase((byte) 'G', 391752L); + acc2.addFailedFilterBase((byte) 'G', 391754L); + acc2.addFailedFilterBase((byte) 'G', 391755L); + acc2.addFailedFilterBase((byte) 'G', 391756L); + acc2.addFailedFilterBase((byte) 'G', 391757L); + acc2.addFailedFilterBase((byte) 'G', 391760L); + acc2.addFailedFilterBase((byte) 'G', 391761L); + acc2.addFailedFilterBase((byte) 'G', 391762L); + acc2.addFailedFilterBase((byte) 'G', 391763L); + acc2.addFailedFilterBase((byte) 'G', 391765L); + acc2.addFailedFilterBase((byte) 'G', 391766L); + acc2.addFailedFilterBase((byte) 'G', 391768L); + acc2.addFailedFilterBase((byte) 'G', 391769L); + acc2.addFailedFilterBase((byte) 'G', 391770L); + acc2.addFailedFilterBase((byte) 'G', 391771L); + acc2.addFailedFilterBase((byte) 'G', 391773L); + acc2.addFailedFilterBase((byte) 'G', 391776L); + acc2.addFailedFilterBase((byte) 'G', 391777L); + acc2.addFailedFilterBase((byte) 'G', 391778L); + acc2.addFailedFilterBase((byte) 'G', 391779L); + acc2.addFailedFilterBase((byte) 'G', 391780L); + acc2.addFailedFilterBase((byte) 'G', 391782L); + acc2.addFailedFilterBase((byte) 'G', 391783L); + acc2.addFailedFilterBase((byte) 'G', 391785L); + acc2.addFailedFilterBase((byte) 'G', 391787L); + acc2.addFailedFilterBase((byte) 'G', 391788L); + acc2.addFailedFilterBase((byte) 'G', 391789L); + acc2.addFailedFilterBase((byte) 'G', 391790L); + acc2.addFailedFilterBase((byte) 'G', 391791L); + acc2.addFailedFilterBase((byte) 'G', 391792L); + acc2.addFailedFilterBase((byte) 'G', 391795L); + acc2.addFailedFilterBase((byte) 'G', 391796L); + acc2.addFailedFilterBase((byte) 'G', 391797L); + acc2.addFailedFilterBase((byte) 'G', 391799L); + acc2.addFailedFilterBase((byte) 'G', 391800L); + acc2.addFailedFilterBase((byte) 'G', 391802L); + acc2.addFailedFilterBase((byte) 'G', 391803L); + 
acc2.addFailedFilterBase((byte) 'G', 391806L); + acc2.addFailedFilterBase((byte) 'G', 391809L); + acc2.addFailedFilterBase((byte) 'G', 391810L); + acc2.addFailedFilterBase((byte) 'G', 391811L); + acc2.addFailedFilterBase((byte) 'G', 391812L); + acc2.addFailedFilterBase((byte) 'G', 391813L); + acc2.addFailedFilterBase((byte) 'G', 391814L); + acc2.addFailedFilterBase((byte) 'G', 391816L); + acc2.addFailedFilterBase((byte) 'G', 391818L); + acc2.addFailedFilterBase((byte) 'G', 391819L); + acc2.addFailedFilterBase((byte) 'G', 391821L); + acc2.addFailedFilterBase((byte) 'G', 391822L); + acc2.addFailedFilterBase((byte) 'G', 391823L); + acc2.addFailedFilterBase((byte) 'G', 391826L); + acc2.addFailedFilterBase((byte) 'G', 391827L); + acc2.addFailedFilterBase((byte) 'G', 391828L); + acc2.addFailedFilterBase((byte) 'G', 391831L); + acc2.addFailedFilterBase((byte) 'G', 391832L); + acc2.addFailedFilterBase((byte) 'G', 391834L); + acc2.addFailedFilterBase((byte) 'G', 391835L); + acc2.addFailedFilterBase((byte) 'G', 391836L); + acc2.addFailedFilterBase((byte) 'G', 391838L); + acc2.addFailedFilterBase((byte) 'G', 391839L); + acc2.addFailedFilterBase((byte) 'G', 391840L); + acc2.addFailedFilterBase((byte) 'G', 391841L); + acc2.addFailedFilterBase((byte) 'G', 391843L); + acc2.addFailedFilterBase((byte) 'G', 391844L); + acc2.addFailedFilterBase((byte) 'G', 391845L); + acc2.addFailedFilterBase((byte) 'G', 391846L); + acc2.addFailedFilterBase((byte) 'G', 391847L); + acc2.addFailedFilterBase((byte) 'G', 391848L); + acc2.addFailedFilterBase((byte) 'G', 391850L); + acc2.addFailedFilterBase((byte) 'G', 391851L); + acc2.addFailedFilterBase((byte) 'G', 391853L); + acc2.addFailedFilterBase((byte) 'G', 391854L); + acc2.addFailedFilterBase((byte) 'G', 391855L); + acc2.addFailedFilterBase((byte) 'G', 391857L); + acc2.addFailedFilterBase((byte) 'G', 391859L); + acc2.addFailedFilterBase((byte) 'G', 391860L); + acc2.addFailedFilterBase((byte) 'G', 391862L); + acc2.addFailedFilterBase((byte) 'G', 
391863L); + acc2.addFailedFilterBase((byte) 'G', 391865L); + acc2.addFailedFilterBase((byte) 'G', 391866L); + acc2.addFailedFilterBase((byte) 'G', 391867L); + acc2.addFailedFilterBase((byte) 'G', 391868L); + acc2.addFailedFilterBase((byte) 'G', 391869L); + acc2.addFailedFilterBase((byte) 'G', 391870L); + acc2.addFailedFilterBase((byte) 'G', 391871L); + acc2.addFailedFilterBase((byte) 'G', 391872L); + acc2.addFailedFilterBase((byte) 'G', 391873L); + acc2.addFailedFilterBase((byte) 'G', 391874L); + acc2.addFailedFilterBase((byte) 'G', 391875L); + acc2.addFailedFilterBase((byte) 'G', 391877L); + acc2.addFailedFilterBase((byte) 'G', 391878L); + acc2.addFailedFilterBase((byte) 'G', 391879L); + acc2.addFailedFilterBase((byte) 'G', 391880L); + acc2.addFailedFilterBase((byte) 'G', 391881L); + acc2.addFailedFilterBase((byte) 'G', 391882L); + acc2.addFailedFilterBase((byte) 'G', 391884L); + acc2.addFailedFilterBase((byte) 'G', 391885L); + acc2.addFailedFilterBase((byte) 'G', 391886L); + acc2.addFailedFilterBase((byte) 'G', 391887L); + acc2.addFailedFilterBase((byte) 'G', 391889L); + acc2.addFailedFilterBase((byte) 'G', 391890L); + acc2.addFailedFilterBase((byte) 'G', 391891L); + + List accumulators = Arrays.asList(acc1, acc2); + assertEquals("CA2;CG4;TG97;_G2", PipelineUtil.getFailedFilterCS(accumulators)); + } - //TOTO awaiting decision on whether 1/1 -> 0/0 is SOMATIC -// @Test -// public void csRealLife2() { -// /* -// * chr10 54817257 rs386743785 AG GA . . 
IN=1,2;DB;HOM=0,TTTAACCTTCgaCTTGCCCACA GT:AD:CCC:CCM:DP:FT:INF:MR:NNS:OABS 1/1:0,19:Germline:32:34:PASS:.:19:19:AA8[]6[];GA8[]11[];TA0[]1[] 0/0:2,0:ReferenceNoVariant:32:55:PASS:SOMATIC:.:.:AA33[]20[];AG1[]1[];A_2[]0[] 1/1:0,19:Germline:32:34:PASS:.:19:19:AA8[]6[];GA8[]11[];TA0[]1[] 0/0:2,0:ReferenceNoVariant:32:55:PASS:SOMATIC:.:.:AA33[]20[];AG1[]1[];A_2[]0[] -// * -// * This should not be SOMATIC -// */ -// VcfRecord v1 = VcfUtils.createVcfRecord(new ChrPointPosition("10", 54817257),null,"A","G"); -// VcfRecord v2 = VcfUtils.createVcfRecord(new ChrPointPosition("10", 54817258),null,"G","A"); -// final Accumulator tumour100 = new Accumulator(54817257); -// final Accumulator tumour101 = new Accumulator(54817258); -// for (int i = 1 ; i <= 34 ; i++) { -// tumour100.addBase((byte)'A', (byte)30, true, 154701261 + i, 154701381, 154701391, i); -// tumour101.addBase((byte)'A', (byte)30, true, 154701261 + i, 154701382, 154701391, i); -// } -// for (int i = 35 ; i < 35 + 31 ; i++) { -// tumour100.addBase((byte)'A', (byte)30, false, 154701261 + i, 154701381, 154701391 + i, i); -// tumour101.addBase((byte)'A', (byte)30, false, 154701261 + i, 154701382, 154701391 + i, i); -// } -// -// tumour100.addBase((byte)'A', (byte)30, true, 154701262, 154701381, 154701391, 70); -// tumour101.addBase((byte)'C', (byte)30, true, 154701262, 154701382, 154701391, 70); -// tumour100.addBase((byte)'C', (byte)30, true, 154701262, 154701381, 154701391, 71); -// tumour101.addBase((byte)'A', (byte)30, true, 154701262, 154701382, 154701391, 71); -// -// Map> map = new HashMap<>(4); -// map.put(v1, new Pair<>(null, tumour100)); -// map.put(v2, new Pair<>(null, tumour101)); -// -// Optional ov = PipelineUtil.createCompoundSnp(map, cRules,tRules, true, 3, 3); -// assertEquals(true, ov.isPresent()); -// VcfRecord v = ov.get(); -// assertEquals("AA", v.getAlt()); -// assertEquals("GG", v.getRef()); -// List ff = v.getFormatFields(); -// 
assertEquals("1/1:0,65:67:.:SOMATIC:65:65:AA34[]31[];AC1[]0[];CA1[]0[]", ff.get(2)); // tumour -// } + @Test + public void getUnique() { + Accumulator acc1 = new Accumulator(12345); + acc1.addFailedFilterBase((byte) 'C', 7715117792191186532L); + acc1.addFailedFilterBase((byte) 'C', 419015429944394057L); + acc1.addFailedFilterBase((byte) 'C', -4982705001061907857L); + acc1.addFailedFilterBase((byte) 'C', -5310912577898632174L); + acc1.addFailedFilterBase((byte) 'C', 4442000349200974418L); + acc1.addFailedFilterBase((byte) 'C', -6357506197841616687L); + acc1.addFailedFilterBase((byte) 'T', -7438533981666644002L); + acc1.addFailedFilterBase((byte) 'T', 1782036110835987330L); + acc1.addFailedFilterBase((byte) 'T', -8751309811463707606L); + acc1.addFailedFilterBase((byte) 'T', -1006789886807925312L); + acc1.addFailedFilterBase((byte) 'T', 267604890061977987L); + acc1.addFailedFilterBase((byte) 'T', -5752866221558017591L); + acc1.addFailedFilterBase((byte) 'T', 8813529100103331852L); + acc1.addFailedFilterBase((byte) 'T', 8740981370654066209L); + acc1.addFailedFilterBase((byte) 'T', 3875546135377654529L); + acc1.addFailedFilterBase((byte) 'T', 3156183271998102851L); + acc1.addFailedFilterBase((byte) 'T', 496025280786752802L); + acc1.addFailedFilterBase((byte) 'T', -5370540407260949421L); + acc1.addFailedFilterBase((byte) 'T', -1034241958712548931L); + acc1.addFailedFilterBase((byte) 'T', 4760541127041820054L); + acc1.addFailedFilterBase((byte) 'T', -478460491748683719L); + acc1.addFailedFilterBase((byte) 'T', 1080824909688009888L); + acc1.addFailedFilterBase((byte) 'T', -248833557815058818L); + acc1.addFailedFilterBase((byte) 'T', -836224118038877802L); + acc1.addFailedFilterBase((byte) 'T', -6251926633067930061L); + acc1.addFailedFilterBase((byte) 'T', 5693373976150522013L); + acc1.addFailedFilterBase((byte) 'T', -8618732021493771414L); + acc1.addFailedFilterBase((byte) 'T', -3535500610440114311L); + acc1.addFailedFilterBase((byte) 'T', -8607579577620952717L); + 
acc1.addFailedFilterBase((byte) 'T', 3617903064101974186L); + acc1.addFailedFilterBase((byte) 'T', -292355879921902716L); + acc1.addFailedFilterBase((byte) 'T', -3333658515894473867L); + acc1.addFailedFilterBase((byte) 'T', -6061011605950203461L); + acc1.addFailedFilterBase((byte) 'T', -2558972151765706823L); + acc1.addFailedFilterBase((byte) 'T', 8278189905233837843L); + acc1.addFailedFilterBase((byte) 'T', 3158165356931543161L); + acc1.addFailedFilterBase((byte) 'T', 8095381987075818260L); + acc1.addFailedFilterBase((byte) 'T', 4995250372108758135L); + acc1.addFailedFilterBase((byte) 'T', 4308956381034063213L); + acc1.addFailedFilterBase((byte) 'T', -3975576866416630705L); + acc1.addFailedFilterBase((byte) 'T', 215674860906757583L); + acc1.addFailedFilterBase((byte) 'T', 9130708757120638831L); + acc1.addFailedFilterBase((byte) 'T', 2603036776856178377L); + acc1.addFailedFilterBase((byte) 'T', 5735814374756852238L); + acc1.addFailedFilterBase((byte) 'T', 7540064501637539247L); + acc1.addFailedFilterBase((byte) 'T', 6419930603122219958L); + acc1.addFailedFilterBase((byte) 'T', 8210168556586079108L); + acc1.addFailedFilterBase((byte) 'T', 3268524321762958948L); + acc1.addFailedFilterBase((byte) 'T', -138463099035032818L); + acc1.addFailedFilterBase((byte) 'T', 2609032967337224118L); + acc1.addFailedFilterBase((byte) 'T', 1180519307271286853L); + acc1.addFailedFilterBase((byte) 'T', 3647809539552025230L); + acc1.addFailedFilterBase((byte) 'T', 8493692125897398515L); + acc1.addFailedFilterBase((byte) 'T', 3315994573281898804L); + acc1.addFailedFilterBase((byte) 'T', 724501628528950317L); + acc1.addFailedFilterBase((byte) 'T', 2324044325092459430L); + acc1.addFailedFilterBase((byte) 'T', 4824931694165828140L); + acc1.addFailedFilterBase((byte) 'T', 1027175994126509642L); + acc1.addFailedFilterBase((byte) 'T', -2957957358873393133L); + acc1.addFailedFilterBase((byte) 'T', 3268524321762958948L); + acc1.addFailedFilterBase((byte) 'T', -8828184361023553865L); + 
acc1.addFailedFilterBase((byte) 'T', -8251184108896989485L); + acc1.addFailedFilterBase((byte) 'T', 4329503331942280002L); + acc1.addFailedFilterBase((byte) 'T', 3244033920476022406L); + acc1.addFailedFilterBase((byte) 'T', 6461699796293877052L); + acc1.addFailedFilterBase((byte) 'T', -1679260168527306503L); + acc1.addFailedFilterBase((byte) 'T', 1216738602136290533L); + acc1.addFailedFilterBase((byte) 'T', 1144424180764508213L); + acc1.addFailedFilterBase((byte) 'T', 7433969058982544698L); + acc1.addFailedFilterBase((byte) 'T', 8740981370654066209L); + acc1.addFailedFilterBase((byte) 'T', -7503905741508074480L); + acc1.addFailedFilterBase((byte) 'T', 9219990827317843535L); + acc1.addFailedFilterBase((byte) 'T', -562526361002022550L); + acc1.addFailedFilterBase((byte) 'T', -1583125034918006130L); + acc1.addFailedFilterBase((byte) 'T', 7486429413782649127L); + acc1.addFailedFilterBase((byte) 'T', -1476485875783679687L); + acc1.addFailedFilterBase((byte) 'T', -7726467777379464574L); + acc1.addFailedFilterBase((byte) 'T', 6145111076905202179L); + acc1.addFailedFilterBase((byte) 'T', -6962289282457110290L); + acc1.addFailedFilterBase((byte) 'T', 1829825168833198510L); + acc1.addFailedFilterBase((byte) 'T', -8597867675373289290L); + acc1.addFailedFilterBase((byte) 'T', -5752866221558017591L); + acc1.addFailedFilterBase((byte) 'T', 1015854181370342302L); + acc1.addFailedFilterBase((byte) 'T', 35473122144577186L); + acc1.addFailedFilterBase((byte) 'T', 7825657632162557305L); + acc1.addFailedFilterBase((byte) 'T', -4454819942067680881L); + acc1.addFailedFilterBase((byte) 'T', -3618246714841591676L); + acc1.addFailedFilterBase((byte) 'T', 8557524143668341863L); + acc1.addFailedFilterBase((byte) 'T', 7092326401254028467L); + acc1.addFailedFilterBase((byte) 'T', 4031901574327385806L); + acc1.addFailedFilterBase((byte) 'T', 7382490855834685218L); + acc1.addFailedFilterBase((byte) 'T', -7609961797280175578L); + acc1.addFailedFilterBase((byte) 'T', 6264118358119675396L); + 
acc1.addFailedFilterBase((byte) 'T', 3774827528163625704L); + acc1.addFailedFilterBase((byte) 'T', 8493692125897398515L); + acc1.addFailedFilterBase((byte) 'T', 3728160140633686841L); + acc1.addFailedFilterBase((byte) 'T', 876929434758225646L); + acc1.addFailedFilterBase((byte) 'T', 4853207379767922037L); + acc1.addFailedFilterBase((byte) 'T', 7915916741906599442L); + acc1.addFailedFilterBase((byte) 'T', -8874197118979016859L); + acc1.addFailedFilterBase((byte) 'T', -2135344415274795553L); + acc1.addFailedFilterBase((byte) 'T', 7092828207829028047L); + acc1.addFailedFilterBase((byte) 'T', 8518363537084736565L); + + Accumulator acc2 = new Accumulator(12346); + + acc2.addFailedFilterBase((byte) 'A', 419015429944394057L); + acc2.addFailedFilterBase((byte) 'A', -4982705001061907857L); + acc2.addFailedFilterBase((byte) 'G', -7438533981666644002L); + acc2.addFailedFilterBase((byte) 'G', 1782036110835987330L); + acc2.addFailedFilterBase((byte) 'G', -8751309811463707606L); + acc2.addFailedFilterBase((byte) 'G', -1006789886807925312L); + acc2.addFailedFilterBase((byte) 'G', 267604890061977987L); + acc2.addFailedFilterBase((byte) 'G', -5752866221558017591L); + acc2.addFailedFilterBase((byte) 'G', 8813529100103331852L); + acc2.addFailedFilterBase((byte) 'G', 8740981370654066209L); + acc2.addFailedFilterBase((byte) 'G', 3875546135377654529L); + acc2.addFailedFilterBase((byte) 'G', 3156183271998102851L); + acc2.addFailedFilterBase((byte) 'G', 496025280786752802L); + acc2.addFailedFilterBase((byte) 'G', -5370540407260949421L); + acc2.addFailedFilterBase((byte) 'G', -1034241958712548931L); + acc2.addFailedFilterBase((byte) 'G', 4760541127041820054L); + acc2.addFailedFilterBase((byte) 'G', -478460491748683719L); + acc2.addFailedFilterBase((byte) 'G', 1080824909688009888L); + acc2.addFailedFilterBase((byte) 'G', -248833557815058818L); + acc2.addFailedFilterBase((byte) 'G', -836224118038877802L); + acc2.addFailedFilterBase((byte) 'G', -6251926633067930061L); + 
acc2.addFailedFilterBase((byte) 'G', 5693373976150522013L); + acc2.addFailedFilterBase((byte) 'G', -8618732021493771414L); + acc2.addFailedFilterBase((byte) 'G', -3535500610440114311L); + acc2.addFailedFilterBase((byte) 'G', -8607579577620952717L); + acc2.addFailedFilterBase((byte) 'G', 3617903064101974186L); + acc2.addFailedFilterBase((byte) 'G', 7715117792191186532L); + acc2.addFailedFilterBase((byte) 'G', -292355879921902716L); + acc2.addFailedFilterBase((byte) 'G', -3333658515894473867L); + acc2.addFailedFilterBase((byte) 'G', -6061011605950203461L); + acc2.addFailedFilterBase((byte) 'G', -2558972151765706823L); + acc2.addFailedFilterBase((byte) 'G', 8278189905233837843L); + acc2.addFailedFilterBase((byte) 'G', 3158165356931543161L); + acc2.addFailedFilterBase((byte) 'G', 8095381987075818260L); + acc2.addFailedFilterBase((byte) 'G', 4995250372108758135L); + acc2.addFailedFilterBase((byte) 'G', 4308956381034063213L); + acc2.addFailedFilterBase((byte) 'G', -3975576866416630705L); + acc2.addFailedFilterBase((byte) 'G', 215674860906757583L); + acc2.addFailedFilterBase((byte) 'G', 9130708757120638831L); + acc2.addFailedFilterBase((byte) 'G', 2603036776856178377L); + acc2.addFailedFilterBase((byte) 'G', 5735814374756852238L); + acc2.addFailedFilterBase((byte) 'G', 7540064501637539247L); + acc2.addFailedFilterBase((byte) 'G', 6419930603122219958L); + acc2.addFailedFilterBase((byte) 'G', 8210168556586079108L); + acc2.addFailedFilterBase((byte) 'G', 3268524321762958948L); + acc2.addFailedFilterBase((byte) 'G', -138463099035032818L); + acc2.addFailedFilterBase((byte) 'G', 2609032967337224118L); + acc2.addFailedFilterBase((byte) 'G', 1180519307271286853L); + acc2.addFailedFilterBase((byte) 'G', 3647809539552025230L); + acc2.addFailedFilterBase((byte) 'G', 8493692125897398515L); + acc2.addFailedFilterBase((byte) 'G', 3315994573281898804L); + acc2.addFailedFilterBase((byte) 'G', 724501628528950317L); + acc2.addFailedFilterBase((byte) 'G', 2324044325092459430L); + 
acc2.addFailedFilterBase((byte) 'G', 4824931694165828140L); + acc2.addFailedFilterBase((byte) 'G', 1027175994126509642L); + acc2.addFailedFilterBase((byte) 'G', -2957957358873393133L); + acc2.addFailedFilterBase((byte) 'G', 3268524321762958948L); + acc2.addFailedFilterBase((byte) 'G', -5310912577898632174L); + acc2.addFailedFilterBase((byte) 'G', -8828184361023553865L); + acc2.addFailedFilterBase((byte) 'G', -8251184108896989485L); + acc2.addFailedFilterBase((byte) 'G', 4329503331942280002L); + acc2.addFailedFilterBase((byte) 'G', 3244033920476022406L); + acc2.addFailedFilterBase((byte) 'G', 6461699796293877052L); + acc2.addFailedFilterBase((byte) 'G', -1679260168527306503L); + acc2.addFailedFilterBase((byte) 'G', 1216738602136290533L); + acc2.addFailedFilterBase((byte) 'G', 4442000349200974418L); + acc2.addFailedFilterBase((byte) 'G', 1144424180764508213L); + acc2.addFailedFilterBase((byte) 'G', 7433969058982544698L); + acc2.addFailedFilterBase((byte) 'G', 8740981370654066209L); + acc2.addFailedFilterBase((byte) 'G', -7503905741508074480L); + acc2.addFailedFilterBase((byte) 'G', 9219990827317843535L); + acc2.addFailedFilterBase((byte) 'G', -562526361002022550L); + acc2.addFailedFilterBase((byte) 'G', -1583125034918006130L); + acc2.addFailedFilterBase((byte) 'G', 7486429413782649127L); + acc2.addFailedFilterBase((byte) 'G', -1476485875783679687L); + acc2.addFailedFilterBase((byte) 'G', -7726467777379464574L); + acc2.addFailedFilterBase((byte) 'G', 6145111076905202179L); + acc2.addFailedFilterBase((byte) 'G', -6962289282457110290L); + acc2.addFailedFilterBase((byte) 'G', 1829825168833198510L); + acc2.addFailedFilterBase((byte) 'G', -8597867675373289290L); + acc2.addFailedFilterBase((byte) 'G', -5752866221558017591L); + acc2.addFailedFilterBase((byte) 'G', 1015854181370342302L); + acc2.addFailedFilterBase((byte) 'G', 35473122144577186L); + acc2.addFailedFilterBase((byte) 'G', -6357506197841616687L); + acc2.addFailedFilterBase((byte) 'G', 7825657632162557305L); + 
acc2.addFailedFilterBase((byte) 'G', -4454819942067680881L); + acc2.addFailedFilterBase((byte) 'G', -3618246714841591676L); + acc2.addFailedFilterBase((byte) 'G', 8557524143668341863L); + acc2.addFailedFilterBase((byte) 'G', 7092326401254028467L); + acc2.addFailedFilterBase((byte) 'G', 4031901574327385806L); + acc2.addFailedFilterBase((byte) 'G', 7382490855834685218L); + acc2.addFailedFilterBase((byte) 'G', -7609961797280175578L); + acc2.addFailedFilterBase((byte) 'G', 6264118358119675396L); + acc2.addFailedFilterBase((byte) 'G', 3774827528163625704L); + acc2.addFailedFilterBase((byte) 'G', 8493692125897398515L); + acc2.addFailedFilterBase((byte) 'G', 3728160140633686841L); + acc2.addFailedFilterBase((byte) 'G', 876929434758225646L); + acc2.addFailedFilterBase((byte) 'G', 4853207379767922037L); + acc2.addFailedFilterBase((byte) 'G', 7915916741906599442L); + acc2.addFailedFilterBase((byte) 'G', -8874197118979016859L); + acc2.addFailedFilterBase((byte) 'G', -2135344415274795553L); + acc2.addFailedFilterBase((byte) 'G', 7092828207829028047L); + acc2.addFailedFilterBase((byte) 'G', 8518363537084736565L); + acc2.addFailedFilterBase((byte) 'G', -8751309811463707606L); + acc2.addFailedFilterBase((byte) 'G', -8252885584572880588L); + + List accumulators = Arrays.asList(acc1, acc2); + assertEquals("CA2;CG4;TG93;_G1", PipelineUtil.getFailedFilterCS(accumulators)); + + } -// @Test -// public void getSkeletonVcf() { -// List snps = new ArrayList<>(); -// snps.add(VcfUtils.createVcfRecord(new ChrPointPosition("1", 99),null,"A","C")); -// snps.add(VcfUtils.createVcfRecord(new ChrPointPosition("1", 101),null,"A","C")); -// snps.add(VcfUtils.createVcfRecord(new ChrPointPosition("1", 102),null,"A","C")); -// snps.add(VcfUtils.createVcfRecord(new ChrPointPosition("1", 103),null,"A","C")); -// snps.add(VcfUtils.createVcfRecord(new ChrPointPosition("1", 104),null,"A","C")); -// snps.add(VcfUtils.createVcfRecord(new ChrPointPosition("1", 106),null,"A","C")); -// -// List> loloVcfs = 
PipelineUtil.listOfListOfAdjacentVcfs(snps); -// assertEquals(1, loloVcfs.size()); -// VcfRecord v = PipelineUtil.createSkeletonCompoundSnp(loloVcfs.get(0)); -// assertEquals("1", v.getChrPosition().getChromosome()); -// assertEquals(101, v.getChrPosition().getStartPosition()); -// assertEquals("AAAA", v.getRef()); -// assertEquals("CCCC", v.getAlt()); -// -// } + @Test + public void realLifeFunnyBusiness() { + List readNames = Arrays.asList("DCW97JN1:295:D1B5AACXX:3:2309:14480:47693" , + "DCW97JN1:295:D1B5AACXX:5:1111:15402:19442" , + "DCW97JN1:295:D1B5AACXX:3:2210:16730:78147" , + "HWI-ST526:219:C16B2ACXX:1:2304:7016:24438" , + "HWI-ST526:219:C16B2ACXX:1:1307:1391:41225" , + "HWI-ST526:219:C16B2ACXX:1:1103:16387:29528" , + "HWI-ST526:219:C16B2ACXX:1:2112:18851:73892" , + "DCW97JN1:295:D1B5AACXX:4:2114:16640:6591" , + "DCW97JN1:295:D1B5AACXX:3:1303:9721:20997" , + "HWI-ST526:219:C16B2ACXX:1:2207:13419:94062" , + "DCW97JN1:295:D1B5AACXX:3:2309:14480:47693" , + "DCW97JN1:295:D1B5AACXX:5:2305:3789:14633" , + "HWI-ST526:219:C16B2ACXX:1:1104:4560:57572" , + "DCW97JN1:295:D1B5AACXX:6:2111:9627:7792" , + "DCW97JN1:295:D1B5AACXX:5:2113:4992:94218" , + "DCW97JN1:295:D1B5AACXX:4:2202:16591:44438" , + "DCW97JN1:295:D1B5AACXX:4:2205:19357:57058" , + "DCW97JN1:295:D1B5AACXX:6:2307:12495:71902" , + "DCW97JN1:295:D1B5AACXX:4:2212:17500:6828" , + "DCW97JN1:295:D1B5AACXX:5:1109:19970:67964" , + "DCW97JN1:295:D1B5AACXX:3:2310:15515:71252" , + "DCW97JN1:295:D1B5AACXX:5:2201:4467:9303" , + "DCW97JN1:295:D1B5AACXX:6:1115:2311:14137" , + "DCW97JN1:295:D1B5AACXX:5:1302:19127:9050" , + "DCW97JN1:295:D1B5AACXX:6:1114:11214:2476" , + "DCW97JN1:295:D1B5AACXX:5:1212:1161:16540" , + "DCW97JN1:295:D1B5AACXX:5:1301:16241:83752" , + "DCW97JN1:295:D1B5AACXX:3:1116:13596:47139" , + "DCW97JN1:295:D1B5AACXX:5:1307:3521:66193" , + "DCW97JN1:295:D1B5AACXX:5:2208:16724:61398" , + "DCW97JN1:295:D1B5AACXX:3:2107:10686:79006" , + "DCW97JN1:295:D1B5AACXX:4:2107:13457:26494" , + 
"HWI-ST526:219:C16B2ACXX:1:2205:13081:23009" , + "DCW97JN1:295:D1B5AACXX:6:1213:17550:51242" , + "HWI-ST526:219:C16B2ACXX:1:2312:4528:65210" , + "DCW97JN1:295:D1B5AACXX:3:1209:5045:36182" , + "DCW97JN1:295:D1B5AACXX:6:2309:19872:67179" , + "DCW97JN1:295:D1B5AACXX:4:2216:7610:96187" , + "DCW97JN1:295:D1B5AACXX:5:1109:17596:79874" , + "DCW97JN1:295:D1B5AACXX:4:1308:6864:53678" , + "DCW97JN1:295:D1B5AACXX:6:1314:5251:24654" , + "DCW97JN1:295:D1B5AACXX:4:1201:1616:97707" , + "DCW97JN1:295:D1B5AACXX:3:2210:16730:78147" , + "DCW97JN1:295:D1B5AACXX:3:1310:11233:73883" , + "HWI-ST526:219:C16B2ACXX:1:2312:13402:11372" , + "HWI-ST526:219:C16B2ACXX:1:2307:2775:76985" , + "DCW97JN1:295:D1B5AACXX:6:2313:6436:3202" , + "DCW97JN1:295:D1B5AACXX:5:1109:5309:44655" , + "DCW97JN1:295:D1B5AACXX:4:1210:13752:91386" , + "DCW97JN1:295:D1B5AACXX:4:1210:13752:91386" , + "DCW97JN1:295:D1B5AACXX:4:2116:13414:59472" , + "DCW97JN1:295:D1B5AACXX:4:2304:8591:34902" , + "DCW97JN1:295:D1B5AACXX:4:1206:17734:74060" , + "DCW97JN1:295:D1B5AACXX:3:1214:4999:4158" , + "DCW97JN1:295:D1B5AACXX:4:2205:19357:57058"); + assertEquals(55, readNames.size()); + Set readNameSet = new HashSet<>(); + for (String s : readNames) { + if (!readNameSet.add(s)) { + System.out.println("dup read: " + s); + } + } + assertEquals(51, readNameSet.size()); + + + Accumulator acc1 = new Accumulator(12345); + acc1.addFailedFilterBase((byte) 'A', 4086900006971767854L); + acc1.addFailedFilterBase((byte) 'A', -4716832531774407080L); + acc1.addFailedFilterBase((byte) 'A', 6700611398151820155L); + acc1.addFailedFilterBase((byte) 'A', 5186962780778754199L); + acc1.addFailedFilterBase((byte) 'A', 5122404141805319601L); + acc1.addFailedFilterBase((byte) 'A', 8958843543082877714L); + acc1.addFailedFilterBase((byte) 'A', 8831294537041955437L); + acc1.addFailedFilterBase((byte) 'A', 6048261452686045528L); + acc1.addFailedFilterBase((byte) 'A', -27981145301803143L); + acc1.addFailedFilterBase((byte) 'A', -1895240219048882894L); + 
acc1.addFailedFilterBase((byte) 'A', -432780264239122912L); + acc1.addFailedFilterBase((byte) 'T', -1441445426238881582L); + acc1.addFailedFilterBase((byte) 'T', 7422662345407810992L); + acc1.addFailedFilterBase((byte) 'T', -27981145301803143L); + acc1.addFailedFilterBase((byte) 'T', -3745913329893305916L); + acc1.addFailedFilterBase((byte) 'T', -5872199506007072810L); + acc1.addFailedFilterBase((byte) 'T', 8018589148027039207L); + acc1.addFailedFilterBase((byte) 'T', -6926956303777753425L); + acc1.addFailedFilterBase((byte) 'T', -8269550325241252261L); + acc1.addFailedFilterBase((byte) 'T', -1441445426238881582L); + acc1.addFailedFilterBase((byte) 'T', -4183403039113486709L); + acc1.addFailedFilterBase((byte) 'T', -2252558088007944815L); + acc1.addFailedFilterBase((byte) 'T', 5095097565078919292L); + acc1.addFailedFilterBase((byte) 'T', -5149194738896197730L); + acc1.addFailedFilterBase((byte) 'T', -8386429281397821662L); + acc1.addFailedFilterBase((byte) 'T', 9018607088384783716L); + acc1.addFailedFilterBase((byte) 'T', -4582515424865507576L); + acc1.addFailedFilterBase((byte) 'T', 5006058289362654996L); + acc1.addFailedFilterBase((byte) 'T', 1570085767318999878L); + acc1.addFailedFilterBase((byte) 'T', 6902405747651132852L); + acc1.addFailedFilterBase((byte) 'T', 550354402160307514L); + acc1.addFailedFilterBase((byte) 'T', -8462486584110695116L); + acc1.addFailedFilterBase((byte) 'T', 3461374088017820567L); + acc1.addFailedFilterBase((byte) 'T', 281625484296755152L); + + Accumulator acc2 = new Accumulator(12346); + acc2.addFailedFilterBase((byte) 'A',-1441445426238881582L); + acc2.addFailedFilterBase((byte) 'A',7422662345407810992L); + acc2.addFailedFilterBase((byte) 'A',-27981145301803143L); + acc2.addFailedFilterBase((byte) 'A',-3745913329893305916L); + acc2.addFailedFilterBase((byte) 'A',-5872199506007072810L); + acc2.addFailedFilterBase((byte) 'A',8018589148027039207L); + acc2.addFailedFilterBase((byte) 'A',-8780768932796289538L); + 
acc2.addFailedFilterBase((byte) 'A',-6926956303777753425L); + acc2.addFailedFilterBase((byte) 'A',-8269550325241252261L); + acc2.addFailedFilterBase((byte) 'A',-1441445426238881582L); + acc2.addFailedFilterBase((byte) 'A',-4183403039113486709L); + acc2.addFailedFilterBase((byte) 'A',-2252558088007944815L); + acc2.addFailedFilterBase((byte) 'A',5095097565078919292L); + acc2.addFailedFilterBase((byte) 'A',-5149194738896197730L); + acc2.addFailedFilterBase((byte) 'A',-8386429281397821662L); + acc2.addFailedFilterBase((byte) 'A',9018607088384783716L); + acc2.addFailedFilterBase((byte) 'A',-4582515424865507576L); + acc2.addFailedFilterBase((byte) 'A',5006058289362654996L); + acc2.addFailedFilterBase((byte) 'A',1570085767318999878L); + acc2.addFailedFilterBase((byte) 'A',6902405747651132852L); + acc2.addFailedFilterBase((byte) 'A',550354402160307514L); + acc2.addFailedFilterBase((byte) 'A',-8462486584110695116L); + acc2.addFailedFilterBase((byte) 'A',3461374088017820567L); + acc2.addFailedFilterBase((byte) 'A',281625484296755152L); + acc2.addFailedFilterBase((byte) 'T', 7172062261984156803L); + acc2.addFailedFilterBase((byte) 'T', 4537039380766474115L); + acc2.addFailedFilterBase((byte) 'T', 4086900006971767854L); + acc2.addFailedFilterBase((byte) 'T', -4716832531774407080L); + acc2.addFailedFilterBase((byte) 'T', -1351966021709934743L); + acc2.addFailedFilterBase((byte) 'T', -330036681603147982L); + acc2.addFailedFilterBase((byte) 'T', 1382422122404187806L); + acc2.addFailedFilterBase((byte) 'T', 6700611398151820155L); + acc2.addFailedFilterBase((byte) 'T', 5186962780778754199L); + acc2.addFailedFilterBase((byte) 'T', 5122404141805319601L); + acc2.addFailedFilterBase((byte) 'T', 8958843543082877714L); + acc2.addFailedFilterBase((byte) 'T', -3052003932123722238L); + acc2.addFailedFilterBase((byte) 'T', 6488180788633260269L); + acc2.addFailedFilterBase((byte) 'T', 8831294537041955437L); + acc2.addFailedFilterBase((byte) 'T', 6048261452686045528L); + 
acc2.addFailedFilterBase((byte) 'T', -27981145301803143L); + acc2.addFailedFilterBase((byte) 'T', 8691136542396127591L); + acc2.addFailedFilterBase((byte) 'T', 7868192676364940616L); + acc2.addFailedFilterBase((byte) 'T', -1895240219048882894L); + acc2.addFailedFilterBase((byte) 'T', -432780264239122912L); + + List accumulators = Arrays.asList(acc1, acc2); + assertEquals("AT10;TA21;_A1;_T9", PipelineUtil.getFailedFilterCS(accumulators)); + } + + @Test + public void testGetFailedFilterCS() { + Accumulator acc1 = new Accumulator(12345); + acc1.addFailedFilterBase((byte) 'A', 2609032967337224118L); + acc1.addFailedFilterBase((byte) 'A', 1180519307271286853L); + acc1.addFailedFilterBase((byte) 'A', 3647809539552025230L); + + Accumulator acc2 = new Accumulator(12346); + acc2.addFailedFilterBase((byte) 'A', 2609032967337224118L); + acc2.addFailedFilterBase((byte) 'C', 2609032967337224118L); + List accumulators = Arrays.asList(acc1, acc2); + assertEquals("A_2", PipelineUtil.getFailedFilterCS(accumulators)); + + acc1.addFailedFilterBase((byte) 'C', 3647809539552025230L); + assertEquals("A_1", PipelineUtil.getFailedFilterCS(accumulators)); + acc1.addFailedFilterBase((byte) 'G', 555); + acc2.addFailedFilterBase((byte) 'G', 555); + assertEquals("A_1;GG1", PipelineUtil.getFailedFilterCS(accumulators)); + + } }