diff --git a/qannotate/test/au/edu/qimr/qannotate/modes/ConfidenceModeTest.java b/qannotate/test/au/edu/qimr/qannotate/modes/ConfidenceModeTest.java
index cf7c0db4b..2a57137f4 100644
--- a/qannotate/test/au/edu/qimr/qannotate/modes/ConfidenceModeTest.java
+++ b/qannotate/test/au/edu/qimr/qannotate/modes/ConfidenceModeTest.java
@@ -294,6 +294,10 @@ public void checkMIUN() {
ConfidenceMode.checkMIUN(new String[]{"A"}, 90, "A3;C8", sb, 2, 3, null);
assertEquals("", sb.toString());
+ sb = new StringBuilder();
+ ConfidenceMode.checkMIUN(new String[]{"AA"}, 90, "AA13;AC8", sb, 2, 3, null);
+ assertEquals("MIUN", sb.toString());
+
//
}
@@ -811,6 +815,28 @@ public void realLifeMIUN2() {
assertEquals("PASS", vcf.getSampleFormatRecord(2).getField(VcfHeaderUtils.FORMAT_FILTER));
assertEquals("PASS", vcf.getSampleFormatRecord(3).getField(VcfHeaderUtils.FORMAT_FILTER));
assertEquals("PASS", vcf.getSampleFormatRecord(4).getField(VcfHeaderUtils.FORMAT_FILTER));
+
+ }
+
+ /**
+ * Annotates one compound-SNP record (CA to GG at chr1:205931) using the two-sample,
+ * one-caller meta and expects the FT field to be MIUN for the first sample column
+ * and PASS for the second.
+ * <p>
+ * NOTE(review): the record quoted in the comment inside this method (REF AA, different
+ * depths) is not the record that is built below (REF CA) - confirm which one is intended.
+ */
+ @Test
+ public void compoundSnpMIUN() {
+ /*
+ chr1 205931 . AA GG . . . GT:AD:DP:FF:FT:INF:NNS:OABS 0/0:6,0:6:AA47;AT1;A_1;GG3:.:.:.:AA3[]3[] 0/1:20,4:25:AA182;GG2;TA1:.:SOMATIC:4:AA
+7[]13[];GA1[]0[];GG0[]4[]
+ */
+ VcfRecord vcf = VcfUtils.createVcfRecord(ChrPositionUtils.getChrPosition("chr1", 205931, 205931), ".", "CA", "GG");
+ vcf.setInfo("FLANK=GTAAAACTGGA;BaseQRankSum=0.325;ClippingRankSum=0.000;DP=58;ExcessHet=3.0103;FS=4.683;MQ=55.10;MQRankSum=-6.669;QD=4.63;ReadPosRankSum=-0.352;SOR=1.425;IN=1;DB;VLD;HOM=3,TATATGTAAAgCTGGATTAAT;EFF=downstream_gene_variant(MODIFIER||914|||MST1P2|unprocessed_pseudogene|NON_CODING|ENST00000457982||1),intergenic_region(MODIFIER||||||||||1)");
+ vcf.setFormatFields(java.util.Arrays.asList(
+ "GT:AD:DP:FF:FT:INF:NNS:OABS",
+ "0/0:36,0:36:CA12;GG4;_A2:.:.:.:CA17[]19[];C_1[]0[]",
+ "0/1:102,11:114:AA1;CA30;CC1;CT1;C_3;GG50;GT1:.:SOMATIC:10:AA1[]0[];CA61[]41[];GG4[]7[];G_1[]0[];_A0[]2[]"));
+ ConfidenceMode cm = new ConfidenceMode(TWO_SAMPLE_ONE_CALLER_META);
+ cm.positionRecordMap.put(vcf.getChrPosition(), List.of(vcf));
+ cm.addAnnotation();
+ // addAnnotation() has run over the map; read the record back before checking FT
+ vcf = cm.positionRecordMap.get(vcf.getChrPosition()).getFirst();
+ assertEquals("MIUN", vcf.getSampleFormatRecord(1).getField(VcfHeaderUtils.FORMAT_FILTER));
+ assertEquals("PASS", vcf.getSampleFormatRecord(2).getField(VcfHeaderUtils.FORMAT_FILTER));
+
}
@Test
@@ -1345,6 +1371,30 @@ public void confidenceRealLifeMerged9() {
assertEquals("PASS", vcf.getSampleFormatRecord(4).getField(VcfHeaderUtils.FORMAT_FILTER));
}
+ /**
+ * Annotates one compound-SNP record (AG to GC at chr1:11445731) using the two-sample,
+ * two-caller meta and checks the FT value of each of the four sample columns:
+ * MIUN, MR, COV and COV respectively. All FT fields are supplied as "." on input.
+ * <p>
+ * NOTE(review): the first sample column's FF field contains GC7, which is presumably
+ * what triggers MIUN - confirm against ConfidenceMode.checkMIUN.
+ */
+ @Test
+ public void realLifeCSMIUN() {
+ /*
+ chr1 11445731 rs386628485 AG GC . . IN=1;DB;HOM=0,ACAGAGAGACagAGAGTCAGAG GT:AD:DP:FF:FT:INF:NNS:OABS 0/0:18,1:20:AC1;AG6;AGC1;AT1;A_1;CC1;GC7:PASS:.:.:AG7[]11[];GC0[]1[];GG0
+[]1[];_C1[]0[] 0/1:32,4:36:AA1;AG11;A_1;GA1;GC18;G_1:MR:SOMATIC:4:AG21[]11[];A_1[]0[];GC2[]2[];_C1[]0[] ./.:.:.:.:COV:.:.:. ./.:.:.:.:COV:.:.:.
+ */
+ VcfRecord vcf = new VcfRecord(new String[]{"chr1", "11445731", "rs386628485", "AG", "GC", ".", ".", "IN=1;DB;HOM=0,ACAGAGAGACagAGAGTCAGAG"
+ , "GT:AD:DP:FF:FT:INF:NNS:OABS"
+ , "0/0:18,1:20:AC1;AG6;AGC1;AT1;A_1;CC1;GC7:.:.:.:AG7[]11[];GC0[]1[];GG0[]1[];_C1[]0[]"
+ , "0/1:32,4:36:AA1;AG11;A_1;GA1;GC18;G_1:.:SOMATIC:4:AG21[]11[];A_1[]0[];GC2[]2[];_C1[]0[]"
+ , "./.:.:.:.:.:.:.:."
+ , "./.:.:.:.:.:.:.:."});
+ ConfidenceMode cm = new ConfidenceMode(TWO_SAMPLE_TWO_CALLER_META);
+ cm.positionRecordMap.put(vcf.getChrPosition(), List.of(vcf));
+ cm.addAnnotation();
+
+ // addAnnotation() has run over the map; read the record back before checking FT
+ vcf = cm.positionRecordMap.get(vcf.getChrPosition()).getFirst();
+ assertEquals("MIUN", vcf.getSampleFormatRecord(1).getField(VcfHeaderUtils.FORMAT_FILTER));
+ assertEquals("MR", vcf.getSampleFormatRecord(2).getField(VcfHeaderUtils.FORMAT_FILTER));
+ assertEquals("COV", vcf.getSampleFormatRecord(3).getField(VcfHeaderUtils.FORMAT_FILTER));
+ assertEquals("COV", vcf.getSampleFormatRecord(4).getField(VcfHeaderUtils.FORMAT_FILTER));
+
+ }
+
@Test
public void applyMRFilter() {
assertFalse(ConfidenceMode.applyMutantReadFilter(null, null, -1));
diff --git a/qcommon/src/org/qcmg/common/model/Accumulator.java b/qcommon/src/org/qcmg/common/model/Accumulator.java
index b5b1d66de..8ad6ebc9d 100644
--- a/qcommon/src/org/qcmg/common/model/Accumulator.java
+++ b/qcommon/src/org/qcmg/common/model/Accumulator.java
@@ -43,10 +43,10 @@ public class Accumulator {
private final int position;
- private short failedFilterACount = 0;
- private short failedFilterCCount = 0;
- private short failedFilterGCount = 0;
- private short failedFilterTCount = 0;
+ private TLongList failedFilterACount;
+ private TLongList failedFilterCCount;
+ private TLongList failedFilterGCount;
+ private TLongList failedFilterTCount;
private TLongList readNameHashStrandBasePositionQualities;
@@ -58,19 +58,23 @@ public int getPosition() {
return position;
}
- public void addFailedFilterBase(final byte base) {
+ public void addFailedFilterBase(final byte base, long readNameHash) {
switch (base) {
case A_BYTE:
- failedFilterACount++;
+ if (null == failedFilterACount) failedFilterACount = new TLongArrayList();
+ failedFilterACount.add(readNameHash);
break;
case C_BYTE:
- failedFilterCCount++;
+ if (null == failedFilterCCount) failedFilterCCount = new TLongArrayList();
+ failedFilterCCount.add(readNameHash);
break;
case G_BYTE:
- failedFilterGCount++;
+ if (null == failedFilterGCount) failedFilterGCount = new TLongArrayList();
+ failedFilterGCount.add(readNameHash);
break;
case T_BYTE:
- failedFilterTCount++;
+ if (null == failedFilterTCount) failedFilterTCount = new TLongArrayList();
+ failedFilterTCount.add(readNameHash);
break;
default: /* do nothing */
break;
@@ -120,17 +124,17 @@ public String toString() {
public String getFailedFilterPileup() {
StringBuilder sb = new StringBuilder();
- if (failedFilterACount > 0) {
- StringUtils.updateStringBuilder(sb, A_STRING + failedFilterACount, Constants.SEMI_COLON);
+ if (null != failedFilterACount && ! failedFilterACount.isEmpty()) {
+ StringUtils.updateStringBuilder(sb, A_STRING + failedFilterACount.size(), Constants.SEMI_COLON);
}
- if (failedFilterCCount > 0) {
- StringUtils.updateStringBuilder(sb, C_STRING + failedFilterCCount, Constants.SEMI_COLON);
+ if (null != failedFilterCCount && ! failedFilterCCount.isEmpty()) {
+ StringUtils.updateStringBuilder(sb, C_STRING + failedFilterCCount.size(), Constants.SEMI_COLON);
}
- if (failedFilterGCount > 0) {
- StringUtils.updateStringBuilder(sb, G_STRING + failedFilterGCount, Constants.SEMI_COLON);
+ if (null != failedFilterGCount && ! failedFilterGCount.isEmpty()) {
+ StringUtils.updateStringBuilder(sb, G_STRING + failedFilterGCount.size(), Constants.SEMI_COLON);
}
- if (failedFilterTCount > 0) {
- StringUtils.updateStringBuilder(sb, T_STRING + failedFilterTCount, Constants.SEMI_COLON);
+ if (null != failedFilterTCount && ! failedFilterTCount.isEmpty()) {
+ StringUtils.updateStringBuilder(sb, T_STRING + failedFilterTCount.size(), Constants.SEMI_COLON);
}
return !sb.isEmpty() ? sb.toString() : Constants.MISSING_DATA_STRING;
}
@@ -140,4 +144,23 @@ public int getCoverage() {
return null == readNameHashStrandBasePositionQualities ? 0 : readNameHashStrandBasePositionQualities.size() / 2;
}
+ /**
+ * Reports whether none of this accumulator's backing lists has been allocated.
+ * <p>
+ * Only null-ness is tested: the main data list (readNameHashStrandBasePositionQualities)
+ * and the four failed-filter lists must all be {@code null}. A list that has been
+ * allocated but currently holds no elements still makes this method return {@code false}.
+ *
+ * @return {@code true} if every backing list is {@code null}, {@code false} otherwise
+ */
+ public boolean isEmpty() {
+ return null == readNameHashStrandBasePositionQualities && null == failedFilterACount && null == failedFilterCCount && null == failedFilterGCount && null == failedFilterTCount;
+ }
+
+ /**
+ * Returns the read-name hashes recorded for failed-filter A bases.
+ * Despite the name this is the list itself, not a number; its size is the count.
+ *
+ * @return the internal list (not a copy), or {@code null} if no failed-filter A base has been added
+ */
+ public TLongList getFailedFilterACount() {
+ return failedFilterACount;
+ }
+
+ /**
+ * Returns the read-name hashes recorded for failed-filter C bases.
+ * Despite the name this is the list itself, not a number; its size is the count.
+ *
+ * @return the internal list (not a copy), or {@code null} if no failed-filter C base has been added
+ */
+ public TLongList getFailedFilterCCount() {
+ return failedFilterCCount;
+ }
+
+ /**
+ * Returns the read-name hashes recorded for failed-filter G bases.
+ * Despite the name this is the list itself, not a number; its size is the count.
+ *
+ * @return the internal list (not a copy), or {@code null} if no failed-filter G base has been added
+ */
+ public TLongList getFailedFilterGCount() {
+ return failedFilterGCount;
+ }
+
+ /**
+ * Returns the read-name hashes recorded for failed-filter T bases.
+ * Despite the name this is the list itself, not a number; its size is the count.
+ *
+ * @return the internal list (not a copy), or {@code null} if no failed-filter T base has been added
+ */
+ public TLongList getFailedFilterTCount() {
+ return failedFilterTCount;
+ }
}
diff --git a/qcommon/src/org/qcmg/common/util/AccumulatorUtils.java b/qcommon/src/org/qcmg/common/util/AccumulatorUtils.java
index 44d357230..79e0653b2 100644
--- a/qcommon/src/org/qcmg/common/util/AccumulatorUtils.java
+++ b/qcommon/src/org/qcmg/common/util/AccumulatorUtils.java
@@ -60,7 +60,6 @@ public class AccumulatorUtils {
public static final long T_BASE_BIT = 0x400000000000000L;
public static final int T_BASE_BIT_POSITION = 58;
-
public static final long STRAND_BIT = 0x8000000000000000L;
public static final int STRAND_BIT_POSITION = 63;
public static final long END_OF_READ_BIT = 0x4000000000000000L;
@@ -69,7 +68,7 @@ public class AccumulatorUtils {
/**
* This removes reads that have the same read name hash from the accumulator.
- * If
+ *
* If the duplicates have the same base, then 1 is left, if they have different bases, they are both (all?) removed
*
* This method updates the Accumulator object that is passed in, and is therefore not side-effect free
@@ -236,6 +235,7 @@ public static int[] getBaseCountByStrand(int[] array, char c) {
* strand (bit 63)
* end of read (bit 62)
* base (bits 58-61)
+ * passedFilter (bit 57)
* quality (bits 32-40)
* position (bits 0-31)
*
@@ -1014,8 +1014,9 @@ public static TLongIntMap getReadNameHashStartPositionMap(Accumulator acc) {
if (null != acc) {
TLongList list = acc.getData();
if (null != list) {
- TLongIntMap map = new TLongIntHashMap(list.size() * 2);
- for (int i = 0, len = list.size(); i < len; i += 2) {
+ int len = list.size();
+ TLongIntMap map = new TLongIntHashMap(len);
+ for (int i = 0; i < len; i += 2) {
int startPosition = (int) list.get(i + 1);
if (((list.get(i + 1) >>> STRAND_BIT_POSITION) & 1) == 0) {
@@ -1038,8 +1039,9 @@ public static TLongCharMap getReadNameHashBaseMap(Accumulator acc) {
if (null != acc) {
TLongList list = acc.getData();
if (null != list) {
- TLongCharMap map = new TLongCharHashMap(list.size() * 2);
- for (int i = 0, len = list.size(); i < len; i += 2) {
+ int len = list.size();
+ TLongCharMap map = new TLongCharHashMap(len);
+ for (int i = 0; i < len; i += 2) {
char base = getBaseAsCharFromLong(list.get(i + 1));
if (((list.get(i + 1) >>> STRAND_BIT_POSITION) & 1) == 0) {
diff --git a/qcommon/test/org/qcmg/common/model/AccumulatorTest.java b/qcommon/test/org/qcmg/common/model/AccumulatorTest.java
index 3994106a6..31e8c0e89 100644
--- a/qcommon/test/org/qcmg/common/model/AccumulatorTest.java
+++ b/qcommon/test/org/qcmg/common/model/AccumulatorTest.java
@@ -81,11 +81,12 @@ public void endOfReads() {
public void testUnfilteredPileup() {
Accumulator acc = new Accumulator(1);
String basesString = "ACGT";
- for (byte b : basesString.getBytes()) acc.addFailedFilterBase(b);
+ long readNameHash = 1;
+ for (byte b : basesString.getBytes()) acc.addFailedFilterBase(b, readNameHash);
assertEquals("A1;C1;G1;T1", acc.getFailedFilterPileup());
- for (byte b : basesString.getBytes()) acc.addFailedFilterBase(b);
+ for (byte b : basesString.getBytes()) acc.addFailedFilterBase(b, readNameHash + 1);
assertEquals("A2;C2;G2;T2", acc.getFailedFilterPileup());
- for (byte b : basesString.getBytes()) acc.addFailedFilterBase(b);
+ for (byte b : basesString.getBytes()) acc.addFailedFilterBase(b, readNameHash + 2);
assertEquals("A3;C3;G3;T3", acc.getFailedFilterPileup());
}
@@ -97,8 +98,9 @@ public void testUnfilteredPileupPercentage() {
}
String basesString = "GG";
+ long readNameHash = 1;
for (byte b : basesString.getBytes()) {
- acc.addFailedFilterBase(b);
+ acc.addFailedFilterBase(b, readNameHash++);
}
assertEquals("G2", acc.getFailedFilterPileup());
/*
@@ -106,7 +108,7 @@ public void testUnfilteredPileupPercentage() {
*/
basesString = "G";
for (byte b : basesString.getBytes()) {
- acc.addFailedFilterBase(b);
+ acc.addFailedFilterBase(b, readNameHash++);
}
assertEquals("G3", acc.getFailedFilterPileup());
}
@@ -115,31 +117,32 @@ public void testUnfilteredPileupPercentage() {
@Test
public void singleUnfilteredPileup() {
Accumulator acc = new Accumulator(1);
- for (byte b : "ACGT".getBytes()) acc.addFailedFilterBase(b);
+ long readNameHash = 1;
+ for (byte b : "ACGT".getBytes()) acc.addFailedFilterBase(b, readNameHash++);
assertEquals("A1;C1;G1;T1", acc.getFailedFilterPileup());
acc = new Accumulator(1);
- for (byte b : "ACGTA".getBytes()) acc.addFailedFilterBase(b);
+ for (byte b : "ACGTA".getBytes()) acc.addFailedFilterBase(b, readNameHash++);
assertEquals("A2;C1;G1;T1", acc.getFailedFilterPileup());
acc = new Accumulator(1);
- for (byte b : "ACCGT".getBytes()) acc.addFailedFilterBase(b);
+ for (byte b : "ACCGT".getBytes()) acc.addFailedFilterBase(b, readNameHash++);
assertEquals("A1;C2;G1;T1", acc.getFailedFilterPileup());
acc = new Accumulator(1);
- for (byte b : "ATTTGT".getBytes()) acc.addFailedFilterBase(b);
+ for (byte b : "ATTTGT".getBytes()) acc.addFailedFilterBase(b, readNameHash++);
assertEquals("A1;G1;T4", acc.getFailedFilterPileup());
acc = new Accumulator(1);
- for (byte b : "AAAATTTGT".getBytes()) acc.addFailedFilterBase(b);
+ for (byte b : "AAAATTTGT".getBytes()) acc.addFailedFilterBase(b, readNameHash++);
assertEquals("A4;G1;T4", acc.getFailedFilterPileup());
acc = new Accumulator(1);
- for (byte b : "AAAACTTTCGT".getBytes()) acc.addFailedFilterBase(b);
+ for (byte b : "AAAACTTTCGT".getBytes()) acc.addFailedFilterBase(b, readNameHash++);
assertEquals("A4;C2;G1;T4", acc.getFailedFilterPileup());
acc = new Accumulator(1);
- for (byte b : "AAAACTTTCGTG".getBytes()) acc.addFailedFilterBase(b);
+ for (byte b : "AAAACTTTCGTG".getBytes()) acc.addFailedFilterBase(b, readNameHash++);
assertEquals("A4;C2;G2;T4", acc.getFailedFilterPileup());
}
@@ -217,7 +220,7 @@ public void getGenotypeRealLife() {
Accumulator acc = new Accumulator(1);
for (int i = 1; i <= 60; i++) acc.addBase((byte) 'G', (byte) 40, false, 1, 1, 2, i);
for (int i = 1; i <= 5; i++) acc.addBase((byte) 'C', (byte) 42, false, 1, 1, 2, i + 61);
- for (int i = 1; i <= 1; i++) acc.addBase((byte) 'C', (byte) 42, true, 1, 1, 2, i + 67);
+ for (int i = 1; i == 1; i++) acc.addBase((byte) 'C', (byte) 42, true, 1, 1, 2, i + 67);
assertEquals("C1[42]5[42];G0[0]60[40]", AccumulatorUtils.getOABS(acc));
/*
diff --git a/qsnp/src/org/qcmg/snp/Pipeline.java b/qsnp/src/org/qcmg/snp/Pipeline.java
index 83ccb9f8e..86179232e 100644
--- a/qsnp/src/org/qcmg/snp/Pipeline.java
+++ b/qsnp/src/org/qcmg/snp/Pipeline.java
@@ -28,6 +28,7 @@
import java.io.File;
import java.io.IOException;
+import java.nio.ByteBuffer;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
@@ -35,7 +36,6 @@
import java.util.Calendar;
import java.util.Collections;
import java.util.HashSet;
-import java.util.Iterator;
import java.util.List;
import java.util.Optional;
import java.util.Queue;
@@ -133,9 +133,7 @@ public abstract class Pipeline {
int[] testStartPositions;
int noOfControlFiles;
int noOfTestFiles;
- boolean includeIndels;
- int mutationId;
-
+
List controlRules = new ArrayList<>(4);
List testRules = new ArrayList<>(4);
@@ -354,13 +352,13 @@ void writeVCF(String outputFileName) throws Exception {
try (FastaSequenceFile fsf = new FastaSequenceFile(new File(referenceFile), true)) {
if (null != fsf.getSequenceDictionary()) {
refFileContigs = fsf.getSequenceDictionary().getSequences()
- .stream().map(ssr -> ssr.getSequenceName()).collect(Collectors.toList());
+ .stream().map(SAMSequenceRecord::getSequenceName).collect(Collectors.toList());
}
}
}
snps.sort(refFileContigs.isEmpty() ? null : ChrPositionComparator.getVcfRecordComparator(refFileContigs));
- try (RecordWriter writer = new RecordWriter<>(new File(outputFileName));) {
+ try (RecordWriter writer = new RecordWriter<>(new File(outputFileName))) {
final VcfHeader header = getHeaderForQSnp(patientId, controlSampleId, testSampleId, "qSNP v" + Main.version, normalBamIds, tumourBamIds, qexec.getUuid().getValue());
VcfHeaderUtils.addQPGLineToHeader(header, qexec.getToolName().getValue(), qexec.getToolVersion().getValue(), qexec.getCommandLine().getValue()
+ (StringUtils.isNullOrEmpty(runMode) ? "" : " [runMode: " + runMode + "]"));
@@ -578,7 +576,7 @@ static boolean isPileupRecordAKeeper(int variantCount, int coverage, Rule rule,
static boolean isVariantOnBothStrands(List baseCounts) {
final PileupElement pe = PileupElementUtil.getLargestVariant(baseCounts);
- return null == pe ? false : pe.isFoundOnBothStrands();
+ return null != pe && pe.isFoundOnBothStrands();
}
/**
@@ -653,19 +651,16 @@ void walkBams(boolean includeDups) throws Exception {
Accumulator [] controlAccs = new Accumulator[1024 * 1024 * 256];
Accumulator [] testAccs = new Accumulator[1024 * 1024 * 256];
- final CyclicBarrier barrier = new CyclicBarrier(noOfThreads, new Runnable() {
- @Override
- public void run() {
- // reset the minStartPositions values to zero
- controlMinStart.set(0);
- testMinStart.set(0);
-
- // update the reference bases array
- loadNextReferenceSequence();
-
- logger.info("barrier has been reached by all threads - moving onto next chromosome");
- }
- });
+ final CyclicBarrier barrier = new CyclicBarrier(noOfThreads, () -> {
+ // reset the minStartPositions values to zero
+ controlMinStart.set(0);
+ testMinStart.set(0);
+
+ // update the reference bases array
+ loadNextReferenceSequence();
+
+ logger.info("barrier has been reached by all threads - moving onto next chromosome");
+ });
final ExecutorService service = Executors.newFixedThreadPool(noOfThreads);
final CountDownLatch consumerLatch = new CountDownLatch(consumerLatchSize);
final CountDownLatch controlProducerLatch = new CountDownLatch(1);
@@ -737,7 +732,7 @@ public class Producer implements Runnable {
private final CyclicBarrier barrier;
private final boolean includeDups;
private final boolean runqBamFilter;
- private Accumulator [] accum;
+ private final Accumulator [] accum;
private XXHash64 xxhash64;
private final static int seed = 0x9747b28c; // used to initialize the hash value, use whatever value you want, but always the same
private final static int ONE_MILLION = 1_000_000;
@@ -746,7 +741,7 @@ public Producer(final String[] bamFiles, final CountDownLatch latch, final boole
final Queue samQueue, final Thread mainThread, final String query,
final CyclicBarrier barrier, boolean includeDups, Accumulator [] accum) throws Exception {
this.latch = latch;
- final Set bams = new HashSet();
+ final Set bams = new HashSet<>();
for (final String bamFile : bamFiles) {
bams.add(new File(bamFile));
}
@@ -776,7 +771,7 @@ public void run() {
try {
boolean keepRunning = true;
-
+
while (keepRunning) {
@@ -796,13 +791,13 @@ public void run() {
while (iter.hasNext()) {
final SAMRecord record = iter.next();
-
+
if (++ counter > ONE_MILLION) {
higherOrderCounter++;
counter = 0;
int qSize = queue.size();
logger.info("hit " + higherOrderCounter + "M sam records, passed filter: " + passedFilterCount + ", qsize: " + qSize);
- if (passedFilterCount == 0 && (counter + (ONE_MILLION * higherOrderCounter)) >= noOfRecordsFailingFilter) {
+ if (passedFilterCount == 0 && (counter + ((long) ONE_MILLION * higherOrderCounter)) >= noOfRecordsFailingFilter) {
throw new SnpException("INVALID_FILTER", ""+ (counter + (ONE_MILLION * higherOrderCounter)));
}
while (qSize > 10000) {
@@ -894,10 +889,23 @@ private void addRecordToQueue(final SAMRecord record, final boolean passesFilte
passedFilterCount++;
}
record.getCigar(); // cache cigar for all records
- record.getAlignmentEnd(); // cache alignment end for all records
-
- final SAMRecordFilterWrapper wrapper = new SAMRecordFilterWrapper(record, xxhash64.hash(record.getReadName().getBytes(), 0, record.getReadNameLength(), seed));
+ int end = record.getAlignmentEnd(); // cache alignment end for all records
+ int start = record.getAlignmentStart();
+ final SAMRecordFilterWrapper wrapper = new SAMRecordFilterWrapper(record, xxhash64.hash(record.getReadName().getBytes(),0, record.getReadName().length(), seed));
wrapper.setPassesFilter(passesFilter);
+
+ /*
+ setup Accumulators for this read
+ */
+ int startPosition = Math.min(start, end);
+ int endPosition = Math.max(start, end);
+ for (int i = startPosition; i <= endPosition; i++) {
+ Accumulator acc = accum[i];
+ if (null == acc) {
+ accum[i] = new Accumulator(i);
+ }
+ }
+
queue.add(wrapper);
}
}
@@ -935,12 +943,14 @@ public Consumer(final CountDownLatch consumerLatch, final CountDownLatch normalL
public void processSAMRecord(final SAMRecordFilterWrapper record) {
final SAMRecord sam = record.getRecord();
final boolean forwardStrand = ! sam.getReadNegativeStrandFlag();
+ final boolean passesFilter = record.getPassesFilter();
final int startPosition = sam.getAlignmentStart();
// endPosition is just that for reverse strand, but for forward strand reads it is start position
final int endPosition = sam.getAlignmentEnd();
final byte[] bases = sam.getReadBases();
- final byte[] qualities = record.getPassesFilter() ? sam.getBaseQualities() : null;
+ final byte[] qualities = passesFilter ? sam.getBaseQualities() : null;
final Cigar cigar = sam.getCigar();
+ final long readNameHash = record.getPosition();
int referenceOffset = 0, offset = 0;
@@ -951,8 +961,8 @@ public void processSAMRecord(final SAMRecordFilterWrapper record) {
if (co.consumesReferenceBases() && co.consumesReadBases()) {
// we have a number (length) of bases that can be advanced.
updateMapWithAccums(startPosition, bases,
- qualities, forwardStrand, offset, length, referenceOffset,
- record.getPassesFilter(), endPosition, record.getPosition());
+ qualities, forwardStrand, offset, length, referenceOffset,
+ passesFilter, endPosition, readNameHash);
// advance offsets
referenceOffset += length;
offset += length;
@@ -984,16 +994,18 @@ public void updateMapWithAccums(int startPosition, final byte[] bases, final byt
final int startPosAndRefOffset = startPosition + referenceOffset;
for (int i = 0 ; i < length ; i++) {
- Accumulator acc = array[i + startPosAndRefOffset];
+ int currentPos = i + startPosAndRefOffset;
+ Accumulator acc = array[currentPos];
if (null == acc) {
- acc = new Accumulator(i + startPosAndRefOffset);
- array[i + startPosAndRefOffset] = acc;
+ acc = new Accumulator(currentPos);
+ array[currentPos] = acc;
}
- if (passesFilter && qualities[i + offset] >= minBaseQual) {
- acc.addBase(bases[i + offset], qualities[i + offset], forwardStrand,
- startPosition, i + startPosAndRefOffset, readEndPosition, readNameHash);
+ int iPlusOffset = i + offset;
+ if (passesFilter && qualities[iPlusOffset] >= minBaseQual) {
+ acc.addBase(bases[iPlusOffset], qualities[iPlusOffset], forwardStrand,
+ startPosition, currentPos, readEndPosition, readNameHash);
} else {
- acc.addFailedFilterBase(bases[i + offset]);
+ acc.addFailedFilterBase(bases[iPlusOffset], readNameHash);
}
}
}
@@ -1045,7 +1057,7 @@ public void run() {
if (barrier.getNumberWaiting() >= (singleSampleMode ? 1 : 2)) {
// logger.info("null record, barrier count > 2 - what now??? q.size: " + queue.size());
// just me left
- if (queue.size() == 0 ) {
+ if (queue.isEmpty()) {
logger.info("Consumer: Processed all records in " + currentChr + ", waiting at barrier");
try {
@@ -1243,8 +1255,15 @@ private void processControlAndTest(Accumulator controlAcc, Accumulator testAcc)
}
- private void interrogateAccumulations(final Accumulator control, final Accumulator test) {
-
+ private void interrogateAccumulations(Accumulator control, Accumulator test) {
+
+ if (null != control && control.isEmpty()) {
+ control = null;
+ }
+ if (null != test && test.isEmpty()) {
+ test = null;
+ }
+
// get coverage for both normal and tumour
int controlCoverage = null != control ? control.getCoverage() : 0;
int testCoverage = null != test ? test.getCoverage() : 0;
@@ -1262,9 +1281,9 @@ private void interrogateAccumulations(final Accumulator control, final Accumulat
final int position = control != null ? control.getPosition() : test.getPosition();
// if we are over the length of this particular sequence - return
- if (position-1 >= referenceBasesLength) return;
+ if (position - 1 >= referenceBasesLength) return;
- char ref = (char) referenceBases[position-1];
+ char ref = (char) referenceBases[position - 1];
if ( ! BaseUtils.isACGT(ref)) {
logger.warn("ignoring potential snp at " + currentChr + ":" + position + " - don't deal with ref values of: " + ref);
} else {
@@ -1343,7 +1362,7 @@ private void interrogateAccumulations(final Accumulator control, final Accumulat
/*
* attempt to add format field information
*/
- List ff = new ArrayList(4);
+ List ff = new ArrayList<>(4);
ff.add(header);
if ( ! singleSampleMode) {
@@ -1363,15 +1382,7 @@ private void interrogateAccumulations(final Accumulator control, final Accumulat
}
}
}
-
- /**
- * Overloaded method
- * @see compoundSnps(boolean complete)
- */
- void compoundSnps() {
- compoundSnps(true);
- }
-
+
public static List getRecordsAtPosition(SamReader reader, String contig, int position) {
SAMRecordIterator iter = reader.query(contig, position, position, false);
List recs = new ArrayList<>();
@@ -1450,15 +1461,9 @@ void compoundSnps(boolean complete) {
}
}
- if (toRemove.size() > 0) {
+ if (!toRemove.isEmpty()) {
logger.info("About to call remove with toRemove size: " + toRemove.size());
- Iterator iter = snps.iterator();
- while (iter.hasNext()) {
- VcfRecord v = iter.next();
- if (toRemove.contains(v)) {
- iter.remove();
- }
- }
+ snps.removeIf(toRemove::contains);
logger.info("About to call remove with toRemove size: " + toRemove.size() + " - DONE");
}
diff --git a/qsnp/src/org/qcmg/snp/util/GenotypeUtil.java b/qsnp/src/org/qcmg/snp/util/GenotypeUtil.java
index d19f229e3..e0724e75e 100644
--- a/qsnp/src/org/qcmg/snp/util/GenotypeUtil.java
+++ b/qsnp/src/org/qcmg/snp/util/GenotypeUtil.java
@@ -76,7 +76,7 @@ public static String getFormatValues(Accumulator acc, String gt, String alt, cha
StringBuilder sb = new StringBuilder();
sb.append(null != gt ? gt : Constants.MISSING_GT).append(Constants.COLON);
- sb.append(VcfUtils.getAD(""+ref, alt, oabs)).append(Constants.COLON);
+ sb.append(VcfUtils.getAD("" + ref, alt, oabs)).append(Constants.COLON);
sb.append(null == acc ? Constants.MISSING_DATA_STRING :acc.getCoverage()).append(Constants.COLON);
/*
* adding EOR (end of reads -similar in format to FF)
diff --git a/qsnp/src/org/qcmg/snp/util/PipelineUtil.java b/qsnp/src/org/qcmg/snp/util/PipelineUtil.java
index 385270cce..34cc942c0 100644
--- a/qsnp/src/org/qcmg/snp/util/PipelineUtil.java
+++ b/qsnp/src/org/qcmg/snp/util/PipelineUtil.java
@@ -1,16 +1,10 @@
package org.qcmg.snp.util;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.List;
-import java.util.Map;
-import java.util.Optional;
+import java.util.*;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
+import gnu.trove.set.hash.TLongHashSet;
import org.qcmg.common.log.QLogger;
import org.qcmg.common.log.QLoggerFactory;
import org.qcmg.common.model.Accumulator;
@@ -41,7 +35,6 @@
public class PipelineUtil {
public static final String OPEN_CLOSE_BRACKETS = "[]";
- public static final String ZERO_ZERO_GT = "0/0";
private static final QLogger logger = QLoggerFactory.getLogger(PipelineUtil.class);
public static List> listOfListOfAdjacentVcfs(List snps) {
@@ -103,15 +96,15 @@ public static List> listOfListOfAdjacentVcfs(List snp
*/
public static List getAltStringAndGenotypes(List control, List test, String reference) {
if (StringUtils.isNullOrEmpty(reference)) {
- throw new IllegalArgumentException("Null or empty reference passed to PipelIneUtil.getAltStringAndGenotypes");
+ throw new IllegalArgumentException("Null or empty reference passed to PipelineUtil.getAltStringAndGenotypes");
}
Listallels = new ArrayList<>(5);
allels.add(reference);
if (null != control) {
- allels.addAll(control.stream().distinct().filter(s -> isAltFreeOfRef(s, reference)).collect(Collectors.toList()));
+ allels.addAll(control.stream().distinct().filter(s -> isAltFreeOfRef(s, reference)).toList());
}
if (null != test) {
- allels.addAll(test.stream().distinct().filter(s -> isAltFreeOfRef(s, reference)).collect(Collectors.toList()));
+ allels.addAll(test.stream().distinct().filter(s -> isAltFreeOfRef(s, reference)).toList());
}
allels = allels.stream().distinct().collect(Collectors.toList());
@@ -131,7 +124,7 @@ public static List getAltStringAndGenotypes(List control, List -1 ? Constants.MISSING_GT : controlGT[0] + Constants.SLASH_STRING + controlGT[1];
String tgt = Arrays.binarySearch(testGT, (short) -1) > -1 ? Constants.MISSING_GT : testGT[0] + Constants.SLASH_STRING + testGT[1];
- return Arrays.asList(allels.isEmpty() ? Constants.MISSING_DATA_STRING : allels.stream().collect(Collectors.joining(Constants.COMMA_STRING)), cgt, tgt);
+ return Arrays.asList(allels.isEmpty() ? Constants.MISSING_DATA_STRING : String.join(Constants.COMMA_STRING, allels), cgt, tgt);
}
/**
@@ -167,28 +160,32 @@ public static short[] getGenotypeArray(List sampleAlleles, List
}
/**
- * Checks each character in the alt string against the corresponding character in the alt string. If any are the same, returns true. False otherwise
- * @param alt
- * @param ref
- * @return
+ * Checks whether the alternate string ('alt') differs from the reference string ('ref') at every position.
+ * The comparison is position-wise: 'alt' fails only if some index holds the same character in both strings.
+ * If either 'alt' or 'ref' is null, empty, or equal to the missing-data value ".", or if their lengths differ,
+ * no comparison is made and the method returns true.
+ *
+ * @param alt The alternate string to be checked. May be null, empty, or "." (in which case true is returned).
+ * @param ref The reference string to compare against. May be null, empty, or "." (in which case true is returned).
+ * @return false if 'alt' and 'ref' are comparable and hold the same character at some index; otherwise, true.
*/
public static boolean isAltFreeOfRef(String alt, String ref) {
/*
* alt and ref must be not null, empty, missing data, and must be the same length
*/
- if ( ! StringUtils.isNullOrEmptyOrMissingData(alt) && ! StringUtils.isNullOrEmptyOrMissingData(ref)) {
- int len = alt.length();
- if (len == ref.length()) {
- for (int i = 0 ; i < len ; i++) {
- if (alt.charAt(i) == ref.charAt(i)) {
- return false;
- }
- }
+ if (alt == null || ref == null || alt.isEmpty() || ref.isEmpty() || ".".equals(alt) || ".".equals(ref) || alt.length() != ref.length()) {
+ return true;
+ }
+
+ int len = alt.length();
+ for (int i = 0 ; i < len ; i++) {
+ if (alt.charAt(i) == ref.charAt(i)) {
+ return false;
}
}
-
- return true;
+
+ return true; // No matches found, alt is free of ref.
}
/**
@@ -202,14 +199,11 @@ public static List getBasesForGenotype(Map basesAndCoun
if (null != basesAndCounts) {
List genotypeBases = basesAndCounts.entrySet().stream()
- .filter(e -> ! e.getKey().contains("_"))
- .filter(e -> e.getValue().length == 4)
- .filter(e -> (e.getValue()[0] + e.getValue()[2]) >= minimumCoverage )
- .filter(e -> e.getKey().equals(ref) || isAltFreeOfRef(e.getKey(), ref))
+ .filter(e -> e.getValue().length == 4 && (e.getValue()[0] + e.getValue()[2]) >= minimumCoverage && (e.getKey().equals(ref) || isAltFreeOfRef(e.getKey(), ref)) && ! e.getKey().contains("_"))
.sorted(
Comparator.comparing((Map.Entry e) -> e.getValue()[0] + e.getValue()[2], Comparator.reverseOrder())
.thenComparing(e -> e.getValue()[0] > 0 && e.getValue()[2] > 0, Comparator.reverseOrder()))
- .map(e -> e.getKey())
+ .map(Map.Entry::getKey)
.collect(Collectors.toList());
if (genotypeBases.size() > 2) {
@@ -223,19 +217,22 @@ public static List getBasesForGenotype(Map basesAndCoun
/**
* Returns the Observed Alleles By Strand for this map of bases and counts.
- *
- * @param basesAndCounts
- * @return
+ *
+ * @param basesAndCounts a map from allele string to an array of short integers; only entries
+ * whose array has exactly four elements are reported, and elements 0 and 2
+ * of that array are the two counts written out for each allele
+ * @return an Optional containing the qualifying entries sorted by key, each written as key, count, "[]", count, "[]"
+ * and joined by the semicolon constant; an empty Optional if the map is null or no entry qualifies
*/
public static Optional getOABS(Map basesAndCounts) {
if (null != basesAndCounts) {
String oabs = basesAndCounts.entrySet().stream()
.filter(e -> e.getValue().length == 4)
- .sorted((e1, e2) -> e1.getKey().compareTo(e2.getKey()))
+ .sorted(Map.Entry.comparingByKey())
.map(e -> e.getKey() + e.getValue()[0] + OPEN_CLOSE_BRACKETS + e.getValue()[2] + OPEN_CLOSE_BRACKETS)
.collect(Collectors.joining(Constants.SEMI_COLON_STRING));
- return Optional.ofNullable(oabs.length() > 0 ? oabs : null);
+ return Optional.ofNullable(!oabs.isEmpty() ? oabs : null);
}
return Optional.empty();
}
@@ -297,9 +294,7 @@ public static Map getBasesFromAccumulators(List ac
StringBuilder sb = moReadIdsAndBases.get(j);
if (null == sb) {
sb = new StringBuilder();
- for (int k = 0 ; k < ai.get() ; k++) {
- sb.append('_');
- }
+ sb.append("_".repeat(Math.max(0, ai.get())));
moReadIdsAndBases.put(j, sb );
}
sb.append(c);
@@ -416,10 +411,16 @@ public static int getUniqueCount(TIntIntMap map, TIntList list, boolean fs) {
return set.size();
}
+ /**
+ * Determines if the given string contains at least one lowercase character.
+ *
+ * @param s the string to be checked for lowercase characters
+ * @return true if the string contains at least one lowercase character, false otherwise
+ */
public static boolean isStringLowerCase(String s) {
if (null != s) {
for (char c : s.toCharArray()) {
- if (Character.isLetter(c) && Character.isLowerCase(c)) {
+ if (Character.isLowerCase(c)) {
return true;
}
}
@@ -428,11 +429,17 @@ public static boolean isStringLowerCase(String s) {
}
/**
- * for each element in the list, get the corresponding value in the map, and return the unique count of these values
- * In this map, strandedness is the sign in the int value, the long key is the readname hashcode, and all 64 bits are used for this.
- * @param map
- * @param list
- * @return
+ * Computes the count of unique integers based on the given map and list.
+ * The method evaluates the elements in the list, retrieves their corresponding values
+ * from the map, and filters them based on the specified boolean flag.
+ * Only absolute values of the integers matching the condition are considered, and duplicates
+ * are ignored.
+ *
+ * @param map a mapping of long keys to integer values that is used to determine the start positions
+ * @param list a list of long values to be evaluated
+ * @param fs a flag that determines the criteria for filtering the map values; true for positive
+ * values and false for negative values
+ * @return the count of unique absolute values matching the specified criteria
*/
public static int getUniqueCount(TLongIntMap map, TLongList list, boolean fs) {
TIntSet set = new TIntHashSet();
@@ -446,96 +453,100 @@ public static int getUniqueCount(TLongIntMap map, TLongList list, boolean fs) {
return set.size();
}
-// /**
-// * Returns a VcfRecord with just the positional and ref and alt information provided. Does not contain filter, info, format etc.
-// * @param vcfs
-// * @return
-// */
-// public static VcfRecord createSkeletonCompoundSnp(List vcfs) {
-// /*
-// * sort list
-// */
-// vcfs.sort(null);
-// StringBuilder ref = new StringBuilder();
-// StringBuilder alt = new StringBuilder();
-// ChrPosition startPosition = vcfs.get(0).getChrPosition();
-//
-// for (VcfRecord v : vcfs) {
-// ref.append(v.getRefChar());
-// alt.append(v.getAlt());
-// }
-// return VcfUtils.createVcfRecord(startPosition, null, ref.toString(), alt.toString());
-// }
-//
-
+ /**
+ * Processes a map of VcfRecord to a pair of Accumulator objects and separates them into two lists:
+ * one for control accumulators and another for test accumulators. Entries with non-null accumulators
+ * are added to their respective lists in the sorted order of the map's keys.
+ *
+ * @param vcfs A map where keys are VcfRecord objects and values are pairs of Accumulator objects.
+ * The left value in the pair corresponds to the control accumulator, and the right value
+ * corresponds to the test accumulator.
+ * @return A pair of lists, where the first list contains the control accumulators and the second
+ * list contains the test accumulators.
+ */
public static Pair, List> getAccs(Map> vcfs) {
- /*
- * sort keys in map
- */
- List l = new ArrayList<>(vcfs.keySet());
- l.sort(null);
-
- /*
- * extract left for control, right for test
- */
+ // Preallocate lists for control and test accumulators based on map size
List cAccs = new ArrayList<>(vcfs.size() + 1);
List tAccs = new ArrayList<>(vcfs.size() + 1);
-
- for (VcfRecord v : l) {
- Pair p = vcfs.get(v);
- if (null != p.left()) {
- cAccs.add(p.left());
- }
- if (null != p.right()) {
- tAccs.add(p.right());
- }
- }
-
- return new Pair<>(cAccs,tAccs);
+
+ // Stream and process the map entries directly
+ vcfs.entrySet()
+ .stream()
+ .sorted(Map.Entry.comparingByKey()) // sort by key so that both lists follow key order
+ .forEach(entry -> {
+ Pair p = entry.getValue();
+ if (p.left() != null) {
+ cAccs.add(p.left());
+ }
+ if (p.right() != null) {
+ tAccs.add(p.right());
+ }
+ });
+
+ // Return the final pair
+ return new Pair<>(cAccs, tAccs);
+
}
public static Optional getReference(Collection vcfs) {
- return Optional.ofNullable(vcfs.stream().sorted().map(VcfRecord::getRef).collect(Collectors.joining()));
+ return Optional.of(vcfs.stream().sorted().map(VcfRecord::getRef).collect(Collectors.joining()));
}
/**
- * REturns a count of either the novel starts
- *
- * map contains bases as key, and short array contains 4 elements, which are (in this order):
- * forward strand count
- * forward strand novel starts count
- * reverse strand count
- * reverse strand novel starts count
- *
- * Offset dictates whether you are getting novel starts (offset = 1), or counts (offset = 0)
- *
- *
- * @param map
- * @param key
- * @param offset
- * @return
+ * Retrieves a specific count from the short array associated with the given key in the map.
+ * The short array contains coverage data, and the count is the sum of the elements at index offset and index offset + 2.
+ * If the key is not found or the short array is null, the method returns 0.
+ *
+ * @param map A map where keys are strings representing specific identifiers, and values are short arrays
+ * containing coverage data.
+ * @param key The key for which the count is to be retrieved.
+ * @param offset The offset within the short array: 0 sums the per-strand counts, 1 sums the per-strand novel starts counts.
+ * @return The sum of the value at the offset index and the value at index (offset + 2)
+ * in the short array corresponding to the specified key. If the key is not found or the associated
+ * array is null, 0 is returned.
*/
public static int getCount(Map map, String key, int offset) {
short[] sa =map.get(key);
- return (null != sa) ? sa[0 + offset] + sa[2 + offset] : 0;
+ return (null != sa) ? sa[offset] + sa[2 + offset] : 0;
}
/**
- * returns the novel starts counts for both strands for this base (key)
- * @see getCount(Map map, String key, int offset)
- * @param map
- * @param key
- * @return
+ * Retrieves the count of novel starts for the specified key from the given map.
+ * The map contains bases as keys, and each key corresponds to a short array
+ * that holds count information for various metrics. The novel starts count is
+ * calculated by summing the forward strand novel starts count and the reverse strand
+ * novel starts count.
+ *
+ * @param map A map where keys are strings representing bases, and values are
+ * short arrays containing count data. The short array has
+ * 4 elements in the following order:
+ * - Forward strand count
+ * - Forward strand novel starts count
+ * - Reverse strand count
+ * - Reverse strand novel starts count
+ * @param key The specific base key for which to retrieve the novel starts count.
+ * @return The sum of forward strand and reverse strand novel starts counts for
+ * the specified base key. If the key is not found in the map, 0 is returned.
*/
public static int getNovelStartsCounts(Map map, String key) {
return getCount(map, key, 1);
}
/**
- * Returns the total count for both strands for this base
- * @see getCount(Map map, String key, int offset)
- * @param map
- * @param key
- * @return
+ * Retrieves the total counts for the specified key from the given map.
+ * The map contains bases as keys, and each key corresponds to a short array
+ * that holds count information for various metrics. The total count is
+ * calculated by summing the counts from both forward and reverse strands.
+ *
+ * @param map A map where keys are strings representing bases, and values
+ * are short arrays containing count data. The short array has
+ * 4 elements in the following order:
+ * - Forward strand count
+ * - Forward strand novel starts count
+ * - Reverse strand count
+ * - Reverse strand novel starts count
+ * @param key The specific base key for which to retrieve the total counts.
+ * @return The total counts (sum of forward and reverse strand counts) for
+ * the specified base key. If the key is not found in the map, 0 is returned.
*/
public static int getTotalCounts(Map map, String key) {
return getCount(map, key, 0);
@@ -543,10 +554,10 @@ public static int getTotalCounts(Map map, String key) {
public static String[] getMR(Map map, String[] aAlts, int firstG, int secondG) {
if (null == aAlts) {
- throw new IllegalArgumentException("Null or empty alts passed to PipelIneUtil.getMR");
+ throw new IllegalArgumentException("Null or empty alts passed to PipeLineUtil.getMR");
}
if (null == map) {
- throw new IllegalArgumentException("Null map passed to PipelIneUtil.getMR");
+ throw new IllegalArgumentException("Null map passed to PipeLineUtil.getMR");
}
/*
@@ -578,10 +589,10 @@ public static String[] getMR(Map map, String[] aAlts, int first
/*
* if string builder are empty, put missing data in there
*/
- if (mr.length() == 0) {
+ if (mr.isEmpty()) {
mr.append(Constants.MISSING_DATA_STRING);
}
- if (nns.length() == 0) {
+ if (nns.isEmpty()) {
nns.append(Constants.MISSING_DATA_STRING);
}
@@ -600,9 +611,9 @@ public static int getCoverage(Map map) {
/**
* Create compound snp based purely on GATK vcf information.
- * Classification (ie. SOMATIC) must be same for all snps - thats about the only rule...
+ * Classification (i.e. SOMATIC) must be same for all snps - that's about the only rule...
* oh, and the genotypes need to be the same for all control samples and for all test samples
- * eg. 0/0 ->0/1 for all snps in cs
+ * e.g. 0/0 ->0/1 for all snps in cs
*
* @param vcfs
* @return
@@ -636,7 +647,7 @@ public static Optional createCompoundSnpGATK(List vcfs, bo
String [] gtArr = ffMap.get(VcfHeaderUtils.FORMAT_GENOTYPE);
String [] dpArr = ffMap.get(VcfHeaderUtils.FORMAT_READ_DEPTH);
if (null == dpArr) {
- logger.warn("null dp array for rec: " + v.toString());
+ logger.warn("null dp array for rec: " + v);
}
String [] adArr = ffMap.get(VcfHeaderUtils.FORMAT_ALLELIC_DEPTHS);
String [] gqArr = ffMap.get(VcfHeaderUtils.FORMAT_GENOTYPE_QUALITY);
@@ -675,11 +686,11 @@ public static Optional createCompoundSnpGATK(List vcfs, bo
// if gts are the same, alls well
if (singleSampleMode || (controlGTs.stream().distinct().count() == 1 && testGTs.stream().distinct().count() == 1 )) {
- VcfRecord firstRec = vcfs.get(0);
+ VcfRecord firstRec = vcfs.getFirst();
VcfRecord v = VcfUtils.createVcfRecord(firstRec.getChrPosition(), null, csRef, csAlt);
/*
- * sort collections to get lowest value first - thats what we will use
+ * sort collections to get lowest value first - that's what we will use
*/
if ( ! singleSampleMode) {
controlDPs.sort(null);
@@ -705,26 +716,26 @@ public static Optional createCompoundSnpGATK(List vcfs, bo
*/
StringBuilder cSB = null;
if ( ! singleSampleMode) {
- cSB = new StringBuilder(controlGTs.get(0)); //GT
- StringUtils.updateStringBuilder(cSB, controlADs.get(0), Constants.COLON); //AD
- StringUtils.updateStringBuilder(cSB, controlDPs.get(0), Constants.COLON); //DP
+ cSB = new StringBuilder(controlGTs.getFirst()); //GT
+ StringUtils.updateStringBuilder(cSB, controlADs.getFirst(), Constants.COLON); //AD
+ StringUtils.updateStringBuilder(cSB, controlDPs.getFirst(), Constants.COLON); //DP
StringUtils.updateStringBuilder(cSB, Constants.MISSING_DATA_STRING, Constants.COLON); //FT
- StringUtils.updateStringBuilder(cSB,controlGQs.get(0), Constants.COLON); // GQ field
+ StringUtils.updateStringBuilder(cSB,controlGQs.getFirst(), Constants.COLON); // GQ field
StringUtils.updateStringBuilder(cSB,cINF, Constants.COLON); // INF field
StringUtils.updateStringBuilder(cSB, Constants.MISSING_DATA_STRING, Constants.COLON); //NNS
StringUtils.updateStringBuilder(cSB, Constants.MISSING_DATA_STRING, Constants.COLON); //OABS
- StringUtils.updateStringBuilder(cSB, controlQLs.get(0), Constants.COLON); //QL
+ StringUtils.updateStringBuilder(cSB, controlQLs.getFirst(), Constants.COLON); //QL
}
- StringBuilder tSB = new StringBuilder(testGTs.get(0));
- StringUtils.updateStringBuilder(tSB, testADs.get(0), Constants.COLON);
- StringUtils.updateStringBuilder(tSB, testDPs.get(0), Constants.COLON);
+ StringBuilder tSB = new StringBuilder(testGTs.getFirst());
+ StringUtils.updateStringBuilder(tSB, testADs.getFirst(), Constants.COLON);
+ StringUtils.updateStringBuilder(tSB, testDPs.getFirst(), Constants.COLON);
StringUtils.updateStringBuilder(tSB, Constants.MISSING_DATA_STRING, Constants.COLON);
- StringUtils.updateStringBuilder(tSB, testGQs.get(0), Constants.COLON); // GQ field
+ StringUtils.updateStringBuilder(tSB, testGQs.getFirst(), Constants.COLON); // GQ field
StringUtils.updateStringBuilder(tSB, somCount > 0 ? "SOMATIC" : Constants.MISSING_DATA_STRING, Constants.COLON); // INF field
StringUtils.updateStringBuilder(tSB, Constants.MISSING_DATA_STRING, Constants.COLON); //NNS
StringUtils.updateStringBuilder(tSB, Constants.MISSING_DATA_STRING, Constants.COLON); //OABS
- StringUtils.updateStringBuilder(tSB, testQLs.get(0), Constants.COLON); //QL
+ StringUtils.updateStringBuilder(tSB, testQLs.getFirst(), Constants.COLON); //QL
if ( singleSampleMode) {
v.setFormatFields(Arrays.asList("GT:AD:DP:FT:GQ:INF:NNS:OABS:QL", tSB.toString()));
@@ -739,12 +750,36 @@ public static Optional createCompoundSnpGATK(List vcfs, bo
return Optional.empty();
}
-
+
+ public static String formatCBasesCountsNNS(Map cBasesCountsNNS) {
+ if (cBasesCountsNNS == null || cBasesCountsNNS.isEmpty()) {
+ return "cBasesCountsNNS is empty or null";
+ }
+
+ // Prepare StringBuilder to format content
+ StringBuilder sb = new StringBuilder("{");
+ cBasesCountsNNS.forEach((key, counts) -> {
+ sb.append(key)
+ .append(": [")
+ .append(Arrays.toString(counts)) // Format short[] as a string
+ .append("], ");
+ });
+
+ // Remove trailing ", " if it exists and close the string
+ if (sb.length() > 1) {
+ sb.setLength(sb.length() - 2); // Remove last ", "
+ }
+ sb.append("}");
+
+ return sb.toString();
+ }
+
+
public static Optional createCompoundSnp(Map> vcfs, List controlRules, List testRules, boolean runSBias, int sBiasCov, int sBiasAlt) {
Pair, List> p = getAccs(vcfs);
Optional refO = getReference(vcfs.keySet());
- String ref = refO.isPresent() ? refO.get() : null;
+ String ref = refO.orElse(null);
Map cBasesCountsNNS = getBasesFromAccumulators(p.left());
Map tBasesCountsNNS = getBasesFromAccumulators(p.right());
@@ -753,7 +788,7 @@ public static Optional createCompoundSnp(Map createCompoundSnp(Map createCompoundSnp(Map createCompoundSnp(Map oOabs = getOABS(cBasesCountsNNS);
- String oabs = oOabs.isPresent() ? oOabs.get() : Constants.MISSING_DATA_STRING;
-
+ String oabs = oOabs.orElse(Constants.MISSING_DATA_STRING);
+
+ String failedFilter = getFailedFilterCS(p.left());
+
StringBuilder cSB = new StringBuilder(altsAndGTs.get(1));
- StringUtils.updateStringBuilder(cSB, VcfUtils.getAD(ref, altsAndGTs.get(0), oabs), Constants.COLON);
- StringUtils.updateStringBuilder(cSB, controlCov > 0 ? controlCov+"" : "0", Constants.COLON);
+ StringUtils.updateStringBuilder(cSB, VcfUtils.getAD(ref, altsAndGTs.get(0), oabs), Constants.COLON); // AD field
+ StringUtils.updateStringBuilder(cSB, controlCov > 0 ? controlCov + "" : "0", Constants.COLON); //DP
+ StringUtils.updateStringBuilder(cSB, failedFilter, Constants.COLON); // FF (failed filter)
/*
* filters are applied in qannotate now
*/
- StringUtils.updateStringBuilder(cSB, Constants.MISSING_DATA_STRING, Constants.COLON);
+ StringUtils.updateStringBuilder(cSB, Constants.MISSING_DATA_STRING, Constants.COLON); // FT field
String [] mrNNS = getMR(cBasesCountsNNS, aAlts, controlFirstG, controlSecondG);
- StringUtils.updateStringBuilder(cSB, Constants.MISSING_DATA_STRING, Constants.COLON); // INF field
- StringUtils.updateStringBuilder(cSB, mrNNS[1], Constants.COLON);
- StringUtils.updateStringBuilder(cSB, oabs, Constants.COLON);
+ StringUtils.updateStringBuilder(cSB, Constants.MISSING_DATA_STRING, Constants.COLON); // INF field
+ StringUtils.updateStringBuilder(cSB, mrNNS[1], Constants.COLON); // NNS field
+ StringUtils.updateStringBuilder(cSB, oabs, Constants.COLON); // OABS field
oOabs = getOABS(tBasesCountsNNS);
- oabs = oOabs.isPresent() ? oOabs.get() : Constants.MISSING_DATA_STRING;
+ oabs = oOabs.orElse(Constants.MISSING_DATA_STRING);
StringBuilder tSB = new StringBuilder(altsAndGTs.get(2));
- StringUtils.updateStringBuilder(tSB, VcfUtils.getAD(ref, altsAndGTs.get(0), oabs), Constants.COLON);
- StringUtils.updateStringBuilder(tSB, testCov > 0 ? testCov +"" : "0", Constants.COLON);
+ StringUtils.updateStringBuilder(tSB, VcfUtils.getAD(ref, altsAndGTs.get(0), oabs), Constants.COLON); // AD field
+ StringUtils.updateStringBuilder(tSB, testCov > 0 ? testCov + "" : "0", Constants.COLON); //DP
+ failedFilter = getFailedFilterCS(p.right());
+ StringUtils.updateStringBuilder(tSB, failedFilter, Constants.COLON); // FF (failed filter)
/*
* filters are applied in qannotate now
*/
- StringUtils.updateStringBuilder(tSB, Constants.MISSING_DATA_STRING, Constants.COLON);
+ StringUtils.updateStringBuilder(tSB, Constants.MISSING_DATA_STRING, Constants.COLON); // FT field
StringUtils.updateStringBuilder(tSB, (c == Classification.SOMATIC ? VcfHeaderUtils.INFO_SOMATIC : Constants.MISSING_DATA_STRING), Constants.COLON); // INF field
mrNNS = getMR(tBasesCountsNNS,aAlts, testFirstG, testSecondG);
- StringUtils.updateStringBuilder(tSB, mrNNS[1], Constants.COLON);
- StringUtils.updateStringBuilder(tSB, oabs, Constants.COLON);
+ StringUtils.updateStringBuilder(tSB, mrNNS[1], Constants.COLON); // NNS field
+ StringUtils.updateStringBuilder(tSB, oabs, Constants.COLON); // OABS field
- v.setFormatFields(Arrays.asList("GT:AD:DP:FT:INF:NNS:OABS", cSB.toString(), tSB.toString()));
+ v.setFormatFields(Arrays.asList("GT:AD:DP:FF:FT:INF:NNS:OABS", cSB.toString(), tSB.toString()));
- return Optional.ofNullable(v);
+ return Optional.of(v);
+ }
+
+ public static String getFailedFilterCS(List accumulators) {
+ if (null == accumulators || accumulators.isEmpty()) {
+ return Constants.MISSING_DATA_STRING;
+ }
+ Map failedFilters = new THashMap<>();
+ int x = 0;
+ int runningTally = 0;
+ for (Accumulator acc : accumulators) {
+ if (null != acc) {
+ String padding = "_".repeat(x);
+ TLongList count = acc.getFailedFilterACount();
+ if (null != count && ! count.isEmpty()) {
+ TLongHashSet failedFilterSet = new TLongHashSet(count);
+ long[] array = failedFilterSet.toArray();
+ for (long l : array) {
+ failedFilters.computeIfAbsent(l, k -> new StringBuilder(padding)).append("A");
+ }
+ }
+ count = acc.getFailedFilterCCount();
+ if (null != count && ! count.isEmpty()) {
+ TLongHashSet failedFilterSet = new TLongHashSet(count);
+ long[] array = failedFilterSet.toArray();
+ for (long l : array) {
+ failedFilters.computeIfAbsent(l, k -> new StringBuilder(padding)).append("C");
+ }
+ }
+ count = acc.getFailedFilterGCount();
+ if (null != count && ! count.isEmpty()) {
+ TLongHashSet failedFilterSet = new TLongHashSet(count);
+ long[] array = failedFilterSet.toArray();
+ for (long l : array) {
+ failedFilters.computeIfAbsent(l, k -> new StringBuilder(padding)).append("G");
+ }
+ }
+ count = acc.getFailedFilterTCount();
+ if (null != count && ! count.isEmpty()) {
+ TLongHashSet failedFilterSet = new TLongHashSet(count);
+ long[] array = failedFilterSet.toArray();
+ for (long l : array) {
+ failedFilters.computeIfAbsent(l, k -> new StringBuilder(padding)).append("T");
+ }
+ }
+ }
+ if (x >= 1) {
+ /*
+ add padding to any entry in the map whose value is shorter than (x + 1), so every value has at least (x + 1) characters
+ */
+ for (Map.Entry e : failedFilters.entrySet()) {
+ if (e.getValue().length() < (x + 1)) {
+ e.getValue().append("_".repeat((x + 1) - e.getValue().length()));
+ }
+ }
+ }
+ x++;
+// logger.info("runningTally: " + runningTally + ", failedFilters.size(): " + failedFilters.size());
+
+ /*
+ purge any entries in the map whose value has a length of (x + 1) or more - x has just been incremented, so these hold more characters than accumulators visited so far
+ */
+ Iterator> iter = failedFilters.entrySet().iterator();
+ while (iter.hasNext()) {
+ Map.Entry entry = iter.next();
+ if (entry.getValue().length() >= (x + 1)) {
+ iter.remove();
+ }
+ }
+
+ }
+ for (Map.Entry e : failedFilters.entrySet()) {
+ if (e.getValue().length() < x) {
+ e.getValue().append("_".repeat(x - e.getValue().length()));
+ }
+ }
+
+ /*
+ * now we have a map of readIds and failed filters, we need to turn this into a string
+ */
+ Map failedFilterCounts = new HashMap<>();
+ failedFilters.forEach((k,v) -> failedFilterCounts.computeIfAbsent(v.toString(), k1 -> new AtomicInteger()).incrementAndGet());
+
+ StringBuilder outputSB = new StringBuilder();
+ failedFilterCounts.entrySet().stream() .sorted(Map.Entry.comparingByKey())
+ .forEach(e -> StringUtils.updateStringBuilder(outputSB, e.getKey() + e.getValue().get(), ';'));
+
+ return outputSB.isEmpty() ? Constants.MISSING_DATA_STRING : outputSB.toString();
}
}
diff --git a/qsnp/test/org/qcmg/snp/PipelineTest.java b/qsnp/test/org/qcmg/snp/PipelineTest.java
index cfe168dc2..a4e79d91b 100644
--- a/qsnp/test/org/qcmg/snp/PipelineTest.java
+++ b/qsnp/test/org/qcmg/snp/PipelineTest.java
@@ -3,10 +3,7 @@
import static org.junit.Assert.assertEquals;
import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
+import java.util.*;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.atomic.AtomicInteger;
@@ -122,4 +119,45 @@ private static SAMRecord createSamRec(String s, SAMFileHeader header) {
return rec;
}
+
+ @Test
+ public void testNameHashing() {
+ List names = Arrays.asList( "HGMY7DSX5230415:3:1538:31602:12446",
+ "HGN5JDSX5230415:4:1456:2709:30452",
+ "HGNCTDSX5230415:3:2520:4191:1266",
+ "HGNCTDSX5230415:3:1162:4490:7044",
+ "HGN5JDSX5230415:4:1148:16197:31015",
+ "HGNCTDSX5230415:3:2431:27805:36338",
+ "HGMY7DSX5230415:3:1318:28899:23234",
+ "HGN5JDSX5230415:4:1547:1759:7075",
+ "HGMWYDSX5230415:3:1236:30590:22561",
+ "HGMWYDSX5230415:3:2639:10818:19727",
+ "HGN5JDSX5230415:4:2371:13114:3474",
+ "HGMY7DSX5230415:3:1555:25482:29653",
+ "HGMY7DSX5230415:3:1644:20365:15029",
+ "HGMY7DSX5230415:3:1561:27091:11741",
+ "HGNCTDSX5230415:3:2515:16857:21919",
+ "HGMWYDSX5230415:3:1126:2166:31610",
+ "HGN5JDSX5230415:4:1456:2709:30452",
+ "HGNCTDSX5230415:3:1423:11342:20603",
+ "HGMWYDSX5230415:3:1555:18195:3411",
+ "HGMWYDSX5230415:3:2573:27751:31422",
+ "HGN5JDSX5230415:4:2208:8232:13088",
+ "HGNCTDSX5230415:3:1262:31783:25034",
+ "HGNCTDSX5230415:3:1262:31783:25034",
+ "HGMWYDSX5230415:3:2671:30002:6668",
+ "HGMWYDSX5230415:3:2344:28357:27524",
+ "HGMY7DSX5230415:3:2623:25364:17268",
+ "HGN5JDSX5230415:4:2368:32660:33818");
+ assertEquals(27, names.size());
+ /*
+ put into a set to check for uniqueness
+ */
+ Set nameSet = new HashSet<>(names);
+ assertEquals(25, nameSet.size());
+ /*
+ now hash them and check we get the same amount of unique hashes
+ */
+
+ }
}
diff --git a/qsnp/test/org/qcmg/snp/ProcessBamRecordTest.java b/qsnp/test/org/qcmg/snp/ProcessBamRecordTest.java
index 6bebb2b4b..f6533691a 100644
--- a/qsnp/test/org/qcmg/snp/ProcessBamRecordTest.java
+++ b/qsnp/test/org/qcmg/snp/ProcessBamRecordTest.java
@@ -166,7 +166,7 @@ public void updateMapWithAccums(int startPosition, final byte[] bases, final byt
if (passesFilter)
acc.addBase(bases[i + offset], qualities[i + offset], forwardStrand, startPosition, startPosition + i + referenceOffset, endPosition, 1);
else
- acc.addFailedFilterBase(bases[i + offset]);
+ acc.addFailedFilterBase(bases[i + offset], 2);
}
}
diff --git a/qsnp/test/org/qcmg/snp/util/GenotypeUtilTest.java b/qsnp/test/org/qcmg/snp/util/GenotypeUtilTest.java
index 140eda41c..be45f6f00 100644
--- a/qsnp/test/org/qcmg/snp/util/GenotypeUtilTest.java
+++ b/qsnp/test/org/qcmg/snp/util/GenotypeUtilTest.java
@@ -78,7 +78,7 @@ public void isSomatic3() {
/*
* chr10 54817257 rs386743785 AG GA . . IN=1,2;DB;HOM=0,TTTAACCTTCgaCTTGCCCACA;EFF=intergenic_region(MODIFIER||||||||||1) GT:AD:CCC:CCM:DP:FT:INF:MR:NNS:OABS 1/1:0,19:Germline:32:34:PASS:.:19:19:AA8[]6[];GA8[]11[];TA0[]1[] 0/0:2,0:ReferenceNoVariant:32:55:PASS:SOMATIC:.:.:AA33[]20[];AG1[]1[];A_2[]0[] 1/1:0,19:Germline:32:34:PASS:.:19:19:AA8[]6[];GA8[]11[];TA0[]1[] 0/0:2,0:ReferenceNoVariant:32:55:PASS:SOMATIC:.:.:AA33[]20[];AG1[]1[];A_2[]0[]
*
- * need to decomose this cs to see if the individual snps are somatic - should be germline
+ * need to decompose this cs to see if the individual snps are somatic - should be germline
*/
//AA8[]6[];GA8[]11[];TA0[]1[]
@@ -126,8 +126,8 @@ public void getFilters() {
control = AccumulatorUtils.createFromOABS(cOABS, 13118);
test = AccumulatorUtils.createFromOABS(tOABS, 13118);
gt = "0/1";
- assertEquals("0/1:10,6:"+control.getCoverage()+":.:.:.:.:2:"+cOABS, GenotypeUtil.getFormatValues(control, gt, "T", 'C', true, 5, 5,Classification.GERMLINE, true));
- assertEquals("0/1:10,4:"+test.getCoverage()+":.:.:.:.:1:"+tOABS, GenotypeUtil.getFormatValues(test, gt, "T", 'C', true, 5, 5,Classification.GERMLINE, false));
+ assertEquals("0/1:10,6:" + control.getCoverage() + ":.:.:.:.:2:" + cOABS, GenotypeUtil.getFormatValues(control, gt, "T", 'C', true, 5, 5,Classification.GERMLINE, true));
+ assertEquals("0/1:10,4:" + test.getCoverage() + ":.:.:.:.:1:" + tOABS, GenotypeUtil.getFormatValues(test, gt, "T", 'C', true, 5, 5,Classification.GERMLINE, false));
}
@@ -143,9 +143,9 @@ public void mutationEqualsReference() {
Accumulator control = AccumulatorUtils.createFromOABS(cOABS, 16534);
Accumulator test = AccumulatorUtils.createFromOABS(tOABS, 16534);
String gt = "1/1";
- assertEquals(gt + ":0,3:"+control.getCoverage()+":.:.:.:.:2:"+cOABS, GenotypeUtil.getFormatValues(control, gt, "T", 'C', true, 5, 5,Classification.SOMATIC, true));
+ assertEquals(gt + ":0,3:" + control.getCoverage()+":.:.:.:.:2:" + cOABS, GenotypeUtil.getFormatValues(control, gt, "T", 'C', true, 5, 5,Classification.SOMATIC, true));
gt = "0/0";
- assertEquals(gt + ":3,2:"+test.getCoverage()+":.:.:.:SOMATIC:.:"+tOABS, GenotypeUtil.getFormatValues(test, gt, "T", 'C', true, 5, 5,Classification.SOMATIC, false));
+ assertEquals(gt + ":3,2:" + test.getCoverage()+":.:.:.:SOMATIC:.:" + tOABS, GenotypeUtil.getFormatValues(test, gt, "T", 'C', true, 5, 5,Classification.SOMATIC, false));
}
@Test
@@ -159,16 +159,16 @@ public void san3() {
Accumulator test = AccumulatorUtils.createFromOABS(tOABS, 16571);
assertEquals(Classification.GERMLINE, GenotypeUtil.getClassification("ACG", ".", "1/1","A"));
String gt = "1/1";
- assertEquals("./.:2,1:"+control.getCoverage()+":.:.:.:.:.:"+cOABS, GenotypeUtil.getFormatValues(control, null, "A", 'G', true, 5, 5,Classification.GERMLINE, true));
- assertEquals(gt + ":2,3:"+test.getCoverage()+":.:.:.:.:1:"+tOABS, GenotypeUtil.getFormatValues(test, gt, "A", 'G', true, 5, 5,Classification.GERMLINE, false));
+ assertEquals("./.:2,1:" + control.getCoverage() + ":.:.:.:.:.:" + cOABS, GenotypeUtil.getFormatValues(control, null, "A", 'G', true, 5, 5,Classification.GERMLINE, true));
+ assertEquals(gt + ":2,3:" + test.getCoverage() + ":.:.:.:.:1:" + tOABS, GenotypeUtil.getFormatValues(test, gt, "A", 'G', true, 5, 5,Classification.GERMLINE, false));
cOABS = "A0[0]1[34];C0[0]1[1]";
tOABS = "A0[0]3[18];G0[0]2[30]";
control = AccumulatorUtils.createFromOABS(cOABS, 16571);
test = AccumulatorUtils.createFromOABS(tOABS, 16571);
assertEquals(Classification.GERMLINE, GenotypeUtil.getClassification("ACG", ".", "1/1","A"));
- assertEquals("./.:0,1:"+control.getCoverage()+":.:.:.:.:.:"+cOABS, GenotypeUtil.getFormatValues(control, null, "A", 'G', true, 5, 5,Classification.GERMLINE, true));
- assertEquals(gt + ":2,3:"+test.getCoverage()+":.:.:.:.:1:"+tOABS, GenotypeUtil.getFormatValues(test, gt, "A", 'G', true, 5, 5,Classification.GERMLINE, false));
+ assertEquals("./.:0,1:" + control.getCoverage()+":.:.:.:.:.:" + cOABS, GenotypeUtil.getFormatValues(control, null, "A", 'G', true, 5, 5,Classification.GERMLINE, true));
+ assertEquals(gt + ":2,3:" + test.getCoverage()+":.:.:.:.:1:" + tOABS, GenotypeUtil.getFormatValues(test, gt, "A", 'G', true, 5, 5,Classification.GERMLINE, false));
}
@Test
@@ -182,16 +182,16 @@ public void sat3() {
Accumulator test = AccumulatorUtils.createFromOABS(tOABS, 133129);
assertEquals(Classification.GERMLINE, GenotypeUtil.getClassification("AG", "1/1", ".","A"));
String gt = "1/1";
- assertEquals(gt + ":1,3:"+control.getCoverage()+":.:.:.:.:2:"+cOABS, GenotypeUtil.getFormatValues(control, gt, "A", 'G', true, 5, 5,Classification.GERMLINE, true));
- assertEquals("./.:2,1:"+test.getCoverage()+":.:.:.:.:.:"+tOABS, GenotypeUtil.getFormatValues(test, null, "A", 'G', true, 5, 5,Classification.GERMLINE, false));
+ assertEquals(gt + ":1,3:" + control.getCoverage()+":.:.:.:.:2:" + cOABS, GenotypeUtil.getFormatValues(control, gt, "A", 'G', true, 5, 5,Classification.GERMLINE, true));
+ assertEquals("./.:2,1:" + test.getCoverage()+":.:.:.:.:.:" + tOABS, GenotypeUtil.getFormatValues(test, null, "A", 'G', true, 5, 5,Classification.GERMLINE, false));
cOABS = "A2[30]1[2];G0[0]1[36]";
tOABS = "A1[21]0[0];G0[0]1[23]";
control = AccumulatorUtils.createFromOABS(cOABS, 133129);
test = AccumulatorUtils.createFromOABS(tOABS, 133129);
assertEquals(Classification.GERMLINE, GenotypeUtil.getClassification("AG", "1/1", ".","A"));
- assertEquals(gt + ":1,3:"+control.getCoverage()+":.:.:.:.:2:"+cOABS, GenotypeUtil.getFormatValues(control, gt, "A", 'G', true, 5, 5,Classification.GERMLINE, true));
- assertEquals("./.:1,1:"+test.getCoverage()+":.:.:.:.:.:"+tOABS, GenotypeUtil.getFormatValues(test, null, "A", 'G', true, 5, 5,Classification.GERMLINE, false));
+ assertEquals(gt + ":1,3:" + control.getCoverage()+":.:.:.:.:2:" + cOABS, GenotypeUtil.getFormatValues(control, gt, "A", 'G', true, 5, 5,Classification.GERMLINE, true));
+ assertEquals("./.:1,1:" + test.getCoverage()+":.:.:.:.:.:" + tOABS, GenotypeUtil.getFormatValues(test, null, "A", 'G', true, 5, 5,Classification.GERMLINE, false));
}
@Test
@@ -205,9 +205,9 @@ public void doubleMIN() {
Accumulator test = AccumulatorUtils.createFromOABS(tOABS, 15274);
assertEquals(Classification.SOMATIC, GenotypeUtil.getClassification("GT", "2/2", "1/2","G,T"));
String gt = "2/2";
- assertEquals(gt + ":0,4,61:"+control.getCoverage()+":.:.:.:.:2:"+cOABS, GenotypeUtil.getFormatValues(control, gt, "G,T", 'A', true, 5, 5,Classification.SOMATIC, true));
+ assertEquals(gt + ":0,4,61:" + control.getCoverage()+":.:.:.:.:2:" + cOABS, GenotypeUtil.getFormatValues(control, gt, "G,T", 'A', true, 5, 5,Classification.SOMATIC, true));
gt = "1/2";
- assertEquals(gt + ":0,9,47:"+test.getCoverage()+":.:.:.:SOMATIC:1,2:"+tOABS, GenotypeUtil.getFormatValues(test, gt, "G,T", 'A', true, 5, 5,Classification.SOMATIC, false));
+ assertEquals(gt + ":0,9,47:" + test.getCoverage()+":.:.:.:SOMATIC:1,2:" + tOABS, GenotypeUtil.getFormatValues(test, gt, "G,T", 'A', true, 5, 5,Classification.SOMATIC, false));
}
@Test
@@ -221,9 +221,9 @@ public void getAD() {
Accumulator test = AccumulatorUtils.createFromOABS(tOABS, 15274);
assertEquals(Classification.SOMATIC, GenotypeUtil.getClassification("GT", "2/2", "1/2","G,T"));
String gt = "2/2";
- assertEquals(gt + ":0,4,61:"+control.getCoverage()+":.:.:.:.:2:"+cOABS, GenotypeUtil.getFormatValues(control, gt, "G,T", 'A', true, 5, 5,Classification.SOMATIC, true));
+ assertEquals(gt + ":0,4,61:" + control.getCoverage() + ":.:.:.:.:2:" + cOABS, GenotypeUtil.getFormatValues(control, gt, "G,T", 'A', true, 5, 5,Classification.SOMATIC, true));
gt = "1/2";
- assertEquals(gt + ":0,9,47:"+test.getCoverage()+":.:.:.:SOMATIC:1,2:"+tOABS, GenotypeUtil.getFormatValues(test, gt, "G,T", 'A', true, 5, 5,Classification.SOMATIC, false));
+ assertEquals(gt + ":0,9,47:" + test.getCoverage() + ":.:.:.:SOMATIC:1,2:" + tOABS, GenotypeUtil.getFormatValues(test, gt, "G,T", 'A', true, 5, 5,Classification.SOMATIC, false));
}
@Test
@@ -238,30 +238,31 @@ public void miun() {
Classification c = Classification.SOMATIC;
assertEquals(Classification.SOMATIC, GenotypeUtil.getClassification("GT", "2/2", "1/2","G,T"));
String gt = "0/0";
- assertEquals(gt + ":356,6:"+control.getCoverage()+":.:.:.:.:.:"+cOABS, GenotypeUtil.getFormatValues(control, gt, "A", 'G', true, 5, 5,c, true));
+ assertEquals(gt + ":356,6:" + control.getCoverage()+":.:.:.:.:.:" + cOABS, GenotypeUtil.getFormatValues(control, gt, "A", 'G', true, 5, 5,c, true));
gt = "0/1";
- assertEquals(gt + ":234,16:"+test.getCoverage()+":.:.:.:SOMATIC:2:"+tOABS, GenotypeUtil.getFormatValues(test, gt, "A", 'G', true, 5, 5,c, false));
+ assertEquals(gt + ":234,16:" + test.getCoverage()+":.:.:.:SOMATIC:2:" + tOABS, GenotypeUtil.getFormatValues(test, gt, "A", 'G', true, 5, 5,c, false));
/*
* add in the unfiltered alt - need 3% of these to trigger (which is 11 in this case
*/
- control.addFailedFilterBase((byte)'A');
- control.addFailedFilterBase((byte)'A');
+ long readNameHash = 1;
+ control.addFailedFilterBase((byte)'A', readNameHash++);
+ control.addFailedFilterBase((byte)'A', readNameHash++);
gt = "0/0";
- assertEquals(gt + ":356,6:"+control.getCoverage()+":.:A2:.:.:.:"+cOABS, GenotypeUtil.getFormatValues(control, gt, "A", 'G', true, 5, 5,c, true));
+ assertEquals(gt + ":356,6:" + control.getCoverage() + ":.:A2:.:.:.:" + cOABS, GenotypeUtil.getFormatValues(control, gt, "A", 'G', true, 5, 5,c, true));
gt = "0/1";
- assertEquals(gt + ":234,16:"+test.getCoverage()+":.:.:.:SOMATIC:2:"+tOABS, GenotypeUtil.getFormatValues(test, gt, "A", 'G', true, 5, 5,c, false));
+ assertEquals(gt + ":234,16:" + test.getCoverage() + ":.:.:.:SOMATIC:2:" + tOABS, GenotypeUtil.getFormatValues(test, gt, "A", 'G', true, 5, 5,c, false));
/*
* add 9 more
*/
for (int i = 0 ; i < 9 ; i++) {
- control.addFailedFilterBase((byte)'A');
+ control.addFailedFilterBase((byte)'A', readNameHash++);
}
gt = "0/0";
- assertEquals(gt + ":356,6:"+control.getCoverage()+":.:A11:.:.:.:"+cOABS, GenotypeUtil.getFormatValues(control, gt, "A", 'G', true, 5, 5,c, true));
+ assertEquals(gt + ":356,6:"+control.getCoverage() + ":.:A11:.:.:.:" + cOABS, GenotypeUtil.getFormatValues(control, gt, "A", 'G', true, 5, 5,c, true));
gt = "0/1";
- assertEquals(gt + ":234,16:"+test.getCoverage()+":.:.:.:SOMATIC:2:"+tOABS, GenotypeUtil.getFormatValues(test, gt, "A", 'G', true, 5, 5,c, false));
+ assertEquals(gt + ":234,16:"+test.getCoverage() + ":.:.:.:SOMATIC:2:" + tOABS, GenotypeUtil.getFormatValues(test, gt, "A", 'G', true, 5, 5,c, false));
}
@Test
diff --git a/qsnp/test/org/qcmg/snp/util/PipelineUtilTest.java b/qsnp/test/org/qcmg/snp/util/PipelineUtilTest.java
index de957f733..f28b30d13 100644
--- a/qsnp/test/org/qcmg/snp/util/PipelineUtilTest.java
+++ b/qsnp/test/org/qcmg/snp/util/PipelineUtilTest.java
@@ -1,16 +1,9 @@
package org.qcmg.snp.util;
-import static org.junit.Assert.assertArrayEquals;
-import static org.junit.Assert.assertEquals;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Optional;
+import java.util.*;
import org.junit.Assert;
+import org.junit.Ignore;
import org.junit.Test;
import org.qcmg.common.model.Accumulator;
import org.qcmg.common.model.ChrPointPosition;
@@ -33,6 +26,8 @@
import gnu.trove.map.hash.TIntIntHashMap;
import gnu.trove.map.hash.TLongIntHashMap;
+import static org.junit.Assert.*;
+
public class PipelineUtilTest {
public static final List cRules = Arrays.asList(new Rule(0,20,3), new Rule(21,50,4), new Rule(51,Integer.MAX_VALUE,10));
@@ -41,8 +36,8 @@ public class PipelineUtilTest {
@Test
public void getLoLoRecs() {
- assertEquals(null, PipelineUtil.listOfListOfAdjacentVcfs(null));
- assertEquals(true, PipelineUtil.listOfListOfAdjacentVcfs(new ArrayList()).isEmpty());
+ assertNull(PipelineUtil.listOfListOfAdjacentVcfs(null));
+ assertTrue(PipelineUtil.listOfListOfAdjacentVcfs(new ArrayList<>()).isEmpty());
}
@@ -111,8 +106,8 @@ public void getLoLoRecsDiffChr() {
snps.add(VcfUtils.createVcfRecord("1", 100));
snps.add(VcfUtils.createVcfRecord("2", 100));
snps.add(VcfUtils.createVcfRecord("3", 100));
-
- assertEquals(true, PipelineUtil.listOfListOfAdjacentVcfs(snps).isEmpty());
+
+ assertTrue(PipelineUtil.listOfListOfAdjacentVcfs(snps).isEmpty());
}
@Test
@@ -127,8 +122,8 @@ public void getLoLoRecsDiffClassification() {
List snps = new ArrayList<>();
snps.add(v1);
snps.add(v2);
-
- assertEquals(true, PipelineUtil.listOfListOfAdjacentVcfs(snps).isEmpty());
+
+ assertTrue(PipelineUtil.listOfListOfAdjacentVcfs(snps).isEmpty());
/*
* both som
*/
@@ -147,7 +142,7 @@ public void getLoLoRecsDiffClassification() {
* 1 germ 1 som
*/
v1.setInfo(VcfHeaderUtils.INFO_SOMATIC);
- assertEquals(true, PipelineUtil.listOfListOfAdjacentVcfs(snps).isEmpty());
+ assertTrue(PipelineUtil.listOfListOfAdjacentVcfs(snps).isEmpty());
}
@Test
@@ -196,8 +191,8 @@ public void getLoLoRecsSameChrLongWayAway() {
snps.add(VcfUtils.createVcfRecord("1", 100));
snps.add(VcfUtils.createVcfRecord("1", 200));
snps.add(VcfUtils.createVcfRecord("1", 300));
-
- assertEquals(true, PipelineUtil.listOfListOfAdjacentVcfs(snps).isEmpty());
+
+ assertTrue(PipelineUtil.listOfListOfAdjacentVcfs(snps).isEmpty());
}
@Test
public void getLoLoRecsSameChrShortWayAway() {
@@ -207,8 +202,8 @@ public void getLoLoRecsSameChrShortWayAway() {
snps.add(VcfUtils.createVcfRecord("1", 104));
snps.add(VcfUtils.createVcfRecord("1", 106));
snps.add(VcfUtils.createVcfRecord("1", 108));
-
- assertEquals(true, PipelineUtil.listOfListOfAdjacentVcfs(snps).isEmpty());
+
+ assertTrue(PipelineUtil.listOfListOfAdjacentVcfs(snps).isEmpty());
}
@Test
@@ -219,7 +214,7 @@ public void getLoLoRecsSameChr() {
snps.add(VcfUtils.createVcfRecord("1", 300));
assertEquals(1, PipelineUtil.listOfListOfAdjacentVcfs(snps).size());
- assertEquals(2, PipelineUtil.listOfListOfAdjacentVcfs(snps).get(0).size());
+ assertEquals(2, PipelineUtil.listOfListOfAdjacentVcfs(snps).getFirst().size());
}
@Test
public void getLoLoRecsSameChr2() {
@@ -232,7 +227,7 @@ public void getLoLoRecsSameChr2() {
snps.add(VcfUtils.createVcfRecord("1", 300));
assertEquals(1, PipelineUtil.listOfListOfAdjacentVcfs(snps).size());
- assertEquals(5, PipelineUtil.listOfListOfAdjacentVcfs(snps).get(0).size());
+ assertEquals(5, PipelineUtil.listOfListOfAdjacentVcfs(snps).getFirst().size());
}
@Test
@@ -246,7 +241,7 @@ public void getLoLoRecsSameChr3() {
snps.add(VcfUtils.createVcfRecord("1", 105));
assertEquals(1, PipelineUtil.listOfListOfAdjacentVcfs(snps).size());
- assertEquals(6, PipelineUtil.listOfListOfAdjacentVcfs(snps).get(0).size());
+ assertEquals(6, PipelineUtil.listOfListOfAdjacentVcfs(snps).getFirst().size());
}
@Test
@@ -260,7 +255,7 @@ public void getLoLoRecsSameChr4() {
snps.add(VcfUtils.createVcfRecord("1", 106));
assertEquals(1, PipelineUtil.listOfListOfAdjacentVcfs(snps).size());
- assertEquals(4, PipelineUtil.listOfListOfAdjacentVcfs(snps).get(0).size());
+ assertEquals(4, PipelineUtil.listOfListOfAdjacentVcfs(snps).getFirst().size());
}
@Test
@@ -311,17 +306,17 @@ public void getRef() {
@Test
public void doesStringContainLC() {
- assertEquals(false, PipelineUtil.isStringLowerCase(null));
- assertEquals(false, PipelineUtil.isStringLowerCase(""));
- assertEquals(false, PipelineUtil.isStringLowerCase("."));
- assertEquals(false, PipelineUtil.isStringLowerCase("_"));
- assertEquals(false, PipelineUtil.isStringLowerCase("_A"));
- assertEquals(false, PipelineUtil.isStringLowerCase("A_"));
- assertEquals(false, PipelineUtil.isStringLowerCase("A_B"));
- assertEquals(true, PipelineUtil.isStringLowerCase("a"));
- assertEquals(true, PipelineUtil.isStringLowerCase("_a"));
- assertEquals(true, PipelineUtil.isStringLowerCase("_a_"));
- assertEquals(true, PipelineUtil.isStringLowerCase("__x"));
+ assertFalse(PipelineUtil.isStringLowerCase(null));
+ assertFalse(PipelineUtil.isStringLowerCase(""));
+ assertFalse(PipelineUtil.isStringLowerCase("."));
+ assertFalse(PipelineUtil.isStringLowerCase("_"));
+ assertFalse(PipelineUtil.isStringLowerCase("_A"));
+ assertFalse(PipelineUtil.isStringLowerCase("A_"));
+ assertFalse(PipelineUtil.isStringLowerCase("A_B"));
+ assertTrue(PipelineUtil.isStringLowerCase("a"));
+ assertTrue(PipelineUtil.isStringLowerCase("_a"));
+ assertTrue(PipelineUtil.isStringLowerCase("_a_"));
+ assertTrue(PipelineUtil.isStringLowerCase("__x"));
}
@Test
@@ -330,7 +325,7 @@ public void getMR() {
acc1.addBase((byte)'G', (byte) 1, true, 100, 150, 200, 1);
acc1.addBase((byte)'G', (byte) 1, true, 100, 150, 200, 2);
acc1.addBase((byte)'G', (byte) 1, false, 100, 150, 200, 3);
- List accs = Arrays.asList(acc1);
+ List accs = List.of(acc1);
Map basesAndCounts = PipelineUtil.getBasesFromAccumulators(accs);
/*
@@ -359,7 +354,7 @@ public void getCount() {
acc1.addBase((byte)'G', (byte) 1, true, 1, 1, 2, 1);
acc1.addBase((byte)'G', (byte) 1, true, 1, 1, 2, 2);
acc1.addBase((byte)'G', (byte) 1, false, 1, 1, 2, 3);
- List accs = Arrays.asList(acc1);
+ List accs = List.of(acc1);
Map basesAndCounts = PipelineUtil.getBasesFromAccumulators(accs);
assertEquals(3, PipelineUtil.getCount(basesAndCounts, "G", 0));
@@ -382,10 +377,10 @@ public void nonAdjacentAccums() {
try {
PipelineUtil.getBasesFromAccumulators(accs);
Assert.fail("Should have thrown an IAE");
- } catch (IllegalArgumentException iae){};
-
- assertEquals(true, PipelineUtil.getBasesFromAccumulators(null).isEmpty());
- assertEquals(true, PipelineUtil.getBasesFromAccumulators(new ArrayList<>()).isEmpty());
+ } catch (IllegalArgumentException iae){}
+
+ assertTrue(PipelineUtil.getBasesFromAccumulators(null).isEmpty());
+ assertTrue(PipelineUtil.getBasesFromAccumulators(new ArrayList<>()).isEmpty());
}
@Test
@@ -405,7 +400,7 @@ public void getAltsAndGTsNullInputs() {
assertEquals(Constants.MISSING_GT, altsGTs.get(1));
assertEquals(Constants.MISSING_GT, altsGTs.get(2));
- List control = Arrays.asList("ABC");
+ List control = List.of("ABC");
altsGTs = PipelineUtil.getAltStringAndGenotypes(control, null, "XYZ");
assertEquals(3, altsGTs.size());
@@ -413,7 +408,7 @@ public void getAltsAndGTsNullInputs() {
assertEquals("1/1", altsGTs.get(1));
assertEquals(Constants.MISSING_GT, altsGTs.get(2));
- List test = Arrays.asList("123");
+ List test = List.of("123");
altsGTs = PipelineUtil.getAltStringAndGenotypes(null, test, "XYZ");
assertEquals(3, altsGTs.size());
@@ -424,8 +419,8 @@ public void getAltsAndGTsNullInputs() {
@Test
public void getAltsAndGTs() {
- List control = Arrays.asList("ABC");
- List test = Arrays.asList("ABC");
+ List control = List.of("ABC");
+ List test = List.of("ABC");
List altsGTs = PipelineUtil.getAltStringAndGenotypes(control, test, "XYZ");
assertEquals(3, altsGTs.size());
@@ -433,16 +428,16 @@ public void getAltsAndGTs() {
assertEquals("1/1", altsGTs.get(1));
assertEquals("1/1", altsGTs.get(2));
- control = Arrays.asList("XYZ");
- test = Arrays.asList("ABC");
+ control = List.of("XYZ");
+ test = List.of("ABC");
altsGTs = PipelineUtil.getAltStringAndGenotypes(control, test, "XYZ");
assertEquals(3, altsGTs.size());
assertEquals("ABC", altsGTs.get(0));
assertEquals("0/0", altsGTs.get(1));
assertEquals("1/1", altsGTs.get(2));
- control = Arrays.asList("XYZ");
- test = Arrays.asList("XYZ");
+ control = List.of("XYZ");
+ test = List.of("XYZ");
altsGTs = PipelineUtil.getAltStringAndGenotypes(control, test, "XYZ");
assertEquals(3, altsGTs.size());
assertEquals(Constants.MISSING_DATA_STRING, altsGTs.get(0));
@@ -496,8 +491,8 @@ public void csAltsCantContainRef() {
* The following cs is not a cs!
* chr1 985449 . GG AG . . . GT:DP:MR:OABS 1/1:12:9:AG1[]8[];GA2[]0[];_G1[]0[] 1/1:10:6:AG3[]3[];GA2[]1[];_G1[]0[]
*/
- List control = Arrays.asList("AG");
- List test = Arrays.asList("AG");
+ List control = List.of("AG");
+ List test = List.of("AG");
List altsGTs = PipelineUtil.getAltStringAndGenotypes(control, test, "GG");
assertEquals(3, altsGTs.size());
@@ -508,15 +503,15 @@ public void csAltsCantContainRef() {
@Test
public void getEmptyOABS() {
- assertEquals(false, PipelineUtil.getOABS(null).isPresent());
+ assertFalse(PipelineUtil.getOABS(null).isPresent());
Map basesAndCounts = new HashMap<>();
- assertEquals(false, PipelineUtil.getOABS(basesAndCounts).isPresent());
+ assertFalse(PipelineUtil.getOABS(basesAndCounts).isPresent());
basesAndCounts.put("XYZ", new short[]{});
- assertEquals(false, PipelineUtil.getOABS(basesAndCounts).isPresent());
+ assertFalse(PipelineUtil.getOABS(basesAndCounts).isPresent());
basesAndCounts.put("XYZ", new short[4]);
- assertEquals(true, PipelineUtil.getOABS(basesAndCounts).isPresent());
+ assertTrue(PipelineUtil.getOABS(basesAndCounts).isPresent());
assertEquals("XYZ0[]0[]", PipelineUtil.getOABS(basesAndCounts).get());
basesAndCounts.get("XYZ")[0] = 1;
@@ -535,14 +530,14 @@ public void getOABS() {
}
@Test
- public void getBasesForGentype() {
+ public void getBasesForGenotype() {
Map basesAndCounts = new HashMap<>();
basesAndCounts.put("XYZ", new short[]{10,3,11,10});
assertEquals(1, PipelineUtil.getBasesForGenotype(basesAndCounts, 10,"AAA").size());
- assertEquals("XYZ", PipelineUtil.getBasesForGenotype(basesAndCounts, 10,"AAA").get(0));
+ assertEquals("XYZ", PipelineUtil.getBasesForGenotype(basesAndCounts, 10,"AAA").getFirst());
basesAndCounts.put("ABC", new short[]{5,2,11,10});
assertEquals(1, PipelineUtil.getBasesForGenotype(basesAndCounts, 10,"AAA").size());
- assertEquals("XYZ", PipelineUtil.getBasesForGenotype(basesAndCounts, 10,"AAA").get(0));
+ assertEquals("XYZ", PipelineUtil.getBasesForGenotype(basesAndCounts, 10,"AAA").getFirst());
assertEquals(2, PipelineUtil.getBasesForGenotype(basesAndCounts, 10,"DDD").size());
assertEquals("XYZ", PipelineUtil.getBasesForGenotype(basesAndCounts, 10,"DDD").get(0));
assertEquals("ABC", PipelineUtil.getBasesForGenotype(basesAndCounts, 10,"DDD").get(1));
@@ -551,6 +546,16 @@ public void getBasesForGentype() {
assertEquals("XYZ", PipelineUtil.getBasesForGenotype(basesAndCounts, 10,"AAA").get(0));
assertEquals("HBH", PipelineUtil.getBasesForGenotype(basesAndCounts, 10,"AAA").get(1));
}
+
+ @Test
+ public void getBasesForGenotype2() {
+ Map basesAndCounts = new HashMap<>();
+ basesAndCounts.put("CG", new short[]{11,11,0,0});
+ basesAndCounts.put("TG", new short[]{1,1,0,0});
+ basesAndCounts.put("CA", new short[]{6,6,0,0});
+ assertEquals(1, PipelineUtil.getBasesForGenotype(basesAndCounts, 3,"TG").size());
+ assertEquals("CA", PipelineUtil.getBasesForGenotype(basesAndCounts, 3,"TG").getFirst());
+ }
@Test
public void getBasesFromAccs() {
@@ -618,12 +623,13 @@ public void createCSSinglePos() {
map.put(origV, new Pair<>(controlAcc100, testAcc100));
VcfRecord v = PipelineUtil.createCompoundSnp(map, cRules,tRules, true, 3, 3).orElse(null);
- assertEquals(origV.getChrPosition(), v.getChrPosition());
+ assert v != null;
+ assertEquals(origV.getChrPosition(), v.getChrPosition());
assertEquals("C", v.getAlt());
assertEquals("A", v.getRef());
- assertEquals("GT:AD:DP:FT:INF:NNS:OABS", v.getFormatFields().get(0));
- assertEquals("0/0:3,0:3:.:.:.:A2[]1[]", v.getFormatFields().get(1));
- assertEquals("1/1:0,3:3:.:SOMATIC:2:C2[]1[]", v.getFormatFields().get(2));
+ assertEquals("GT:AD:DP:FF:FT:INF:NNS:OABS", v.getFormatFields().get(0));
+ assertEquals("0/0:3,0:3:.:.:.:.:A2[]1[]", v.getFormatFields().get(1));
+ assertEquals("1/1:0,3:3:.:.:SOMATIC:2:C2[]1[]", v.getFormatFields().get(2));
}
@Test
@@ -656,12 +662,13 @@ public void createCS2Pos() {
map.put(origV2, new Pair<>(controlAcc101, testAcc101));
VcfRecord v = PipelineUtil.createCompoundSnp(map, cRules,tRules, true, 3, 3).orElse(null);
- assertEquals(2, v.getChrPosition().getLength());
+ assert v != null;
+ assertEquals(2, v.getChrPosition().getLength());
assertEquals("GT", v.getAlt());
assertEquals("AC", v.getRef());
- assertEquals("GT:AD:DP:FT:INF:NNS:OABS", v.getFormatFields().get(0));
- assertEquals("0/0:3,0:3:.:.:.:AC2[]1[]", v.getFormatFields().get(1));
- assertEquals("1/1:0,3:3:.:SOMATIC:2:GT2[]1[]", v.getFormatFields().get(2));
+ assertEquals("GT:AD:DP:FF:FT:INF:NNS:OABS", v.getFormatFields().get(0));
+ assertEquals("0/0:3,0:3:.:.:.:.:AC2[]1[]", v.getFormatFields().get(1));
+ assertEquals("1/1:0,3:3:.:.:SOMATIC:2:GT2[]1[]", v.getFormatFields().get(2));
/*
* add some noise
@@ -671,19 +678,20 @@ public void createCS2Pos() {
controlAcc100.addBase((byte)'T', (byte) 1, false, 100, 100, 200, 6);
v = PipelineUtil.createCompoundSnp(map, cRules,tRules, true, 3, 3).orElse(null);
- assertEquals(2, v.getChrPosition().getLength());
+ assert v != null;
+ assertEquals(2, v.getChrPosition().getLength());
assertEquals("GT", v.getAlt());
// assertEquals("T_,GT", v.getAlt());
assertEquals("AC", v.getRef());
- assertEquals("GT:AD:DP:FT:INF:NNS:OABS", v.getFormatFields().get(0));
- assertEquals("0/0:3,0:3:.:.:.:AC2[]1[];T_0[]3[]", v.getFormatFields().get(1));
- assertEquals("1/1:0,3:3:.:SOMATIC:2:GT2[]1[]", v.getFormatFields().get(2));
+ assertEquals("GT:AD:DP:FF:FT:INF:NNS:OABS", v.getFormatFields().get(0));
+ assertEquals("0/0:3,0:3:.:.:.:.:AC2[]1[];T_0[]3[]", v.getFormatFields().get(1));
+ assertEquals("1/1:0,3:3:.:.:SOMATIC:2:GT2[]1[]", v.getFormatFields().get(2));
}
@Test
public void containsRef() {
/*
- * chr1 16862501 . TA CG . PASS . ACCS CG,28,7,C_,1,0,TA,48,15,TG,65,38,_G,1,2,_A,0,1 CG,22,4,TA,37,14,TG,52,18,_G,1,1,C_,0,1
+ * chr1 16862501 . TA CG . PASS . ACCS CG,28,7,C_,1,0,TA,48,15,TG,65,38,_G,1,2,_A,0,1 CG,22,4,TA,37,14,TG,52,18,_G,1,1,C_,0,1
*
* Largest 2 genotypes are TA (ref) and TG, but TG won't make it as it contains the ref!
*/
@@ -726,27 +734,25 @@ public void underscoresInAlt() {
basesAndCounts.put("__G", new short[]{5,1,0,0});
basesForGenotype = PipelineUtil.getBasesForGenotype(basesAndCounts, 4,"NNN");
assertEquals(1, basesForGenotype.size());
- assertEquals("TAG", basesForGenotype.get(0));
+ assertEquals("TAG", basesForGenotype.getFirst());
}
@Test
- public void compoundSnp() throws Exception {
+ public void compoundSnp() {
VcfRecord v1 = VcfUtils.createVcfRecord(new ChrPointPosition("1", 100),null,"A","C");
v1.setInfo(VcfHeaderUtils.INFO_SOMATIC);
VcfRecord v2 = VcfUtils.createVcfRecord(new ChrPointPosition("1", 101),null,"C","G");
v2.setInfo(VcfHeaderUtils.INFO_SOMATIC);
-
final Accumulator tumour100 = new Accumulator(100);
tumour100.addBase((byte)'C', (byte)30, true, 100, 100, 200, 1);
final Accumulator tumour101 = new Accumulator(101);
tumour101.addBase((byte)'G', (byte)30, true, 101, 101, 200, 1);
-
Map> map = new HashMap<>();
map.put(v1, new Pair<>(null, tumour100));
map.put(v2, new Pair<>(null, tumour101));
- assertEquals(false, PipelineUtil.createCompoundSnp(map, cRules,tRules, true, 3, 3).isPresent());
+ assertFalse(PipelineUtil.createCompoundSnp(map, cRules, tRules, true, 3, 3).isPresent());
// need 4 reads with the cs to register
tumour100.addBase((byte)'C', (byte)30, true, 100, 100, 200, 2);
@@ -755,17 +761,17 @@ public void compoundSnp() throws Exception {
tumour101.addBase((byte)'G', (byte)30, true, 101, 101, 200, 3);
tumour100.addBase((byte)'C', (byte)30, true, 100, 100, 200, 4);
tumour101.addBase((byte)'G', (byte)30, true, 101, 101, 200, 4);
- Optional ov = PipelineUtil.createCompoundSnp(map, cRules,tRules, true, 3, 3);
- assertEquals(true, ov.isPresent());
+ Optional ov = PipelineUtil.createCompoundSnp(map, cRules,tRules, true, 3, 3);
+ assertTrue(ov.isPresent());
VcfRecord v = ov.get();
List ff = v.getFormatFields();
- assertEquals(true, ff.get(2).contains("CG4[]0[]")); // tumour
- assertEquals("./.:.:0:.:.:.:.", ff.get(1)); // control
+ assertTrue(ff.get(2).contains("CG4[]0[]")); // tumour
+ assertEquals("./.:.:0:.:.:.:.:.", ff.get(1)); // control
}
@Test
- public void compoundSnpOneGermlineOneSomatic() throws Exception {
+ public void compoundSnpOneGermlineOneSomatic() {
VcfRecord v1 = VcfUtils.createVcfRecord(new ChrPointPosition("1", 100),null,"A","C");
VcfRecord v2 = VcfUtils.createVcfRecord(new ChrPointPosition("1", 101),null,"C","G");
v2.setInfo(VcfHeaderUtils.INFO_SOMATIC);
@@ -780,7 +786,7 @@ public void compoundSnpOneGermlineOneSomatic() throws Exception {
Map> map = new HashMap<>();
map.put(v1, new Pair<>(null, tumour100));
map.put(v2, new Pair<>(null, tumour101));
- assertEquals(false, PipelineUtil.createCompoundSnp(map, cRules,tRules, true, 3, 3).isPresent());
+ assertFalse(PipelineUtil.createCompoundSnp(map, cRules, tRules, true, 3, 3).isPresent());
// need 4 reads with the cs to register
tumour100.addBase((byte)'C', (byte)30, true, 100, 100, 200, 2);
@@ -789,17 +795,17 @@ public void compoundSnpOneGermlineOneSomatic() throws Exception {
tumour101.addBase((byte)'G', (byte)30, true, 101, 101, 200, 3);
tumour100.addBase((byte)'C', (byte)30, true, 100, 100, 200, 4);
tumour101.addBase((byte)'G', (byte)30, true, 101, 101, 200, 4);
- Optional ov = PipelineUtil.createCompoundSnp(map, cRules,tRules, true, 3, 3);
- assertEquals(true, ov.isPresent());
+ Optional ov = PipelineUtil.createCompoundSnp(map, cRules,tRules, true, 3, 3);
+ assertTrue(ov.isPresent());
VcfRecord v = ov.get();
List ff = v.getFormatFields();
- assertEquals(true, ff.get(2).contains("CG4[]0[]")); // tumour
- assertEquals("./.:.:0:.:.:.:.", ff.get(1)); // control
+ assertTrue(ff.get(2).contains("CG4[]0[]")); // tumour
+ assertEquals("./.:.:0:.:.:.:.:.", ff.get(1)); // control
}
@Test
- public void noCompoundSnpMissingAccs() throws Exception {
+ public void noCompoundSnpMissingAccs() {
/*
* chr4 8046419 . G T . . BaseQRankSum=0.694;ClippingRankSum=1.157;DP=24;FS=5.815;MQ=60.00;MQRankSum=-0.602;QD=15.87;ReadPosRankSum=0.787;SOR=2.258 GT:AD:DP:GQ:FT:INF:MR:NNS:OABS 0/1:5,14:19:99:SAN3:.:.:.:. 0/1:8,32:40:99:SBIASCOV;5BP=1;SAT3:.:1:1:T1[22]0[0]
chr4 8046420 . A C . . BaseQRankSum=1.528;ClippingRankSum=0.787;DP=24;FS=5.815;MQ=60.00;MQRankSum=-1.713;QD=15.87;ReadPosRankSum=0.787;SOR=2.258 GT:AD:DP:GQ:FT:INF:MR:NNS:OABS 0/1:5,14:19:99:SAN3:.:.:.:. 0/1:10,28:38:99:SAT3:.:.:.:.
@@ -826,13 +832,13 @@ public void noCompoundSnpMissingAccs() throws Exception {
}
@Test
- public void noCompoundSnpRefInAlt() throws Exception {
+ public void noCompoundSnpRefInAlt() {
/*
* Don't want this happening
* chr1 985449 . GG AG . . . GT:DP:MR:OABS 1/1:12:9:AG1[]8[];GA2[]0[];_G1[]0[] 1/1:10:6:AG3[]3[];GA2[]1[];_G1[]0[]
* which was made up from
- * chr1 985449 rs56255212 G A 421.77 PASS AC=2;AF=1.00;AN=2;DB;DP=10;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;MQ0=0;QD=31.82;SOR=2.303 GT:AD:DP:GQ:PL:GD:AC:OABS:MR:NNS 1/1:0,10:10:30:450,30,0:A/A:A1[37],8[35.12],G2[38],0[0]:.:9:9 0/1:3,6:9:99:208,0,138:A/G:A3[34],3[38.67],G2[37],1[35]:.:6:5
- * chr1 985450 . G A 67.77 MIN;MR;NNS SOMATIC GT:AD:DP:GQ:PL:GD:AC:OABS:MR:NNS .:.:.:.:.:G/G:A2[37],0[0],G2[37],8[36.5]:.:2:2 0/1:7,3:10:96:96,0,236:A/G:A2[35],1[35],G4[36.5],3[39]:.:3:3
+ * chr1 985449 rs56255212 G A 421.77 PASS AC=2;AF=1.00;AN=2;DB;DP=10;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;MQ0=0;QD=31.82;SOR=2.303 GT:AD:DP:GQ:PL:GD:AC:OABS:MR:NNS 1/1:0,10:10:30:450,30,0:A/A:A1[37],8[35.12],G2[38],0[0]:.:9:9 0/1:3,6:9:99:208,0,138:A/G:A3[34],3[38.67],G2[37],1[35]:.:6:5
+ * chr1 985450 . G A 67.77 MIN;MR;NNS SOMATIC GT:AD:DP:GQ:PL:GD:AC:OABS:MR:NNS .:.:.:.:.:G/G:A2[37],0[0],G2[37],8[36.5]:.:2:2 0/1:7,3:10:96:96,0,236:A/G:A2[35],1[35],G4[36.5],3[39]:.:3:3
*/
VcfRecord v1 = VcfUtils.createVcfRecord(new ChrPointPosition("1", 100),null,"G","A");
v1.setInfo(VcfHeaderUtils.INFO_SOMATIC);
@@ -903,12 +909,12 @@ public void noCompoundSnpRefInAlt() throws Exception {
Map> map = new HashMap<>();
map.put(v1, new Pair<>(control100, tumour100));
map.put(v2, new Pair<>(control101, tumour101));
- assertEquals(false, PipelineUtil.createCompoundSnp(map, cRules,tRules, true, 3, 3).isPresent());
+ assertFalse(PipelineUtil.createCompoundSnp(map, cRules, tRules, true, 3, 3).isPresent());
}
@Test
- public void compoundSnpReverseStrand() throws Exception {
+ public void compoundSnpReverseStrand() {
VcfRecord v1 = VcfUtils.createVcfRecord(new ChrPointPosition("1", 100),null,"A","C");
v1.setInfo(VcfHeaderUtils.INFO_SOMATIC);
VcfRecord v2 = VcfUtils.createVcfRecord(new ChrPointPosition("1", 101),null,"C","G");
@@ -922,7 +928,7 @@ public void compoundSnpReverseStrand() throws Exception {
Map> map = new HashMap<>();
map.put(v1, new Pair<>(null, tumour100));
map.put(v2, new Pair<>(null, tumour101));
- assertEquals(false, PipelineUtil.createCompoundSnp(map, cRules,tRules, true, 3, 3).isPresent());
+ assertFalse(PipelineUtil.createCompoundSnp(map, cRules, tRules, true, 3, 3).isPresent());
// need 4 reads with the cs to register
tumour100.addBase((byte)'C', (byte)30, false, 100, 100, 200, 2);
@@ -931,17 +937,17 @@ public void compoundSnpReverseStrand() throws Exception {
tumour101.addBase((byte)'G', (byte)30, false, 101, 101, 200, 3);
tumour100.addBase((byte)'C', (byte)30, false, 100, 100, 200, 4);
tumour101.addBase((byte)'G', (byte)30, false, 101, 101, 200, 4);
- Optional ov = PipelineUtil.createCompoundSnp(map, cRules,tRules, true, 3, 3);
- assertEquals(true, ov.isPresent());
+ Optional ov = PipelineUtil.createCompoundSnp(map, cRules,tRules, true, 3, 3);
+ assertTrue(ov.isPresent());
VcfRecord v = ov.get();
List ff = v.getFormatFields();
- assertEquals(true, ff.get(2).contains("CG0[]4[]")); // tumour
- assertEquals("./.:.:0:.:.:.:.", ff.get(1)); // control
+ assertTrue(ff.get(2).contains("CG0[]4[]")); // tumour
+ assertEquals("./.:.:0:.:.:.:.:.", ff.get(1)); // control
}
@Test
- public void compoundSnpBothStrands() throws Exception {
+ public void compoundSnpBothStrands() {
VcfRecord v1 = VcfUtils.createVcfRecord(new ChrPointPosition("1", 100),null,"A","C");
v1.setInfo(VcfHeaderUtils.INFO_SOMATIC);
VcfRecord v2 = VcfUtils.createVcfRecord(new ChrPointPosition("1", 101),null,"C","G");
@@ -955,7 +961,7 @@ public void compoundSnpBothStrands() throws Exception {
Map> map = new HashMap<>();
map.put(v1, new Pair<>(null, tumour100));
map.put(v2, new Pair<>(null, tumour101));
- assertEquals(false, PipelineUtil.createCompoundSnp(map, cRules,tRules, true, 3, 3).isPresent());
+ assertFalse(PipelineUtil.createCompoundSnp(map, cRules, tRules, true, 3, 3).isPresent());
// need 4 reads with the cs to register
tumour100.addBase((byte)'C', (byte)30, true, 100, 100, 200, 2);
@@ -965,12 +971,12 @@ public void compoundSnpBothStrands() throws Exception {
tumour100.addBase((byte)'C', (byte)30, true, 100, 100, 200, 4);
tumour101.addBase((byte)'G', (byte)30, true, 101, 101, 200, 4);
- Optional ov = PipelineUtil.createCompoundSnp(map, cRules,tRules, true, 3, 3);
- assertEquals(true, ov.isPresent());
+ Optional ov = PipelineUtil.createCompoundSnp(map, cRules,tRules, true, 3, 3);
+ assertTrue(ov.isPresent());
VcfRecord v = ov.get();
List ff = v.getFormatFields();
- assertEquals(true, ff.get(2).contains("CG2[]2[]")); // tumour
- assertEquals("./.:.:0:.:.:.:.", ff.get(1)); // control
+ assertTrue(ff.get(2).contains("CG2[]2[]")); // tumour
+ assertEquals("./.:.:0:.:.:.:.:.", ff.get(1)); // control
}
@Test
@@ -988,7 +994,7 @@ public void compoundSnpWithOverlappingReads() {
Map> map = new HashMap<>();
map.put(v1, new Pair<>(null, tumour100));
map.put(v2, new Pair<>(null, tumour101));
- assertEquals(false, PipelineUtil.createCompoundSnp(map, cRules,tRules, true, 3, 3).isPresent());
+ assertFalse(PipelineUtil.createCompoundSnp(map, cRules, tRules, true, 3, 3).isPresent());
// need 4 reads with the cs to register
tumour100.addBase((byte)'C', (byte)30, true, 100, 100, 200, 2);
@@ -999,12 +1005,12 @@ public void compoundSnpWithOverlappingReads() {
tumour101.addBase((byte)'G', (byte)30, true, 101, 101, 200, 4);
tumour101.addBase((byte)'G', (byte)30, true, 101, 101, 200, 5);
- Optional ov = PipelineUtil.createCompoundSnp(map, cRules,tRules, true, 3, 3);
- assertEquals(true, ov.isPresent());
+ Optional ov = PipelineUtil.createCompoundSnp(map, cRules,tRules, true, 3, 3);
+ assertTrue(ov.isPresent());
VcfRecord v = ov.get();
List ff = v.getFormatFields();
- assertEquals("./.:.:0:.:.:.:.", ff.get(1)); // control
- assertEquals(true, ff.get(2).contains("CG4[]0[];_G1[]0[]")); // tumour
+ assertEquals("./.:.:0:.:.:.:.:.", ff.get(1)); // control
+ assertTrue(ff.get(2).contains("CG4[]0[];_G1[]0[]")); // tumour
}
@Test
@@ -1022,7 +1028,7 @@ public void compoundSnpWithOverlappingReadsOtherEnd() {
Map> map = new HashMap<>();
map.put(v1, new Pair<>(null, tumour100));
map.put(v2, new Pair<>(null, tumour101));
- assertEquals(false, PipelineUtil.createCompoundSnp(map, cRules,tRules, true, 3, 3).isPresent());
+ assertFalse(PipelineUtil.createCompoundSnp(map, cRules, tRules, true, 3, 3).isPresent());
// need 4 reads with the cs to register
tumour100.addBase((byte)'C', (byte)30, true, 100, 100, 200, 2);
@@ -1033,17 +1039,17 @@ public void compoundSnpWithOverlappingReadsOtherEnd() {
tumour101.addBase((byte)'G', (byte)30, true, 100, 101, 200, 4);
tumour100.addBase((byte)'C', (byte)30, true, 100, 100, 200, 5);
- Optional ov = PipelineUtil.createCompoundSnp(map, cRules,tRules, true, 3, 3);
- assertEquals(true, ov.isPresent());
+ Optional ov = PipelineUtil.createCompoundSnp(map, cRules,tRules, true, 3, 3);
+ assertTrue(ov.isPresent());
VcfRecord v = ov.get();
assertEquals("AC", v.getRef());
assertEquals("CG", v.getAlt());
List ff = v.getFormatFields();
- assertEquals("./.:.:0:.:.:.:.", ff.get(1)); // control
+ assertEquals("./.:.:0:.:.:.:.:.", ff.get(1)); // control
/*
* filters are now applied in qannotate
*/
- assertEquals("1/1:0,4:4:.:SOMATIC:1:CG4[]0[];C_1[]0[]", ff.get(2)); // tumour
+ assertEquals("1/1:0,4:4:.:.:SOMATIC:1:CG4[]0[];C_1[]0[]", ff.get(2)); // tumour
}
@Test
@@ -1082,13 +1088,13 @@ public void csRealLife() {
map.put(v1, new Pair<>(null, tumour100));
map.put(v2, new Pair<>(null, tumour101));
- Optional ov = PipelineUtil.createCompoundSnp(map, cRules,tRules, true, 3, 3);
- assertEquals(true, ov.isPresent());
+ Optional ov = PipelineUtil.createCompoundSnp(map, cRules,tRules, true, 3, 3);
+ assertTrue(ov.isPresent());
VcfRecord v = ov.get();
assertEquals("AA", v.getAlt());
assertEquals("GG", v.getRef());
List ff = v.getFormatFields();
- assertEquals("1/1:0,65:67:.:SOMATIC:65:AA34[]31[];AC1[]0[];CA1[]0[]", ff.get(2)); // tumour
+ assertEquals("1/1:0,65:67:.:.:SOMATIC:65:AA34[]31[];AC1[]0[];CA1[]0[]", ff.get(2)); // tumour
}
@Test
@@ -1123,8 +1129,8 @@ public void csRealLife2() {
map.put(v1, new Pair<>(null, tumour100));
map.put(v2, new Pair<>(null, tumour101));
- Optional ov = PipelineUtil.createCompoundSnp(map, cRules,tRules, true, 3, 3);
- assertEquals(true, ov.isPresent());
+ Optional ov = PipelineUtil.createCompoundSnp(map, cRules,tRules, true, 3, 3);
+ assertTrue(ov.isPresent());
VcfRecord v = ov.get();
assertEquals("CA", v.getAlt());
assertEquals("TG", v.getRef());
@@ -1138,7 +1144,7 @@ public void csGATK() {
VcfRecord v2 = new VcfRecord(new String[]{"chr1","39592385",".","G","A",".",".","BaseQRankSum=0.767;ClippingRankSum=-0.266;DP=46;FS=14.005;MQ=60.00;MQRankSum=-0.423;QD=31.52;ReadPosRankSum=-0.611;SOR=0.028;IN=2;HOM=0,CTTGAGCTTGaGAGGCAGAGA;","GT:AD:CCC:CCM:DP:FT:GQ:INF:QL","./.:.:Reference:13:.:PASS:.:NCIG:.","0/1:7,38:Somatic:13:45:PASS:99:SOMATIC:1449.77"});
List vcfs = Arrays.asList(v1,v2);
Optional oVcf = PipelineUtil.createCompoundSnpGATK(vcfs);
- assertEquals(true, oVcf.isPresent());
+ assertTrue(oVcf.isPresent());
VcfRecord v = oVcf.get();
assertEquals("GG", v.getRef());
assertEquals("TA", v.getAlt());
@@ -1169,7 +1175,7 @@ public void csGATK2() {
VcfRecord v2 = new VcfRecord(new String[]{"chr1","40615302",".","C","T",".",".","BaseQRankSum=-0.104;ClippingRankSum=0.439;DP=69;FS=2.380;MQ=60.00;MQRankSum=-0.369;QD=30.87;ReadPosRankSum=-0.717;SOR=0.524;IN=2;HOM=0,ACCTGTAATCtCAGCTACTCG;EFF=intergenic_region(MODIFIER||||||||||1)","GT:AD:CCC:CCM:DP:FT:GQ:INF:QL","./.:.:Reference:13:.:PASS:.:NCIG:.","0/1:17,52:Somatic:13:69:PASS:99:SOMATIC:2129.77"});
List vcfs = Arrays.asList(v1,v2);
Optional oVcf = PipelineUtil.createCompoundSnpGATK(vcfs);
- assertEquals(true, oVcf.isPresent());
+ assertTrue(oVcf.isPresent());
VcfRecord v = oVcf.get();
assertEquals("CC", v.getRef());
assertEquals("AT", v.getAlt());
@@ -1200,7 +1206,7 @@ public void csGATK3() {
VcfRecord v2 = new VcfRecord(new String[]{"chr1","47083666",".","A","T",".",".","BaseQRankSum=0.203;ClippingRankSum=0.452;DP=69;FS=1.235;MQ=60.00;MQRankSum=-0.717;QD=33.21;ReadPosRankSum=-0.733;SOR=0.436;IN=2;HOM=2,GAATACATAGtTACTAGGAGG","GT:AD:CCC:CCM:DP:FT:GQ:INF:QL","./.:.:Reference:13:.:PASS:.:NCIG:.","0/1:13,55:Somatic:13:68:PASS:99:SOMATIC:2291.77"});
List vcfs = Arrays.asList(v1,v2);
Optional oVcf = PipelineUtil.createCompoundSnpGATK(vcfs);
- assertEquals(true, oVcf.isPresent());
+ assertTrue(oVcf.isPresent());
VcfRecord v = oVcf.get();
assertEquals("GA", v.getRef());
assertEquals("TT", v.getAlt());
@@ -1231,7 +1237,7 @@ public void csGATK4() {
VcfRecord v2 = new VcfRecord(new String[]{"chr1","169423270",".","G","A",".",".","DP=121;FS=0.000;MQ=60.00;QD=29.54;SOR=1.096;IN=2;HOM=2,CCTTCTTCAGaACCAAATAGA","GT:AD:CCC:CCM:DP:FT:GQ:INF:QL","./.:.:Reference:14:.:PASS:.:NCIG:.","1/1:0,121:SomaticNoReference:14:121:PASS:99:SOMATIC:5348.77"});
List vcfs = Arrays.asList(v1,v2);
Optional oVcf = PipelineUtil.createCompoundSnpGATK(vcfs);
- assertEquals(true, oVcf.isPresent());
+ assertTrue(oVcf.isPresent());
VcfRecord v = oVcf.get();
assertEquals("GG", v.getRef());
assertEquals("AA", v.getAlt());
@@ -1262,7 +1268,7 @@ public void csGATK5() {
VcfRecord v2 = new VcfRecord(new String[]{"chr2","65487955",".","C","T",".",".","BaseQRankSum=2.173;ClippingRankSum=-0.100;DP=65;FS=2.303;MQ=59.61;MQRankSum=-0.186;QD=10.60;ReadPosRankSum=1.101;SOR=0.364;IN=2;HOM=0,GCTCTGCCTCtCGGGTTCACG","GT:AD:CCC:CCM:DP:FT:GQ:INF:QL","./.:.:Reference:13:.:PASS:.:NCIG:.","0/1:43,21:Somatic:13:64:PASS:99:SOMATIC:688.77"});
List vcfs = Arrays.asList(v1,v2);
Optional oVcf = PipelineUtil.createCompoundSnpGATK(vcfs);
- assertEquals(true, oVcf.isPresent());
+ assertTrue(oVcf.isPresent());
VcfRecord v = oVcf.get();
assertEquals("CC", v.getRef());
assertEquals("TT", v.getAlt());
@@ -1293,7 +1299,7 @@ public void csGATK6() {
VcfRecord v2 = new VcfRecord(new String[]{"chr6","32495872",".","G","T","107.28",".","AC=2;AF=1.00;AN=2;DP=0;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=0.00;SOR=0.693","GT:AD:DP:GQ:INF:QL","./.:.:.:.:NCIG:.","1/1:.:.:9:.:."});
List vcfs = Arrays.asList(v1,v2);
Optional oVcf = PipelineUtil.createCompoundSnpGATK(vcfs);
- assertEquals(true, oVcf.isPresent());
+ assertTrue(oVcf.isPresent());
VcfRecord v = oVcf.get();
assertEquals("CG", v.getRef());
assertEquals("TT", v.getAlt());
@@ -1324,7 +1330,7 @@ public void csGATK6SingleSample() {
VcfRecord v2 = new VcfRecord(new String[]{"chr6","32495872",".","G","T","107.28",".","AC=2;AF=1.00;AN=2;DP=0;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=0.00;SOR=0.693","GT:AD:DP:GQ:INF:QL","1/1:.:.:9:.:."});
List vcfs = Arrays.asList(v1,v2);
Optional oVcf = PipelineUtil.createCompoundSnpGATK(vcfs, true);
- assertEquals(true, oVcf.isPresent());
+ assertTrue(oVcf.isPresent());
VcfRecord v = oVcf.get();
assertEquals("CG", v.getRef());
assertEquals("TT", v.getAlt());
@@ -1349,7 +1355,7 @@ public void csGATK7() {
VcfRecord v2 = new VcfRecord(new String[]{"chrY","13487854",".","C","A,T","107.28",".","AC=2;AF=1.00;AN=2;DP=0;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=0.00;SOR=0.693","GT:AD:DP:GQ:INF:QL","0/1:.:.:.:NCIG:.","0/2:.:.:9:.:."});
List vcfs = Arrays.asList(v1,v2);
Optional oVcf = PipelineUtil.createCompoundSnpGATK(vcfs);
- assertEquals(false, oVcf.isPresent());
+ assertFalse(oVcf.isPresent());
}
@Test
@@ -1358,7 +1364,7 @@ public void csGATKSingleSample() {
VcfRecord v2 = new VcfRecord(new String[]{"chrY","13487854",".","C","A,T","107.28",".","AC=2;AF=1.00;AN=2;DP=0;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=0.00;SOR=0.693","GT:AD:DP:GQ:INF:QL","0/2:.:.:9:.:."});
List vcfs = Arrays.asList(v1,v2);
Optional oVcf = PipelineUtil.createCompoundSnpGATK(vcfs, true);
- assertEquals(false, oVcf.isPresent());
+ assertFalse(oVcf.isPresent());
}
@Test
@@ -1367,65 +1373,712 @@ public void csGATKOneSomaticOneGermline() {
VcfRecord v2 = new VcfRecord(new String[]{"chr1","39592385",".","G","A",".",".","BaseQRankSum=0.767;ClippingRankSum=-0.266;DP=46;FS=14.005;MQ=60.00;MQRankSum=-0.423;QD=31.52;ReadPosRankSum=-0.611;SOR=0.028;IN=2;HOM=0,CTTGAGCTTGaGAGGCAGAGA;","GT:AD:CCC:CCM:DP:FT:GQ:INF:QL","0/0:.:Reference:13:.:PASS:.:NCIG:.","0/1:7,38:Somatic:13:45:PASS:99:SOMATIC:1449.77"});
List vcfs = Arrays.asList(v1,v2);
Optional oVcf = PipelineUtil.createCompoundSnpGATK(vcfs);
- assertEquals(false, oVcf.isPresent());
+ assertFalse(oVcf.isPresent());
+ }
+
+ /** A null accumulator list must be reported as the missing-data string rather than throwing. */
+ @Test
+ public void testGetFailedFilterCS_NullAccumulators() {
+ // when: no accumulator list is supplied at all
+ String result = PipelineUtil.getFailedFilterCS(null);
+ // then: the missing-data placeholder comes back (statically imported assertEquals, as in the sibling tests)
+ assertEquals(Constants.MISSING_DATA_STRING, result);
+ }
+
+ /** An empty accumulator list yields the "." placeholder, not an empty string. */
+ @Test
+ public void testGetFailedFilterCS_EmptyAccumulators() {
+ // when: the list exists but holds no accumulators
+ List<Accumulator> accumulators = Collections.emptyList();
+ String result = PipelineUtil.getFailedFilterCS(accumulators);
+ // then: "." is returned (NOTE(review): presumably equal to Constants.MISSING_DATA_STRING - confirm)
+ assertEquals(".", result);
+ }
+
+ /** One accumulator: the result lists each failed-filter base with its count (A x3, C x2, G x1, T x1). */
+ @Test
+ public void testGetFailedFilterCS_SingleAccumulator() {
+ // a real Accumulator (nothing is mocked); ids 1..7 are each used exactly once
+ Accumulator acc = new Accumulator(12345);
+ int id = 1;
+ while (id < 4) {
+ acc.addFailedFilterBase((byte) 'A', id++);
+ }
+ while (id < 6) {
+ acc.addFailedFilterBase((byte) 'C', id++);
+ }
+ // G and T each occur once (ids 6 and 7), so no loop is needed
+ acc.addFailedFilterBase((byte) 'G', id++);
+ acc.addFailedFilterBase((byte) 'T', id);
+ String failedFilter = PipelineUtil.getFailedFilterCS(Collections.singletonList(acc));
+ assertEquals("A3;C2;G1;T1", failedFilter);
+ }
+
+ /** Two adjacent accumulators: ids seen at both positions pair up, ids seen at only one get '_' for the other. */
+ @Test
+ public void testGetFailedFilterCS_MultipleAccumulators() {
+ Accumulator first = new Accumulator(12345);
+ Accumulator second = new Accumulator(12346);
+ // ids 1-3 and 4-5 are added to both accumulators, so every base has a partner
+ int id = 1;
+ for (; id <= 3; id++) {
+ first.addFailedFilterBase((byte) 'A', id);
+ }
+ for (; id <= 5; id++) {
+ first.addFailedFilterBase((byte) 'C', id);
+ }
+ for (id = 1; id <= 3; id++) {
+ second.addFailedFilterBase((byte) 'G', id);
+ }
+ for (; id <= 5; id++) {
+ second.addFailedFilterBase((byte) 'T', id);
+ }
+ List<Accumulator> accumulators = Arrays.asList(first, second);
+ assertEquals("AG3;CT2", PipelineUtil.getFailedFilterCS(accumulators));
+ // ids 6-9 exist only in the first accumulator and id 10 only in the second
+ for (id = 6; id <= 9; id++) {
+ first.addFailedFilterBase((byte) 'T', id);
+ }
+ second.addFailedFilterBase((byte) 'T', 10);
+ String withUnpaired = PipelineUtil.getFailedFilterCS(accumulators);
+ assertEquals("AG3;CT2;T_4;_T1", withUnpaired);
+ }
+
+ /** Three adjacent accumulators: each id yields one three-character key, with '_' where an accumulator lacks that id. */
+ @Test
+ public void testGetFailedFilterCS_Triple() {
+ Accumulator left = new Accumulator(12345);
+ Accumulator middle = new Accumulator(12346);
+ Accumulator right = new Accumulator(12347);
+ List<Accumulator> trio = Arrays.asList(left, middle, right);
+ left.addFailedFilterBase((byte) 'A', 1);
+ middle.addFailedFilterBase((byte) 'C', 1);
+ right.addFailedFilterBase((byte) 'G', 1);
+ assertEquals("ACG1", PipelineUtil.getFailedFilterCS(trio));
+ // after the shared id 1, ids 2, 3 and 4 are each added to a single accumulator
+ left.addFailedFilterBase((byte) 'A', 2);
+ assertEquals("ACG1;A__1", PipelineUtil.getFailedFilterCS(trio));
+ middle.addFailedFilterBase((byte) 'C', 3);
+ assertEquals("ACG1;A__1;_C_1", PipelineUtil.getFailedFilterCS(trio));
+ right.addFailedFilterBase((byte) 'G', 4);
+ assertEquals("ACG1;A__1;_C_1;__G1", PipelineUtil.getFailedFilterCS(trio));
+ // id 2 now also appears in the last accumulator, so A__ becomes A_G
+ right.addFailedFilterBase((byte) 'G', 2);
+ assertEquals("ACG1;A_G1;_C_1;__G1", PipelineUtil.getFailedFilterCS(trio));
+ }
+
+ @Test
+ public void testGetFailedFilterCS_RealLife() {
+ // Two real Accumulators (nothing is mocked) at positions 12345 and 12346; the name suggests the ids come from a real run - TODO confirm
+ Accumulator acc1 = new Accumulator(12345);
+ // acc1: six C ids first, then the T ids
+ acc1.addFailedFilterBase((byte) 'C', 391785L);
+ acc1.addFailedFilterBase((byte) 'C', 391815L);
+ acc1.addFailedFilterBase((byte) 'C', 391817L);
+ acc1.addFailedFilterBase((byte) 'C', 391832L);
+ acc1.addFailedFilterBase((byte) 'C', 391843L);
+ acc1.addFailedFilterBase((byte) 'C', 391867L);
+ acc1.addFailedFilterBase((byte) 'T', 391748L);
+ acc1.addFailedFilterBase((byte) 'T', 391752L);
+ acc1.addFailedFilterBase((byte) 'T', 391754L);
+ acc1.addFailedFilterBase((byte) 'T', 391755L);
+ acc1.addFailedFilterBase((byte) 'T', 391756L);
+ acc1.addFailedFilterBase((byte) 'T', 391757L);
+ acc1.addFailedFilterBase((byte) 'T', 391760L);
+ acc1.addFailedFilterBase((byte) 'T', 391761L);
+ acc1.addFailedFilterBase((byte) 'T', 391762L);
+ acc1.addFailedFilterBase((byte) 'T', 391763L);
+ acc1.addFailedFilterBase((byte) 'T', 391765L);
+ acc1.addFailedFilterBase((byte) 'T', 391766L);
+ acc1.addFailedFilterBase((byte) 'T', 391768L);
+ acc1.addFailedFilterBase((byte) 'T', 391769L);
+ acc1.addFailedFilterBase((byte) 'T', 391770L);
+ acc1.addFailedFilterBase((byte) 'T', 391771L);
+ acc1.addFailedFilterBase((byte) 'T', 391773L);
+ acc1.addFailedFilterBase((byte) 'T', 391776L);
+ acc1.addFailedFilterBase((byte) 'T', 391777L);
+ acc1.addFailedFilterBase((byte) 'T', 391778L);
+ acc1.addFailedFilterBase((byte) 'T', 391779L);
+ acc1.addFailedFilterBase((byte) 'T', 391780L);
+ acc1.addFailedFilterBase((byte) 'T', 391782L);
+ acc1.addFailedFilterBase((byte) 'T', 391783L);
+ acc1.addFailedFilterBase((byte) 'T', 391787L);
+ acc1.addFailedFilterBase((byte) 'T', 391788L);
+ acc1.addFailedFilterBase((byte) 'T', 391789L);
+ acc1.addFailedFilterBase((byte) 'T', 391790L);
+ acc1.addFailedFilterBase((byte) 'T', 391791L);
+ acc1.addFailedFilterBase((byte) 'T', 391792L);
+ acc1.addFailedFilterBase((byte) 'T', 391795L);
+ acc1.addFailedFilterBase((byte) 'T', 391796L);
+ acc1.addFailedFilterBase((byte) 'T', 391797L);
+ acc1.addFailedFilterBase((byte) 'T', 391799L);
+ acc1.addFailedFilterBase((byte) 'T', 391800L);
+ acc1.addFailedFilterBase((byte) 'T', 391802L);
+ acc1.addFailedFilterBase((byte) 'T', 391803L);
+ acc1.addFailedFilterBase((byte) 'T', 391806L);
+ acc1.addFailedFilterBase((byte) 'T', 391809L);
+ acc1.addFailedFilterBase((byte) 'T', 391810L);
+ acc1.addFailedFilterBase((byte) 'T', 391811L);
+ acc1.addFailedFilterBase((byte) 'T', 391812L);
+ acc1.addFailedFilterBase((byte) 'T', 391813L);
+ acc1.addFailedFilterBase((byte) 'T', 391814L);
+ acc1.addFailedFilterBase((byte) 'T', 391816L);
+ acc1.addFailedFilterBase((byte) 'T', 391818L);
+ acc1.addFailedFilterBase((byte) 'T', 391819L);
+ acc1.addFailedFilterBase((byte) 'T', 391821L);
+ acc1.addFailedFilterBase((byte) 'T', 391822L);
+ acc1.addFailedFilterBase((byte) 'T', 391823L);
+ acc1.addFailedFilterBase((byte) 'T', 391826L);
+ acc1.addFailedFilterBase((byte) 'T', 391827L);
+ acc1.addFailedFilterBase((byte) 'T', 391828L);
+ acc1.addFailedFilterBase((byte) 'T', 391831L);
+ acc1.addFailedFilterBase((byte) 'T', 391834L);
+ acc1.addFailedFilterBase((byte) 'T', 391835L);
+ acc1.addFailedFilterBase((byte) 'T', 391836L);
+ acc1.addFailedFilterBase((byte) 'T', 391838L);
+ acc1.addFailedFilterBase((byte) 'T', 391839L);
+ acc1.addFailedFilterBase((byte) 'T', 391840L);
+ acc1.addFailedFilterBase((byte) 'T', 391841L);
+ acc1.addFailedFilterBase((byte) 'T', 391844L);
+ acc1.addFailedFilterBase((byte) 'T', 391845L);
+ acc1.addFailedFilterBase((byte) 'T', 391846L);
+ acc1.addFailedFilterBase((byte) 'T', 391847L);
+ acc1.addFailedFilterBase((byte) 'T', 391848L);
+ acc1.addFailedFilterBase((byte) 'T', 391850L);
+ acc1.addFailedFilterBase((byte) 'T', 391851L);
+ acc1.addFailedFilterBase((byte) 'T', 391853L);
+ acc1.addFailedFilterBase((byte) 'T', 391854L);
+ acc1.addFailedFilterBase((byte) 'T', 391855L);
+ acc1.addFailedFilterBase((byte) 'T', 391857L);
+ acc1.addFailedFilterBase((byte) 'T', 391859L);
+ acc1.addFailedFilterBase((byte) 'T', 391860L);
+ acc1.addFailedFilterBase((byte) 'T', 391862L);
+ acc1.addFailedFilterBase((byte) 'T', 391863L);
+ acc1.addFailedFilterBase((byte) 'T', 391865L);
+ acc1.addFailedFilterBase((byte) 'T', 391866L);
+ acc1.addFailedFilterBase((byte) 'T', 391868L);
+ acc1.addFailedFilterBase((byte) 'T', 391869L);
+ acc1.addFailedFilterBase((byte) 'T', 391870L);
+ acc1.addFailedFilterBase((byte) 'T', 391871L);
+ acc1.addFailedFilterBase((byte) 'T', 391872L);
+ acc1.addFailedFilterBase((byte) 'T', 391873L);
+ acc1.addFailedFilterBase((byte) 'T', 391874L);
+ acc1.addFailedFilterBase((byte) 'T', 391875L);
+ acc1.addFailedFilterBase((byte) 'T', 391877L);
+ acc1.addFailedFilterBase((byte) 'T', 391878L);
+ acc1.addFailedFilterBase((byte) 'T', 391879L);
+ acc1.addFailedFilterBase((byte) 'T', 391880L);
+ acc1.addFailedFilterBase((byte) 'T', 391881L);
+ acc1.addFailedFilterBase((byte) 'T', 391882L);
+ acc1.addFailedFilterBase((byte) 'T', 391884L);
+ acc1.addFailedFilterBase((byte) 'T', 391885L);
+ acc1.addFailedFilterBase((byte) 'T', 391886L);
+ acc1.addFailedFilterBase((byte) 'T', 391887L);
+ acc1.addFailedFilterBase((byte) 'T', 391889L);
+ // acc2: A for two of acc1's C ids (391815, 391817), then G ids; 391890 and 391891 do not occur in acc1
+ Accumulator acc2 = new Accumulator(12346);
+ acc2.addFailedFilterBase((byte) 'A', 391815L);
+ acc2.addFailedFilterBase((byte) 'A', 391817L);
+ acc2.addFailedFilterBase((byte) 'G', 391748L);
+ acc2.addFailedFilterBase((byte) 'G', 391752L);
+ acc2.addFailedFilterBase((byte) 'G', 391754L);
+ acc2.addFailedFilterBase((byte) 'G', 391755L);
+ acc2.addFailedFilterBase((byte) 'G', 391756L);
+ acc2.addFailedFilterBase((byte) 'G', 391757L);
+ acc2.addFailedFilterBase((byte) 'G', 391760L);
+ acc2.addFailedFilterBase((byte) 'G', 391761L);
+ acc2.addFailedFilterBase((byte) 'G', 391762L);
+ acc2.addFailedFilterBase((byte) 'G', 391763L);
+ acc2.addFailedFilterBase((byte) 'G', 391765L);
+ acc2.addFailedFilterBase((byte) 'G', 391766L);
+ acc2.addFailedFilterBase((byte) 'G', 391768L);
+ acc2.addFailedFilterBase((byte) 'G', 391769L);
+ acc2.addFailedFilterBase((byte) 'G', 391770L);
+ acc2.addFailedFilterBase((byte) 'G', 391771L);
+ acc2.addFailedFilterBase((byte) 'G', 391773L);
+ acc2.addFailedFilterBase((byte) 'G', 391776L);
+ acc2.addFailedFilterBase((byte) 'G', 391777L);
+ acc2.addFailedFilterBase((byte) 'G', 391778L);
+ acc2.addFailedFilterBase((byte) 'G', 391779L);
+ acc2.addFailedFilterBase((byte) 'G', 391780L);
+ acc2.addFailedFilterBase((byte) 'G', 391782L);
+ acc2.addFailedFilterBase((byte) 'G', 391783L);
+ acc2.addFailedFilterBase((byte) 'G', 391785L);
+ acc2.addFailedFilterBase((byte) 'G', 391787L);
+ acc2.addFailedFilterBase((byte) 'G', 391788L);
+ acc2.addFailedFilterBase((byte) 'G', 391789L);
+ acc2.addFailedFilterBase((byte) 'G', 391790L);
+ acc2.addFailedFilterBase((byte) 'G', 391791L);
+ acc2.addFailedFilterBase((byte) 'G', 391792L);
+ acc2.addFailedFilterBase((byte) 'G', 391795L);
+ acc2.addFailedFilterBase((byte) 'G', 391796L);
+ acc2.addFailedFilterBase((byte) 'G', 391797L);
+ acc2.addFailedFilterBase((byte) 'G', 391799L);
+ acc2.addFailedFilterBase((byte) 'G', 391800L);
+ acc2.addFailedFilterBase((byte) 'G', 391802L);
+ acc2.addFailedFilterBase((byte) 'G', 391803L);
+ acc2.addFailedFilterBase((byte) 'G', 391806L);
+ acc2.addFailedFilterBase((byte) 'G', 391809L);
+ acc2.addFailedFilterBase((byte) 'G', 391810L);
+ acc2.addFailedFilterBase((byte) 'G', 391811L);
+ acc2.addFailedFilterBase((byte) 'G', 391812L);
+ acc2.addFailedFilterBase((byte) 'G', 391813L);
+ acc2.addFailedFilterBase((byte) 'G', 391814L);
+ acc2.addFailedFilterBase((byte) 'G', 391816L);
+ acc2.addFailedFilterBase((byte) 'G', 391818L);
+ acc2.addFailedFilterBase((byte) 'G', 391819L);
+ acc2.addFailedFilterBase((byte) 'G', 391821L);
+ acc2.addFailedFilterBase((byte) 'G', 391822L);
+ acc2.addFailedFilterBase((byte) 'G', 391823L);
+ acc2.addFailedFilterBase((byte) 'G', 391826L);
+ acc2.addFailedFilterBase((byte) 'G', 391827L);
+ acc2.addFailedFilterBase((byte) 'G', 391828L);
+ acc2.addFailedFilterBase((byte) 'G', 391831L);
+ acc2.addFailedFilterBase((byte) 'G', 391832L);
+ acc2.addFailedFilterBase((byte) 'G', 391834L);
+ acc2.addFailedFilterBase((byte) 'G', 391835L);
+ acc2.addFailedFilterBase((byte) 'G', 391836L);
+ acc2.addFailedFilterBase((byte) 'G', 391838L);
+ acc2.addFailedFilterBase((byte) 'G', 391839L);
+ acc2.addFailedFilterBase((byte) 'G', 391840L);
+ acc2.addFailedFilterBase((byte) 'G', 391841L);
+ acc2.addFailedFilterBase((byte) 'G', 391843L);
+ acc2.addFailedFilterBase((byte) 'G', 391844L);
+ acc2.addFailedFilterBase((byte) 'G', 391845L);
+ acc2.addFailedFilterBase((byte) 'G', 391846L);
+ acc2.addFailedFilterBase((byte) 'G', 391847L);
+ acc2.addFailedFilterBase((byte) 'G', 391848L);
+ acc2.addFailedFilterBase((byte) 'G', 391850L);
+ acc2.addFailedFilterBase((byte) 'G', 391851L);
+ acc2.addFailedFilterBase((byte) 'G', 391853L);
+ acc2.addFailedFilterBase((byte) 'G', 391854L);
+ acc2.addFailedFilterBase((byte) 'G', 391855L);
+ acc2.addFailedFilterBase((byte) 'G', 391857L);
+ acc2.addFailedFilterBase((byte) 'G', 391859L);
+ acc2.addFailedFilterBase((byte) 'G', 391860L);
+ acc2.addFailedFilterBase((byte) 'G', 391862L);
+ acc2.addFailedFilterBase((byte) 'G', 391863L);
+ acc2.addFailedFilterBase((byte) 'G', 391865L);
+ acc2.addFailedFilterBase((byte) 'G', 391866L);
+ acc2.addFailedFilterBase((byte) 'G', 391867L);
+ acc2.addFailedFilterBase((byte) 'G', 391868L);
+ acc2.addFailedFilterBase((byte) 'G', 391869L);
+ acc2.addFailedFilterBase((byte) 'G', 391870L);
+ acc2.addFailedFilterBase((byte) 'G', 391871L);
+ acc2.addFailedFilterBase((byte) 'G', 391872L);
+ acc2.addFailedFilterBase((byte) 'G', 391873L);
+ acc2.addFailedFilterBase((byte) 'G', 391874L);
+ acc2.addFailedFilterBase((byte) 'G', 391875L);
+ acc2.addFailedFilterBase((byte) 'G', 391877L);
+ acc2.addFailedFilterBase((byte) 'G', 391878L);
+ acc2.addFailedFilterBase((byte) 'G', 391879L);
+ acc2.addFailedFilterBase((byte) 'G', 391880L);
+ acc2.addFailedFilterBase((byte) 'G', 391881L);
+ acc2.addFailedFilterBase((byte) 'G', 391882L);
+ acc2.addFailedFilterBase((byte) 'G', 391884L);
+ acc2.addFailedFilterBase((byte) 'G', 391885L);
+ acc2.addFailedFilterBase((byte) 'G', 391886L);
+ acc2.addFailedFilterBase((byte) 'G', 391887L);
+ acc2.addFailedFilterBase((byte) 'G', 391889L);
+ acc2.addFailedFilterBase((byte) 'G', 391890L);
+ acc2.addFailedFilterBase((byte) 'G', 391891L);
+ // expected: C pairs with A twice and with G four times, the T ids pair with G, and the two acc2-only ids give _G2
+ List accumulators = Arrays.asList(acc1, acc2);
+ assertEquals("CA2;CG4;TG97;_G2", PipelineUtil.getFailedFilterCS(accumulators));
+
 }
- //TOTO awaiting decision on whether 1/1 -> 0/0 is SOMATIC
-// @Test
-// public void csRealLife2() {
-// /*
-// * chr10 54817257 rs386743785 AG GA . . IN=1,2;DB;HOM=0,TTTAACCTTCgaCTTGCCCACA GT:AD:CCC:CCM:DP:FT:INF:MR:NNS:OABS 1/1:0,19:Germline:32:34:PASS:.:19:19:AA8[]6[];GA8[]11[];TA0[]1[] 0/0:2,0:ReferenceNoVariant:32:55:PASS:SOMATIC:.:.:AA33[]20[];AG1[]1[];A_2[]0[] 1/1:0,19:Germline:32:34:PASS:.:19:19:AA8[]6[];GA8[]11[];TA0[]1[] 0/0:2,0:ReferenceNoVariant:32:55:PASS:SOMATIC:.:.:AA33[]20[];AG1[]1[];A_2[]0[]
-// *
-// * This should not be SOMATIC
-// */
-// VcfRecord v1 = VcfUtils.createVcfRecord(new ChrPointPosition("10", 54817257),null,"A","G");
-// VcfRecord v2 = VcfUtils.createVcfRecord(new ChrPointPosition("10", 54817258),null,"G","A");
-// final Accumulator tumour100 = new Accumulator(54817257);
-// final Accumulator tumour101 = new Accumulator(54817258);
-// for (int i = 1 ; i <= 34 ; i++) {
-// tumour100.addBase((byte)'A', (byte)30, true, 154701261 + i, 154701381, 154701391, i);
-// tumour101.addBase((byte)'A', (byte)30, true, 154701261 + i, 154701382, 154701391, i);
-// }
-// for (int i = 35 ; i < 35 + 31 ; i++) {
-// tumour100.addBase((byte)'A', (byte)30, false, 154701261 + i, 154701381, 154701391 + i, i);
-// tumour101.addBase((byte)'A', (byte)30, false, 154701261 + i, 154701382, 154701391 + i, i);
-// }
-//
-// tumour100.addBase((byte)'A', (byte)30, true, 154701262, 154701381, 154701391, 70);
-// tumour101.addBase((byte)'C', (byte)30, true, 154701262, 154701382, 154701391, 70);
-// tumour100.addBase((byte)'C', (byte)30, true, 154701262, 154701381, 154701391, 71);
-// tumour101.addBase((byte)'A', (byte)30, true, 154701262, 154701382, 154701391, 71);
-//
-// Map> map = new HashMap<>(4);
-// map.put(v1, new Pair<>(null, tumour100));
-// map.put(v2, new Pair<>(null, tumour101));
-//
-// Optional ov = PipelineUtil.createCompoundSnp(map, cRules,tRules, true, 3, 3);
-// assertEquals(true, ov.isPresent());
-// VcfRecord v = ov.get();
-// assertEquals("AA", v.getAlt());
-// assertEquals("GG", v.getRef());
-// List ff = v.getFormatFields();
-// assertEquals("1/1:0,65:67:.:SOMATIC:65:65:AA34[]31[];AC1[]0[];CA1[]0[]", ff.get(2)); // tumour
-// }
+ @Test
+ public void getUnique() { // same shape as the RealLife test but with 64-bit ids, some of which are added more than once
+ Accumulator acc1 = new Accumulator(12345);
+ acc1.addFailedFilterBase((byte) 'C', 7715117792191186532L);
+ acc1.addFailedFilterBase((byte) 'C', 419015429944394057L);
+ acc1.addFailedFilterBase((byte) 'C', -4982705001061907857L);
+ acc1.addFailedFilterBase((byte) 'C', -5310912577898632174L);
+ acc1.addFailedFilterBase((byte) 'C', 4442000349200974418L);
+ acc1.addFailedFilterBase((byte) 'C', -6357506197841616687L);
+ acc1.addFailedFilterBase((byte) 'T', -7438533981666644002L);
+ acc1.addFailedFilterBase((byte) 'T', 1782036110835987330L);
+ acc1.addFailedFilterBase((byte) 'T', -8751309811463707606L);
+ acc1.addFailedFilterBase((byte) 'T', -1006789886807925312L);
+ acc1.addFailedFilterBase((byte) 'T', 267604890061977987L);
+ acc1.addFailedFilterBase((byte) 'T', -5752866221558017591L);
+ acc1.addFailedFilterBase((byte) 'T', 8813529100103331852L);
+ acc1.addFailedFilterBase((byte) 'T', 8740981370654066209L);
+ acc1.addFailedFilterBase((byte) 'T', 3875546135377654529L);
+ acc1.addFailedFilterBase((byte) 'T', 3156183271998102851L);
+ acc1.addFailedFilterBase((byte) 'T', 496025280786752802L);
+ acc1.addFailedFilterBase((byte) 'T', -5370540407260949421L);
+ acc1.addFailedFilterBase((byte) 'T', -1034241958712548931L);
+ acc1.addFailedFilterBase((byte) 'T', 4760541127041820054L);
+ acc1.addFailedFilterBase((byte) 'T', -478460491748683719L);
+ acc1.addFailedFilterBase((byte) 'T', 1080824909688009888L);
+ acc1.addFailedFilterBase((byte) 'T', -248833557815058818L);
+ acc1.addFailedFilterBase((byte) 'T', -836224118038877802L);
+ acc1.addFailedFilterBase((byte) 'T', -6251926633067930061L);
+ acc1.addFailedFilterBase((byte) 'T', 5693373976150522013L);
+ acc1.addFailedFilterBase((byte) 'T', -8618732021493771414L);
+ acc1.addFailedFilterBase((byte) 'T', -3535500610440114311L);
+ acc1.addFailedFilterBase((byte) 'T', -8607579577620952717L);
+ acc1.addFailedFilterBase((byte) 'T', 3617903064101974186L);
+ acc1.addFailedFilterBase((byte) 'T', -292355879921902716L);
+ acc1.addFailedFilterBase((byte) 'T', -3333658515894473867L);
+ acc1.addFailedFilterBase((byte) 'T', -6061011605950203461L);
+ acc1.addFailedFilterBase((byte) 'T', -2558972151765706823L);
+ acc1.addFailedFilterBase((byte) 'T', 8278189905233837843L);
+ acc1.addFailedFilterBase((byte) 'T', 3158165356931543161L);
+ acc1.addFailedFilterBase((byte) 'T', 8095381987075818260L);
+ acc1.addFailedFilterBase((byte) 'T', 4995250372108758135L);
+ acc1.addFailedFilterBase((byte) 'T', 4308956381034063213L);
+ acc1.addFailedFilterBase((byte) 'T', -3975576866416630705L);
+ acc1.addFailedFilterBase((byte) 'T', 215674860906757583L);
+ acc1.addFailedFilterBase((byte) 'T', 9130708757120638831L);
+ acc1.addFailedFilterBase((byte) 'T', 2603036776856178377L);
+ acc1.addFailedFilterBase((byte) 'T', 5735814374756852238L);
+ acc1.addFailedFilterBase((byte) 'T', 7540064501637539247L);
+ acc1.addFailedFilterBase((byte) 'T', 6419930603122219958L);
+ acc1.addFailedFilterBase((byte) 'T', 8210168556586079108L);
+ acc1.addFailedFilterBase((byte) 'T', 3268524321762958948L);
+ acc1.addFailedFilterBase((byte) 'T', -138463099035032818L);
+ acc1.addFailedFilterBase((byte) 'T', 2609032967337224118L);
+ acc1.addFailedFilterBase((byte) 'T', 1180519307271286853L);
+ acc1.addFailedFilterBase((byte) 'T', 3647809539552025230L);
+ acc1.addFailedFilterBase((byte) 'T', 8493692125897398515L);
+ acc1.addFailedFilterBase((byte) 'T', 3315994573281898804L);
+ acc1.addFailedFilterBase((byte) 'T', 724501628528950317L);
+ acc1.addFailedFilterBase((byte) 'T', 2324044325092459430L);
+ acc1.addFailedFilterBase((byte) 'T', 4824931694165828140L);
+ acc1.addFailedFilterBase((byte) 'T', 1027175994126509642L);
+ acc1.addFailedFilterBase((byte) 'T', -2957957358873393133L);
+ acc1.addFailedFilterBase((byte) 'T', 3268524321762958948L);
+ acc1.addFailedFilterBase((byte) 'T', -8828184361023553865L);
+ acc1.addFailedFilterBase((byte) 'T', -8251184108896989485L);
+ acc1.addFailedFilterBase((byte) 'T', 4329503331942280002L);
+ acc1.addFailedFilterBase((byte) 'T', 3244033920476022406L);
+ acc1.addFailedFilterBase((byte) 'T', 6461699796293877052L);
+ acc1.addFailedFilterBase((byte) 'T', -1679260168527306503L);
+ acc1.addFailedFilterBase((byte) 'T', 1216738602136290533L);
+ acc1.addFailedFilterBase((byte) 'T', 1144424180764508213L);
+ acc1.addFailedFilterBase((byte) 'T', 7433969058982544698L);
+ acc1.addFailedFilterBase((byte) 'T', 8740981370654066209L);
+ acc1.addFailedFilterBase((byte) 'T', -7503905741508074480L);
+ acc1.addFailedFilterBase((byte) 'T', 9219990827317843535L);
+ acc1.addFailedFilterBase((byte) 'T', -562526361002022550L);
+ acc1.addFailedFilterBase((byte) 'T', -1583125034918006130L);
+ acc1.addFailedFilterBase((byte) 'T', 7486429413782649127L);
+ acc1.addFailedFilterBase((byte) 'T', -1476485875783679687L);
+ acc1.addFailedFilterBase((byte) 'T', -7726467777379464574L);
+ acc1.addFailedFilterBase((byte) 'T', 6145111076905202179L);
+ acc1.addFailedFilterBase((byte) 'T', -6962289282457110290L);
+ acc1.addFailedFilterBase((byte) 'T', 1829825168833198510L);
+ acc1.addFailedFilterBase((byte) 'T', -8597867675373289290L);
+ acc1.addFailedFilterBase((byte) 'T', -5752866221558017591L);
+ acc1.addFailedFilterBase((byte) 'T', 1015854181370342302L);
+ acc1.addFailedFilterBase((byte) 'T', 35473122144577186L);
+ acc1.addFailedFilterBase((byte) 'T', 7825657632162557305L);
+ acc1.addFailedFilterBase((byte) 'T', -4454819942067680881L);
+ acc1.addFailedFilterBase((byte) 'T', -3618246714841591676L);
+ acc1.addFailedFilterBase((byte) 'T', 8557524143668341863L);
+ acc1.addFailedFilterBase((byte) 'T', 7092326401254028467L);
+ acc1.addFailedFilterBase((byte) 'T', 4031901574327385806L);
+ acc1.addFailedFilterBase((byte) 'T', 7382490855834685218L);
+ acc1.addFailedFilterBase((byte) 'T', -7609961797280175578L);
+ acc1.addFailedFilterBase((byte) 'T', 6264118358119675396L);
+ acc1.addFailedFilterBase((byte) 'T', 3774827528163625704L);
+ acc1.addFailedFilterBase((byte) 'T', 8493692125897398515L);
+ acc1.addFailedFilterBase((byte) 'T', 3728160140633686841L);
+ acc1.addFailedFilterBase((byte) 'T', 876929434758225646L);
+ acc1.addFailedFilterBase((byte) 'T', 4853207379767922037L);
+ acc1.addFailedFilterBase((byte) 'T', 7915916741906599442L);
+ acc1.addFailedFilterBase((byte) 'T', -8874197118979016859L);
+ acc1.addFailedFilterBase((byte) 'T', -2135344415274795553L);
+ acc1.addFailedFilterBase((byte) 'T', 7092828207829028047L);
+ acc1.addFailedFilterBase((byte) 'T', 8518363537084736565L);
+ // acc1 above: 6 C ids, then 97 T additions in which some ids occur twice (e.g. 3268524321762958948L, 8740981370654066209L)
+ Accumulator acc2 = new Accumulator(12346);
+ // acc2: A for two of acc1's C ids, then G ids; -8252885584572880588L (the last one added) does not occur in acc1
+ acc2.addFailedFilterBase((byte) 'A', 419015429944394057L);
+ acc2.addFailedFilterBase((byte) 'A', -4982705001061907857L);
+ acc2.addFailedFilterBase((byte) 'G', -7438533981666644002L);
+ acc2.addFailedFilterBase((byte) 'G', 1782036110835987330L);
+ acc2.addFailedFilterBase((byte) 'G', -8751309811463707606L);
+ acc2.addFailedFilterBase((byte) 'G', -1006789886807925312L);
+ acc2.addFailedFilterBase((byte) 'G', 267604890061977987L);
+ acc2.addFailedFilterBase((byte) 'G', -5752866221558017591L);
+ acc2.addFailedFilterBase((byte) 'G', 8813529100103331852L);
+ acc2.addFailedFilterBase((byte) 'G', 8740981370654066209L);
+ acc2.addFailedFilterBase((byte) 'G', 3875546135377654529L);
+ acc2.addFailedFilterBase((byte) 'G', 3156183271998102851L);
+ acc2.addFailedFilterBase((byte) 'G', 496025280786752802L);
+ acc2.addFailedFilterBase((byte) 'G', -5370540407260949421L);
+ acc2.addFailedFilterBase((byte) 'G', -1034241958712548931L);
+ acc2.addFailedFilterBase((byte) 'G', 4760541127041820054L);
+ acc2.addFailedFilterBase((byte) 'G', -478460491748683719L);
+ acc2.addFailedFilterBase((byte) 'G', 1080824909688009888L);
+ acc2.addFailedFilterBase((byte) 'G', -248833557815058818L);
+ acc2.addFailedFilterBase((byte) 'G', -836224118038877802L);
+ acc2.addFailedFilterBase((byte) 'G', -6251926633067930061L);
+ acc2.addFailedFilterBase((byte) 'G', 5693373976150522013L);
+ acc2.addFailedFilterBase((byte) 'G', -8618732021493771414L);
+ acc2.addFailedFilterBase((byte) 'G', -3535500610440114311L);
+ acc2.addFailedFilterBase((byte) 'G', -8607579577620952717L);
+ acc2.addFailedFilterBase((byte) 'G', 3617903064101974186L);
+ acc2.addFailedFilterBase((byte) 'G', 7715117792191186532L);
+ acc2.addFailedFilterBase((byte) 'G', -292355879921902716L);
+ acc2.addFailedFilterBase((byte) 'G', -3333658515894473867L);
+ acc2.addFailedFilterBase((byte) 'G', -6061011605950203461L);
+ acc2.addFailedFilterBase((byte) 'G', -2558972151765706823L);
+ acc2.addFailedFilterBase((byte) 'G', 8278189905233837843L);
+ acc2.addFailedFilterBase((byte) 'G', 3158165356931543161L);
+ acc2.addFailedFilterBase((byte) 'G', 8095381987075818260L);
+ acc2.addFailedFilterBase((byte) 'G', 4995250372108758135L);
+ acc2.addFailedFilterBase((byte) 'G', 4308956381034063213L);
+ acc2.addFailedFilterBase((byte) 'G', -3975576866416630705L);
+ acc2.addFailedFilterBase((byte) 'G', 215674860906757583L);
+ acc2.addFailedFilterBase((byte) 'G', 9130708757120638831L);
+ acc2.addFailedFilterBase((byte) 'G', 2603036776856178377L);
+ acc2.addFailedFilterBase((byte) 'G', 5735814374756852238L);
+ acc2.addFailedFilterBase((byte) 'G', 7540064501637539247L);
+ acc2.addFailedFilterBase((byte) 'G', 6419930603122219958L);
+ acc2.addFailedFilterBase((byte) 'G', 8210168556586079108L);
+ acc2.addFailedFilterBase((byte) 'G', 3268524321762958948L);
+ acc2.addFailedFilterBase((byte) 'G', -138463099035032818L);
+ acc2.addFailedFilterBase((byte) 'G', 2609032967337224118L);
+ acc2.addFailedFilterBase((byte) 'G', 1180519307271286853L);
+ acc2.addFailedFilterBase((byte) 'G', 3647809539552025230L);
+ acc2.addFailedFilterBase((byte) 'G', 8493692125897398515L);
+ acc2.addFailedFilterBase((byte) 'G', 3315994573281898804L);
+ acc2.addFailedFilterBase((byte) 'G', 724501628528950317L);
+ acc2.addFailedFilterBase((byte) 'G', 2324044325092459430L);
+ acc2.addFailedFilterBase((byte) 'G', 4824931694165828140L);
+ acc2.addFailedFilterBase((byte) 'G', 1027175994126509642L);
+ acc2.addFailedFilterBase((byte) 'G', -2957957358873393133L);
+ acc2.addFailedFilterBase((byte) 'G', 3268524321762958948L);
+ acc2.addFailedFilterBase((byte) 'G', -5310912577898632174L);
+ acc2.addFailedFilterBase((byte) 'G', -8828184361023553865L);
+ acc2.addFailedFilterBase((byte) 'G', -8251184108896989485L);
+ acc2.addFailedFilterBase((byte) 'G', 4329503331942280002L);
+ acc2.addFailedFilterBase((byte) 'G', 3244033920476022406L);
+ acc2.addFailedFilterBase((byte) 'G', 6461699796293877052L);
+ acc2.addFailedFilterBase((byte) 'G', -1679260168527306503L);
+ acc2.addFailedFilterBase((byte) 'G', 1216738602136290533L);
+ acc2.addFailedFilterBase((byte) 'G', 4442000349200974418L);
+ acc2.addFailedFilterBase((byte) 'G', 1144424180764508213L);
+ acc2.addFailedFilterBase((byte) 'G', 7433969058982544698L);
+ acc2.addFailedFilterBase((byte) 'G', 8740981370654066209L);
+ acc2.addFailedFilterBase((byte) 'G', -7503905741508074480L);
+ acc2.addFailedFilterBase((byte) 'G', 9219990827317843535L);
+ acc2.addFailedFilterBase((byte) 'G', -562526361002022550L);
+ acc2.addFailedFilterBase((byte) 'G', -1583125034918006130L);
+ acc2.addFailedFilterBase((byte) 'G', 7486429413782649127L);
+ acc2.addFailedFilterBase((byte) 'G', -1476485875783679687L);
+ acc2.addFailedFilterBase((byte) 'G', -7726467777379464574L);
+ acc2.addFailedFilterBase((byte) 'G', 6145111076905202179L);
+ acc2.addFailedFilterBase((byte) 'G', -6962289282457110290L);
+ acc2.addFailedFilterBase((byte) 'G', 1829825168833198510L);
+ acc2.addFailedFilterBase((byte) 'G', -8597867675373289290L);
+ acc2.addFailedFilterBase((byte) 'G', -5752866221558017591L);
+ acc2.addFailedFilterBase((byte) 'G', 1015854181370342302L);
+ acc2.addFailedFilterBase((byte) 'G', 35473122144577186L);
+ acc2.addFailedFilterBase((byte) 'G', -6357506197841616687L);
+ acc2.addFailedFilterBase((byte) 'G', 7825657632162557305L);
+ acc2.addFailedFilterBase((byte) 'G', -4454819942067680881L);
+ acc2.addFailedFilterBase((byte) 'G', -3618246714841591676L);
+ acc2.addFailedFilterBase((byte) 'G', 8557524143668341863L);
+ acc2.addFailedFilterBase((byte) 'G', 7092326401254028467L);
+ acc2.addFailedFilterBase((byte) 'G', 4031901574327385806L);
+ acc2.addFailedFilterBase((byte) 'G', 7382490855834685218L);
+ acc2.addFailedFilterBase((byte) 'G', -7609961797280175578L);
+ acc2.addFailedFilterBase((byte) 'G', 6264118358119675396L);
+ acc2.addFailedFilterBase((byte) 'G', 3774827528163625704L);
+ acc2.addFailedFilterBase((byte) 'G', 8493692125897398515L);
+ acc2.addFailedFilterBase((byte) 'G', 3728160140633686841L);
+ acc2.addFailedFilterBase((byte) 'G', 876929434758225646L);
+ acc2.addFailedFilterBase((byte) 'G', 4853207379767922037L);
+ acc2.addFailedFilterBase((byte) 'G', 7915916741906599442L);
+ acc2.addFailedFilterBase((byte) 'G', -8874197118979016859L);
+ acc2.addFailedFilterBase((byte) 'G', -2135344415274795553L);
+ acc2.addFailedFilterBase((byte) 'G', 7092828207829028047L);
+ acc2.addFailedFilterBase((byte) 'G', 8518363537084736565L);
+ acc2.addFailedFilterBase((byte) 'G', -8751309811463707606L);
+ acc2.addFailedFilterBase((byte) 'G', -8252885584572880588L);
+ // expected TG93 rather than TG97: an id added more than once is counted a single time; the acc2-only id gives _G1
+ List accumulators = Arrays.asList(acc1, acc2);
+ assertEquals("CA2;CG4;TG93;_G1", PipelineUtil.getFailedFilterCS(accumulators));
+
+ }
-// @Test
-// public void getSkeletonVcf() {
-// List<VcfRecord> snps = new ArrayList<>();
-// snps.add(VcfUtils.createVcfRecord(new ChrPointPosition("1", 99),null,"A","C"));
-// snps.add(VcfUtils.createVcfRecord(new ChrPointPosition("1", 101),null,"A","C"));
-// snps.add(VcfUtils.createVcfRecord(new ChrPointPosition("1", 102),null,"A","C"));
-// snps.add(VcfUtils.createVcfRecord(new ChrPointPosition("1", 103),null,"A","C"));
-// snps.add(VcfUtils.createVcfRecord(new ChrPointPosition("1", 104),null,"A","C"));
-// snps.add(VcfUtils.createVcfRecord(new ChrPointPosition("1", 106),null,"A","C"));
-//
-// List<List<VcfRecord>> loloVcfs = PipelineUtil.listOfListOfAdjacentVcfs(snps);
-// assertEquals(1, loloVcfs.size());
-// VcfRecord v = PipelineUtil.createSkeletonCompoundSnp(loloVcfs.get(0));
-// assertEquals("1", v.getChrPosition().getChromosome());
-// assertEquals(101, v.getChrPosition().getStartPosition());
-// assertEquals("AAAA", v.getRef());
-// assertEquals("CCCC", v.getAlt());
-//
-// }
+ /**
+  * Checks the string returned by PipelineUtil.getFailedFilterCS for two accumulators that hold
+  * failed-filter 'A'/'T' bases under partly overlapping long keys. The 55 read names listed first
+  * contain 4 repeats (51 distinct). NOTE(review): keys are presumably read-name hashes - confirm.
+  */
+ @Test
+ public void realLifeFunnyBusiness() {
+     List<String> readNames = Arrays.asList("DCW97JN1:295:D1B5AACXX:3:2309:14480:47693",
+         "DCW97JN1:295:D1B5AACXX:5:1111:15402:19442",
+         "DCW97JN1:295:D1B5AACXX:3:2210:16730:78147",
+         "HWI-ST526:219:C16B2ACXX:1:2304:7016:24438",
+         "HWI-ST526:219:C16B2ACXX:1:1307:1391:41225",
+         "HWI-ST526:219:C16B2ACXX:1:1103:16387:29528",
+         "HWI-ST526:219:C16B2ACXX:1:2112:18851:73892",
+         "DCW97JN1:295:D1B5AACXX:4:2114:16640:6591",
+         "DCW97JN1:295:D1B5AACXX:3:1303:9721:20997",
+         "HWI-ST526:219:C16B2ACXX:1:2207:13419:94062",
+         "DCW97JN1:295:D1B5AACXX:3:2309:14480:47693",
+         "DCW97JN1:295:D1B5AACXX:5:2305:3789:14633",
+         "HWI-ST526:219:C16B2ACXX:1:1104:4560:57572",
+         "DCW97JN1:295:D1B5AACXX:6:2111:9627:7792",
+         "DCW97JN1:295:D1B5AACXX:5:2113:4992:94218",
+         "DCW97JN1:295:D1B5AACXX:4:2202:16591:44438",
+         "DCW97JN1:295:D1B5AACXX:4:2205:19357:57058",
+         "DCW97JN1:295:D1B5AACXX:6:2307:12495:71902",
+         "DCW97JN1:295:D1B5AACXX:4:2212:17500:6828",
+         "DCW97JN1:295:D1B5AACXX:5:1109:19970:67964",
+         "DCW97JN1:295:D1B5AACXX:3:2310:15515:71252",
+         "DCW97JN1:295:D1B5AACXX:5:2201:4467:9303",
+         "DCW97JN1:295:D1B5AACXX:6:1115:2311:14137",
+         "DCW97JN1:295:D1B5AACXX:5:1302:19127:9050",
+         "DCW97JN1:295:D1B5AACXX:6:1114:11214:2476",
+         "DCW97JN1:295:D1B5AACXX:5:1212:1161:16540",
+         "DCW97JN1:295:D1B5AACXX:5:1301:16241:83752",
+         "DCW97JN1:295:D1B5AACXX:3:1116:13596:47139",
+         "DCW97JN1:295:D1B5AACXX:5:1307:3521:66193",
+         "DCW97JN1:295:D1B5AACXX:5:2208:16724:61398",
+         "DCW97JN1:295:D1B5AACXX:3:2107:10686:79006",
+         "DCW97JN1:295:D1B5AACXX:4:2107:13457:26494",
+         "HWI-ST526:219:C16B2ACXX:1:2205:13081:23009",
+         "DCW97JN1:295:D1B5AACXX:6:1213:17550:51242",
+         "HWI-ST526:219:C16B2ACXX:1:2312:4528:65210",
+         "DCW97JN1:295:D1B5AACXX:3:1209:5045:36182",
+         "DCW97JN1:295:D1B5AACXX:6:2309:19872:67179",
+         "DCW97JN1:295:D1B5AACXX:4:2216:7610:96187",
+         "DCW97JN1:295:D1B5AACXX:5:1109:17596:79874",
+         "DCW97JN1:295:D1B5AACXX:4:1308:6864:53678",
+         "DCW97JN1:295:D1B5AACXX:6:1314:5251:24654",
+         "DCW97JN1:295:D1B5AACXX:4:1201:1616:97707",
+         "DCW97JN1:295:D1B5AACXX:3:2210:16730:78147",
+         "DCW97JN1:295:D1B5AACXX:3:1310:11233:73883",
+         "HWI-ST526:219:C16B2ACXX:1:2312:13402:11372",
+         "HWI-ST526:219:C16B2ACXX:1:2307:2775:76985",
+         "DCW97JN1:295:D1B5AACXX:6:2313:6436:3202",
+         "DCW97JN1:295:D1B5AACXX:5:1109:5309:44655",
+         "DCW97JN1:295:D1B5AACXX:4:1210:13752:91386",
+         "DCW97JN1:295:D1B5AACXX:4:1210:13752:91386",
+         "DCW97JN1:295:D1B5AACXX:4:2116:13414:59472",
+         "DCW97JN1:295:D1B5AACXX:4:2304:8591:34902",
+         "DCW97JN1:295:D1B5AACXX:4:1206:17734:74060",
+         "DCW97JN1:295:D1B5AACXX:3:1214:4999:4158",
+         "DCW97JN1:295:D1B5AACXX:4:2205:19357:57058");
+     assertEquals(55, readNames.size());
+     // copying into a set collapses the 4 repeated names, leaving 51 distinct entries
+     Set<String> readNameSet = new HashSet<>(readNames);
+     assertEquals(51, readNameSet.size());
+
+     Accumulator acc1 = new Accumulator(12345);
+     acc1.addFailedFilterBase((byte) 'A', 4086900006971767854L);
+     acc1.addFailedFilterBase((byte) 'A', -4716832531774407080L);
+     acc1.addFailedFilterBase((byte) 'A', 6700611398151820155L);
+     acc1.addFailedFilterBase((byte) 'A', 5186962780778754199L);
+     acc1.addFailedFilterBase((byte) 'A', 5122404141805319601L);
+     acc1.addFailedFilterBase((byte) 'A', 8958843543082877714L);
+     acc1.addFailedFilterBase((byte) 'A', 8831294537041955437L);
+     acc1.addFailedFilterBase((byte) 'A', 6048261452686045528L);
+     acc1.addFailedFilterBase((byte) 'A', -27981145301803143L);
+     acc1.addFailedFilterBase((byte) 'A', -1895240219048882894L);
+     acc1.addFailedFilterBase((byte) 'A', -432780264239122912L);
+     acc1.addFailedFilterBase((byte) 'T', -1441445426238881582L);
+     acc1.addFailedFilterBase((byte) 'T', 7422662345407810992L);
+     acc1.addFailedFilterBase((byte) 'T', -27981145301803143L);
+     acc1.addFailedFilterBase((byte) 'T', -3745913329893305916L);
+     acc1.addFailedFilterBase((byte) 'T', -5872199506007072810L);
+     acc1.addFailedFilterBase((byte) 'T', 8018589148027039207L);
+     acc1.addFailedFilterBase((byte) 'T', -6926956303777753425L);
+     acc1.addFailedFilterBase((byte) 'T', -8269550325241252261L);
+     acc1.addFailedFilterBase((byte) 'T', -1441445426238881582L);
+     acc1.addFailedFilterBase((byte) 'T', -4183403039113486709L);
+     acc1.addFailedFilterBase((byte) 'T', -2252558088007944815L);
+     acc1.addFailedFilterBase((byte) 'T', 5095097565078919292L);
+     acc1.addFailedFilterBase((byte) 'T', -5149194738896197730L);
+     acc1.addFailedFilterBase((byte) 'T', -8386429281397821662L);
+     acc1.addFailedFilterBase((byte) 'T', 9018607088384783716L);
+     acc1.addFailedFilterBase((byte) 'T', -4582515424865507576L);
+     acc1.addFailedFilterBase((byte) 'T', 5006058289362654996L);
+     acc1.addFailedFilterBase((byte) 'T', 1570085767318999878L);
+     acc1.addFailedFilterBase((byte) 'T', 6902405747651132852L);
+     acc1.addFailedFilterBase((byte) 'T', 550354402160307514L);
+     acc1.addFailedFilterBase((byte) 'T', -8462486584110695116L);
+     acc1.addFailedFilterBase((byte) 'T', 3461374088017820567L);
+     acc1.addFailedFilterBase((byte) 'T', 281625484296755152L);
+
+     Accumulator acc2 = new Accumulator(12346);
+     acc2.addFailedFilterBase((byte) 'A', -1441445426238881582L);
+     acc2.addFailedFilterBase((byte) 'A', 7422662345407810992L);
+     acc2.addFailedFilterBase((byte) 'A', -27981145301803143L);
+     acc2.addFailedFilterBase((byte) 'A', -3745913329893305916L);
+     acc2.addFailedFilterBase((byte) 'A', -5872199506007072810L);
+     acc2.addFailedFilterBase((byte) 'A', 8018589148027039207L);
+     acc2.addFailedFilterBase((byte) 'A', -8780768932796289538L);
+     acc2.addFailedFilterBase((byte) 'A', -6926956303777753425L);
+     acc2.addFailedFilterBase((byte) 'A', -8269550325241252261L);
+     acc2.addFailedFilterBase((byte) 'A', -1441445426238881582L);
+     acc2.addFailedFilterBase((byte) 'A', -4183403039113486709L);
+     acc2.addFailedFilterBase((byte) 'A', -2252558088007944815L);
+     acc2.addFailedFilterBase((byte) 'A', 5095097565078919292L);
+     acc2.addFailedFilterBase((byte) 'A', -5149194738896197730L);
+     acc2.addFailedFilterBase((byte) 'A', -8386429281397821662L);
+     acc2.addFailedFilterBase((byte) 'A', 9018607088384783716L);
+     acc2.addFailedFilterBase((byte) 'A', -4582515424865507576L);
+     acc2.addFailedFilterBase((byte) 'A', 5006058289362654996L);
+     acc2.addFailedFilterBase((byte) 'A', 1570085767318999878L);
+     acc2.addFailedFilterBase((byte) 'A', 6902405747651132852L);
+     acc2.addFailedFilterBase((byte) 'A', 550354402160307514L);
+     acc2.addFailedFilterBase((byte) 'A', -8462486584110695116L);
+     acc2.addFailedFilterBase((byte) 'A', 3461374088017820567L);
+     acc2.addFailedFilterBase((byte) 'A', 281625484296755152L);
+     acc2.addFailedFilterBase((byte) 'T', 7172062261984156803L);
+     acc2.addFailedFilterBase((byte) 'T', 4537039380766474115L);
+     acc2.addFailedFilterBase((byte) 'T', 4086900006971767854L);
+     acc2.addFailedFilterBase((byte) 'T', -4716832531774407080L);
+     acc2.addFailedFilterBase((byte) 'T', -1351966021709934743L);
+     acc2.addFailedFilterBase((byte) 'T', -330036681603147982L);
+     acc2.addFailedFilterBase((byte) 'T', 1382422122404187806L);
+     acc2.addFailedFilterBase((byte) 'T', 6700611398151820155L);
+     acc2.addFailedFilterBase((byte) 'T', 5186962780778754199L);
+     acc2.addFailedFilterBase((byte) 'T', 5122404141805319601L);
+     acc2.addFailedFilterBase((byte) 'T', 8958843543082877714L);
+     acc2.addFailedFilterBase((byte) 'T', -3052003932123722238L);
+     acc2.addFailedFilterBase((byte) 'T', 6488180788633260269L);
+     acc2.addFailedFilterBase((byte) 'T', 8831294537041955437L);
+     acc2.addFailedFilterBase((byte) 'T', 6048261452686045528L);
+     acc2.addFailedFilterBase((byte) 'T', -27981145301803143L);
+     acc2.addFailedFilterBase((byte) 'T', 8691136542396127591L);
+     acc2.addFailedFilterBase((byte) 'T', 7868192676364940616L);
+     acc2.addFailedFilterBase((byte) 'T', -1895240219048882894L);
+     acc2.addFailedFilterBase((byte) 'T', -432780264239122912L);
+
+     List<Accumulator> accumulators = Arrays.asList(acc1, acc2);
+     assertEquals("AT10;TA21;_A1;_T9", PipelineUtil.getFailedFilterCS(accumulators));
+ }
+
+ /** Asserts the PipelineUtil.getFailedFilterCS string for two accumulators after each of three rounds of additions. */
+ @Test
+ public void testGetFailedFilterCS() {
+     Accumulator acc1 = new Accumulator(12345);
+     acc1.addFailedFilterBase((byte) 'A', 2609032967337224118L);
+     acc1.addFailedFilterBase((byte) 'A', 1180519307271286853L);
+     acc1.addFailedFilterBase((byte) 'A', 3647809539552025230L);
+     Accumulator acc2 = new Accumulator(12346);
+     acc2.addFailedFilterBase((byte) 'A', 2609032967337224118L);
+     acc2.addFailedFilterBase((byte) 'C', 2609032967337224118L);
+     List<Accumulator> accumulators = Arrays.asList(acc1, acc2);
+     assertEquals("A_2", PipelineUtil.getFailedFilterCS(accumulators));
+     // key 3647809539552025230 is already held as 'A' in acc1; adding 'C' under it should lower the count to A_1
+     acc1.addFailedFilterBase((byte) 'C', 3647809539552025230L);
+     assertEquals("A_1", PipelineUtil.getFailedFilterCS(accumulators));
+     // the same key (555) with the same base in both accumulators is expected to add a GG1 entry
+     acc1.addFailedFilterBase((byte) 'G', 555);
+     acc2.addFailedFilterBase((byte) 'G', 555);
+     assertEquals("A_1;GG1", PipelineUtil.getFailedFilterCS(accumulators));
+ }
}