Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 10 additions & 10 deletions q3indel/src/au/edu/qimr/indel/Options.java
Original file line number Diff line number Diff line change
Expand Up @@ -109,9 +109,9 @@ public Options(final String[] args) throws IOException, Q3IndelException {
controlVcf = getIOFromIni(iniFile, INI_SEC_IOS, "controlVcf");
} else if (runMode.equalsIgnoreCase(RUNMODE_DEFAULT)) {
String[] inputs = iniFile.get(INI_SEC_IOS).getAll("inputVcf",String[].class);
for (int i = 0; i < inputs.length; i ++) {
pindelVcfs.add(new File(inputs[i]));
}
for (String input : inputs) {
pindelVcfs.add(new File(input));
}
}

nearbyIndelWindow = Integer.parseInt( iniFile.fetch(INI_SEC_PARAM, "window.nearbyIndel"));
Expand Down Expand Up @@ -140,7 +140,7 @@ private File getIOFromIni(Ini ini, String parent, String child) throws Q3IndelEx
}

String f = ini.fetch(parent, child);
if ( StringUtils.isNullOrEmpty(f) || f.toLowerCase().equals("null")) {
if ( StringUtils.isNullOrEmpty(f) || f.equalsIgnoreCase("null")) {
return null;
}

Expand Down Expand Up @@ -236,14 +236,14 @@ public void detectBadOptions() throws Q3IndelException {
throw new Q3IndelException("FILE_EXISTS_ERROR","(control gatk vcf) " + controlVcf.getAbsolutePath());
}
} else if (RUNMODE_DEFAULT.equalsIgnoreCase(runMode)) {
if (pindelVcfs.size() == 0) {
if (pindelVcfs.isEmpty()) {
throw new Q3IndelException("INPUT_OPTION_ERROR","(pindel input vcf) not specified" );
}
for (int i = 0; i < pindelVcfs.size(); i ++) {
if ( pindelVcfs.get(i) != null && ! pindelVcfs.get(i).exists()) {
throw new Q3IndelException("FILE_EXISTS_ERROR","(control indel vcf) " + pindelVcfs.get(i).getAbsolutePath());
}
}
for (File pindelVcf : pindelVcfs) {
if (pindelVcf != null && !pindelVcf.exists()) {
throw new Q3IndelException("FILE_EXISTS_ERROR", "(control indel vcf) " + pindelVcf.getAbsolutePath());
}
}
} else {
throw new Q3IndelException("UNKNOWN_RUNMODE_ERROR", runMode);
}
Expand Down
33 changes: 14 additions & 19 deletions q3indel/src/au/edu/qimr/indel/pileup/IndelMT.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,7 @@
import java.io.IOException;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.AbstractQueue;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.List;
import java.util.Map;
import java.util.*;
import java.util.Map.Entry;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.CountDownLatch;
Expand Down Expand Up @@ -157,17 +153,16 @@ public void run() {
}
}
/**
* it swap SAMRecord between currentPool and nextPool. After then, the currentPool will contain all SAMRecord overlapping topPos position,
* the nextPool will contain all SAMRecord start after topPos position. All SAMRecord end before topPos position will be remvoved from both pool.
* Swaps SAMRecords between currentPool and nextPool. Afterwards, currentPool will contain all SAMRecords overlapping the topPos position,
* and nextPool will contain all SAMRecords starting after the topPos position. All SAMRecords ending before the topPos position will be removed from both pools.
* @param topPos pileup position
* @param currentPool a list of SAMRecord overlapped previous pileup Position
* @param currentPool a list of SAMRecord overlapped the previous pileup Position
* @param nextPool a list of SAMRecord behind previous pileup Position
*/
void resetPool(IndelPosition topPos, List<SAMRecord> currentPool, List<SAMRecord> nextPool) {

List<SAMRecord> tmpCurrentPool = new ArrayList<>();
List<SAMRecord> tmpPool = new ArrayList<>();
tmpPool.addAll(nextPool);
List<SAMRecord> tmpCurrentPool = new ArrayList<>();
List<SAMRecord> tmpPool = new ArrayList<>(nextPool);

//check read record behind on current position
for (SAMRecord re : tmpPool ) {
Expand Down Expand Up @@ -237,14 +232,14 @@ public IndelMT(Options options, QLogger logger) throws IOException {
}
logger.info(indelload.getCountsNewIndel() + " indels are found from control vcf input.");
logger.info(indelload.getCountsMultiIndel() + " indels are split from multi alleles in control vcf.");
logger.info(indelload.getCountsInputLine() + " variant records exsit inside control vcf input.");
logger.info(indelload.getCountsInputMultiAlt() + " variant records with multi alleles exsits inside control vcf input.");
logger.info(indelload.getCountsInputLine() + " variant records exist inside control vcf input.");
logger.info(indelload.getCountsInputMultiAlt() + " variant records with multi alleles exists inside control vcf input.");
}
//then test second column
if (options.getTestInputVcf() != null) {
indelload.appendTestIndels(options.getTestInputVcf());
logger.info(indelload.getCountsInputLine() + " variant records exsit inside test vcf input.");
logger.info(indelload.getCountsInputMultiAlt() + " variants record with multi alleles exsits inside test vcf input.");
logger.info(indelload.getCountsInputLine() + " variant records exist inside test vcf input.");
logger.info(indelload.getCountsInputMultiAlt() + " variants record with multi alleles exists inside test vcf input.");
logger.info(indelload.getCountsMultiIndel() + " indels are split from multi alleles inside test vcf");
logger.info(indelload.getCountsNewIndel() + " new indels are found in test vcf input only.");
logger.info(indelload.getCountsOverlapIndel() + " indels are found in both control and test vcf inputs.");
Expand Down Expand Up @@ -390,7 +385,7 @@ private void writeVCF(AbstractQueue<IndelPileup> tumourQueue, AbstractQueue<Inde
}
}
}
logger.info("outputed VCF record: " + count);
logger.info("outputted VCF record: " + count);
logger.info("including somatic record: " + somaticCount);
}
}
Expand Down Expand Up @@ -470,7 +465,7 @@ private void getHeaderForIndel(VcfHeader header ) throws IOException {
header.addInfo(IndelUtils.INFO_SVTYPE, "1", "String",IndelUtils.DESCRIPTION_INFO_SVTYPE);

header.addFormat(VcfHeaderUtils.FORMAT_GENOTYPE_DETAILS, "1","String", "Genotype details: specific alleles");
header.addFormat(IndelUtils.FORMAT_ACINDEL, ".", "String", IndelUtils.DESCRIPTION_FORMAT_ACINDEL); //vcf validataion
header.addFormat(IndelUtils.FORMAT_ACINDEL, ".", "String", IndelUtils.DESCRIPTION_FORMAT_ACINDEL); //vcf validation

/*
* overwrite the AD and PL header supplied by GATK as we will have samples with no data/coverage, and a number set to 'R' for the AD field causes the validator to complain
Expand All @@ -491,7 +486,7 @@ private void getHeaderForIndel(VcfHeader header ) throws IOException {
* @return a sorted list of IndelPosition on this contig; return whole reference indels if contig is null
*/
private AbstractQueue<IndelPosition> getIndelList( SAMSequenceRecord contig) {
if (positionRecordMap == null || positionRecordMap.size() == 0) {
if (positionRecordMap == null || positionRecordMap.isEmpty()) {
return new ConcurrentLinkedQueue<>();
}

Expand All @@ -504,7 +499,7 @@ private AbstractQueue<IndelPosition> getIndelList( SAMSequenceRecord contig) {
}

//lambda expression to replace abstract method
list.sort( (IndelPosition o1, IndelPosition o2) -> o1.getChrRangePosition().compareTo( o2.getChrRangePosition()) );
list.sort(Comparator.comparing(IndelPosition::getChrRangePosition));

return new ConcurrentLinkedQueue<>(list);
}
Expand Down
10 changes: 5 additions & 5 deletions q3indel/src/au/edu/qimr/indel/pileup/IndelPileup.java
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ private List<SAMRecord> getRegionIndels(List<SAMRecord> pool, int window) {
for (CigarElement ce : cigar.getCigarElements()) {
//insertion only one base, eg, start = 100; end = 101
if (CigarOperator.I == ce.getOperator()) {
//check whether it is supporting or partical indel
//check whether it is supporting or partial indel
//if(refPos == indelStart ){
if (refPos >= indelStart && refPos <= indelEnd) {
if (type.equals(SVTYPE.DEL) ) {
Expand All @@ -122,7 +122,7 @@ private List<SAMRecord> getRegionIndels(List<SAMRecord> pool, int window) {
support = true; // refPos==indelStart=indelEnd
}
} else if (refPos > windowStart && refPos < windowEnd) {
nearby = true; //nearby insertion overlap the window
nearby = true; //nearby insertion overlaps the window
}
} else if ( CigarOperator.D == ce.getOperator()) {
//deletion overlaps variants, full/part supporting reads
Expand All @@ -133,7 +133,7 @@ private List<SAMRecord> getRegionIndels(List<SAMRecord> pool, int window) {
//indel chock have base on both side of indel region
|| (refPos <= indelStart && refPos + ce.getLength() - 1 >= indelEnd)) {
if (type.equals(SVTYPE.INS)) {
nearby = true; //nearyby deletion
nearby = true; //nearby deletion
} else {
support = true; // supporting or partial
}
Expand Down Expand Up @@ -163,7 +163,7 @@ private List<SAMRecord> getRegionIndels(List<SAMRecord> pool, int window) {

this.nearbyIndel = count;

// all nearby/faraway indle reads are removed
// all nearby/faraway indel reads are removed
return regionPool;
}

Expand Down Expand Up @@ -191,7 +191,7 @@ private int[] getCounts(List<SAMRecord> pool, String motif) {
Cigar cigar = re.getCigar();
for (CigarElement ce : cigar.getCigarElements()) {
if (CigarOperator.I == ce.getOperator() && (refPos == indelEnd && type.equals(SVTYPE.INS))) {
//if insert rePos go next cigar block after cigar.I, which is indel end position
//if insert, rePos goes to the next cigar block after cigar.I, which is the indel end position
if (ce.getLength() != motif.length()) {
partialflag = true;
} else {
Expand Down
26 changes: 10 additions & 16 deletions q3indel/src/au/edu/qimr/indel/pileup/IndelPosition.java
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,9 @@ public IndelPosition(VcfRecord re, SVTYPE type) {
position = new ChrRangePosition(fullChromosome, start, end);
}

//next job: check all vcfs are same type, start and end
//next job: check all vcfs are the same type, start and end
public IndelPosition(List<VcfRecord> res, SVTYPE type) {
this(res.get(0), type);
this(res.getFirst(), type);
//append all vcfs
vcfs.clear();
vcfs.addAll(res);
Expand Down Expand Up @@ -154,21 +154,15 @@ public List<String> getMotifs( ) {
@Override
public boolean equals(final Object o) {

if (!(o instanceof IndelPosition)) {
if (!(o instanceof IndelPosition other)) {
return false;
}

final IndelPosition other = (IndelPosition) o;

if (! this.mutationType .equals(other.mutationType)) {
return false;
}

if ( ! this.position.equals(other.position)) {

if (! this.mutationType .equals(other.mutationType)) {
return false;
}
return true;

return this.position.equals(other.position);
}

@Override
Expand Down Expand Up @@ -203,14 +197,14 @@ public VcfRecord getPileupedVcf(int index, final int gematicNNS, final float gem
return re;
}

boolean somatic = (re.getFilter().equals(ReadIndels.FILTER_SOMATIC)) ? true : false;
boolean somatic = re.getFilter().equals(ReadIndels.FILTER_SOMATIC);
if (normalPileup != null && somatic) {
if ( normalPileup.getSupportReadCount(index) > gematicNNS ) {
somatic = false;
} else if (normalPileup.getInformativeCount() > 0) {
int scount = normalPileup.getSupportReadCount(index);
int icount = normalPileup.getInformativeCount();
if ((100 * scount / icount) >= (gematicSOI * 100)) {
if (((float) (100 * scount) / icount) >= (gematicSOI * 100)) {
somatic = false;
}
}
Expand Down Expand Up @@ -285,7 +279,7 @@ public VcfRecord getPileupedVcf(int index, final int gematicNNS, final float gem
//future job should check GT column
//control always on first column and then test
List<String> field = new ArrayList<>();
field.add( 0, (genotypeField.size() > 0) ? genotypeField.get(0) + ":" + IndelUtils.FORMAT_ACINDEL : IndelUtils.FORMAT_ACINDEL );
field.add( 0, (!genotypeField.isEmpty()) ? genotypeField.get(0) + ":" + IndelUtils.FORMAT_ACINDEL : IndelUtils.FORMAT_ACINDEL );
field.add( 1, (genotypeField.size() > 1) ? genotypeField.get(1) + ":" + nd : nd);
field.add( 2, (genotypeField.size() > 2) ? genotypeField.get(2) + ":" + td : td);
re.setFormatFields( field );
Expand Down
25 changes: 12 additions & 13 deletions q3indel/src/au/edu/qimr/indel/pileup/ReadIndels.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,10 @@ public class ReadIndels {
private VcfHeader header;

private static final int errRecordLimit = 100;
//counts from each input, {No of new indel, No of overlap indel, No of indels with mult Allel, No of inputs variants, No of input variants with multi Allel}
//counts from each input, {No of new indel, No of overlap indel, No of indels with multi Allele, No of inputs variants, No of input variants with multi Allele}
private final int[] counts = {0, 0, 0, 0, 0};

//here key will be uniq for indel: chr, start, end, allel
//here key will be unique for indel: chr, start, end, allele
private final Map<VcfRecord, VcfRecord> positionRecordMap = new ConcurrentHashMap<>();

public ReadIndels( QLogger logger) {
Expand All @@ -49,7 +49,7 @@ public void appendTestIndels(File f) throws IOException {
List<String> format = vcf.getFormatFields();
if (format != null) {
while (format.size() > 2) {
format.remove(format.size() - 1);
format.removeLast();
}
vcf.setFormatFields(format);
VcfUtils.addMissingDataToFormatFields(vcf, 2);
Expand Down Expand Up @@ -94,12 +94,12 @@ private boolean mergeTestIndel(VcfRecord secVcf) {
//only keep first sample column of second vcf
List<String> secformat = secVcf.getFormatFields();
while (secformat != null && secformat.size() > 2 ) {
secformat.remove(secformat.size() - 1);
secformat.removeLast();
}

//copy secVcf with sample column "FORMAT <missing data> oriSample" only
if (existingvcf == null) {
//the test only indel always set as somatic, even gatkeTest runMode
//the test only indel always set as somatic, even gatk Test runMode
existingvcf = new VcfRecord.Builder(secVcf.getChrPosition(), secVcf.getRef())
.id(secVcf.getId()).allele(secVcf.getAlt()).filter(FILTER_SOMATIC).build();

Expand All @@ -116,15 +116,15 @@ private boolean mergeTestIndel(VcfRecord secVcf) {
return false;
} else {
//gatk mode already set filter as "." (germline) ignore pileup, since they also appear in the control vcf
//only keep first sample column of exsiting vcf
//only keep first sample column of existing vcf
List<String> format1 = existingvcf.getFormatFields();

if (format1 != null) {
while (format1.size() > 2 ) {
format1.remove(format1.size() - 1);
format1.removeLast();
}
existingvcf.setFormatFields(format1);
//merge exiting and second vcf format, the exsitingvcf already inside map
//merge existing and second vcf format, the existing vcf is already inside the map
VcfUtils.addAdditionalSampleToFormatField(existingvcf, secformat) ;
}
return true;
Expand Down Expand Up @@ -178,7 +178,7 @@ public void loadIndels(File f) throws IOException {
vcf1.setFilter(Constants.MISSING_DATA_STRING);

if (positionRecordMap.containsKey(vcf1) && (indelOverlap ++) < errRecordLimit) {
logger.warn("same variants already exsits, this one will be discard:\n" + positionRecordMap.get(vcf1).toString() );
logger.warn("same variants already exists, this one will be discard:\n" + positionRecordMap.get(vcf1).toString() );
continue; //no overwrite but just warning
}
positionRecordMap.put(vcf1, vcf1);
Expand All @@ -197,7 +197,7 @@ public void loadIndels(File f) throws IOException {
}

/**
* change the input vcf by putting '.' on "GT" field if there are multi Alleles existis;
* change the input vcf by putting '.' on "GT" field if multi Alleles exist;
* do nothing if not multi Alleles or not GT field on format column
* @param vcf input vcf record
*/
Expand All @@ -211,7 +211,7 @@ public void resetGenotype(VcfRecord vcf) {
//add GD to second field
VcfFormatFieldRecord[] frecords = new VcfFormatFieldRecord[format.size() - 1];
for (int i = 1; i < format.size(); i ++) {
VcfFormatFieldRecord re = new VcfFormatFieldRecord(format.get(0), format.get(i));
VcfFormatFieldRecord re = new VcfFormatFieldRecord(format.getFirst(), format.get(i));
String gd = IndelUtils.getGenotypeDetails(re, vcf.getRef(), vcf.getAlt() );
re.setField(1, VcfHeaderUtils.FORMAT_GENOTYPE_DETAILS, gd == null ? Constants.MISSING_DATA_STRING : gd);

Expand All @@ -229,7 +229,7 @@ public void resetGenotype(VcfRecord vcf) {
for (int i = 1; i < frecords.length; i++) {
//the exception shouldn't happen
if ( !frecords[i].getFormatColumnString().equals(frecords[0].getFormatColumnString())) {
throw new IllegalArgumentException("both sample column with differnt format column: \n"
throw new IllegalArgumentException("both sample column with different format column: \n"
+ frecords[0].getFormatColumnString() + "\n" + frecords[i].getFormatColumnString());
}
format.add(frecords[i].getSampleColumnString());
Expand All @@ -241,7 +241,6 @@ public void resetGenotype(VcfRecord vcf) {
/**
*
* @return a map in which the key is the indel position and the value is the list of vcf records at that position.
* @throws Exception
*/
public Map<ChrRangePosition, IndelPosition> getIndelMap() throws Q3IndelException {

Expand Down
Loading