From 8b22945bce021bd927e766996e94070fbf97973e Mon Sep 17 00:00:00 2001 From: Oliver Holmes Date: Fri, 14 Jul 2023 10:17:54 +1000 Subject: [PATCH 1/2] fix(qvisualise/qprofiler): deal with new fastq header and make qvis more robust to deal with these 2 issues here, qprofiler was not aware of this new fastq header format and so each header became a unique instrument (all 28 million of them). Qvisualise would then run out of memory trying to deal with them all. Have added code to properly deal with the new format, in qprofiler, and perhaps more importantly, added some limits to the size of collections that can be handled by qvisualise. --- .../qprofiler/fastq/FastqSummaryReport.java | 44 +- .../fastq/FastqSummaryReportTest.java | 124 +- .../src/org/qcmg/qvisualise/QVisualise.java | 109 +- .../qcmg/qvisualise/report/ReportBuilder.java | 1028 ++++++++--------- .../util/QProfilerCollectionsUtils.java | 168 +-- 5 files changed, 770 insertions(+), 703 deletions(-) diff --git a/qprofiler/src/org/qcmg/qprofiler/fastq/FastqSummaryReport.java b/qprofiler/src/org/qcmg/qprofiler/fastq/FastqSummaryReport.java index bbf390a89..386aa387f 100644 --- a/qprofiler/src/org/qcmg/qprofiler/fastq/FastqSummaryReport.java +++ b/qprofiler/src/org/qcmg/qprofiler/fastq/FastqSummaryReport.java @@ -36,14 +36,12 @@ public class FastqSummaryReport extends SummaryReport { private static final Integer i = Integer.MAX_VALUE; //SEQ - private final SummaryByCycleNew2 seqByCycle = new SummaryByCycleNew2(c, 512); - private Map seqLineLengths = null; + private final SummaryByCycleNew2 seqByCycle = new SummaryByCycleNew2<>(c, 512); private final QCMGAtomicLongArray seqBadReadLineLengths = new QCMGAtomicLongArray(128); private final KmersSummary kmersSummary = new KmersSummary( KmersSummary.MAX_KMERS ); //default use biggest mers length //QUAL - private final SummaryByCycleNew2 qualByCycleInteger = new SummaryByCycleNew2(i, 512); - private Map qualLineLengths = null; + private final SummaryByCycleNew2 
qualByCycleInteger = new SummaryByCycleNew2<>(i, 512); private final QCMGAtomicLongArray qualBadReadLineLengths = new QCMGAtomicLongArray(128); // Header info @@ -114,8 +112,8 @@ public void toXml(Element parent) { SummaryReportUtils.lengthMapToXml(readNameElement, "QUAL_HEADERS", qualHeaders); // create the length maps here from the cycles objects - seqLineLengths = SummaryByCycleUtils.getLengthsFromSummaryByCycle(seqByCycle, getRecordsParsed()); - qualLineLengths = SummaryByCycleUtils.getLengthsFromSummaryByCycle(qualByCycleInteger, getRecordsParsed()); + Map seqLineLengths = SummaryByCycleUtils.getLengthsFromSummaryByCycle(seqByCycle, getRecordsParsed()); + Map qualLineLengths = SummaryByCycleUtils.getLengthsFromSummaryByCycle(qualByCycleInteger, getRecordsParsed()); // SEQ Element seqElement = createSubElement(element, "SEQ"); @@ -123,7 +121,7 @@ public void toXml(Element parent) { SummaryReportUtils.lengthMapToXmlTallyItem(seqElement, "LengthTally", seqLineLengths); SummaryReportUtils.lengthMapToXml(seqElement, "BadBasesInReads", seqBadReadLineLengths); - kmersSummary.toXml(seqElement,kmersSummary.MAX_KMERS); + kmersSummary.toXml(seqElement, KmersSummary.MAX_KMERS); kmersSummary.toXml(seqElement,1); //add 1-mers kmersSummary.toXml(seqElement,2); //add 2-mers kmersSummary.toXml(seqElement,3); //add 3-mers @@ -137,11 +135,6 @@ public void toXml(Element parent) { } } - /** - * Reads a row from the text file and returns it as a string - * - * @return next row in file - */ public void parseRecord(FastqRecord record) { if (null != record) { @@ -158,12 +151,19 @@ public void parseRecord(FastqRecord record) { byte[] readBases = record.getReadString().getBytes(); SummaryByCycleUtils.parseCharacterSummary(seqByCycle, readBases, reverseStrand); SummaryReportUtils.tallyBadReadsAsString(readBases, seqBadReadLineLengths); - kmersSummary.parseKmers( readBases, false ); //fastq base are all orignal forward + kmersSummary.parseKmers( readBases, false ); //fastq base are all 
original forward // header stuff - if (record.getReadName().contains(":")) { - String [] headerDetails = TabTokenizer.tokenize(record.getReadName(), ':'); - if (null != headerDetails && headerDetails.length > 0) { + + String headerToUse = record.getReadName(); + int spaceCount = StringUtils.getCount(headerToUse, ' '); + if (spaceCount == 2) { + headerToUse = TabTokenizer.tokenize(headerToUse, ' ')[1]; + } + + if (headerToUse.contains(":")) { + String [] headerDetails = TabTokenizer.tokenize(headerToUse, ':'); + if (headerDetails.length > 0) { //if length is equal to 10, we have the classic Casava 1.8 format @@ -180,7 +180,7 @@ public void parseRecord(FastqRecord record) { // 13051 - x // 2071 - y // 2 - 2nd in pair - if (record.getReadName().contains(" ")) { + if (headerToUse.contains(" ")) { parseFiveElementHeaderWithSpaces(headerDetails); } else { parseFiveElementHeaderNoSpaces(headerDetails); @@ -231,13 +231,13 @@ public void parseRecord(FastqRecord record) { filteredN.incrementAndGet(); } - // skip control bit for now + // skip the control bit for now // indexes if (headerLength > 9) { key = headerDetails[9]; updateMap(indexes, key); - } // thats it!! + } // that's it!! } } } @@ -292,18 +292,18 @@ void parseFiveElementHeaderWithSpaces(String [] params) { // split by space String [] firstElementParams = params[0].split(" "); if (firstElementParams.length != 2) { - throw new UnsupportedOperationException("Incorrect header format encountered in parseFiveElementHeader. Expected '@ERR091788.3104 HSQ955_155:2:1101:13051:2071/2' but recieved: " + Arrays.deepToString(params)); + throw new UnsupportedOperationException("Incorrect header format encountered in parseFiveElementHeader. 
Expected '@ERR091788.3104 HSQ955_155:2:1101:13051:2071/2' but received: " + Arrays.deepToString(params)); } String [] machineAndReadPosition = firstElementParams[0].split("\\."); if (machineAndReadPosition.length != 2) { - throw new UnsupportedOperationException("Incorrect header format encountered in parseFiveElementHeader. Expected '@ERR091788.3104 HSQ955_155:2:1101:13051:2071/2' but recieved: " + Arrays.deepToString(params)); + throw new UnsupportedOperationException("Incorrect header format encountered in parseFiveElementHeader. Expected '@ERR091788.3104 HSQ955_155:2:1101:13051:2071/2' but received: " + Arrays.deepToString(params)); } updateMap(instruments, machineAndReadPosition[0]); String [] flowCellAndRunId = firstElementParams[1].split("_"); if (flowCellAndRunId.length != 2) { - throw new UnsupportedOperationException("Incorrect header format encountered in parseFiveElementHeader. Expected '@ERR091788.3104 HSQ955_155:2:1101:13051:2071/2' but recieved: " + Arrays.deepToString(params)); + throw new UnsupportedOperationException("Incorrect header format encountered in parseFiveElementHeader. 
Expected '@ERR091788.3104 HSQ955_155:2:1101:13051:2071/2' but received: " + Arrays.deepToString(params)); } updateMap(flowCellIds, flowCellAndRunId[0]); diff --git a/qprofiler/test/org/qcmg/qprofiler/fastq/FastqSummaryReportTest.java b/qprofiler/test/org/qcmg/qprofiler/fastq/FastqSummaryReportTest.java index b65b459c2..f0255b754 100644 --- a/qprofiler/test/org/qcmg/qprofiler/fastq/FastqSummaryReportTest.java +++ b/qprofiler/test/org/qcmg/qprofiler/fastq/FastqSummaryReportTest.java @@ -1,51 +1,141 @@ package org.qcmg.qprofiler.fastq; -import static org.junit.Assert.assertEquals; import htsjdk.samtools.fastq.FastqRecord; - -import org.junit.Ignore; import org.junit.Test; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + public class FastqSummaryReportTest { @Test public void parseRecordHeader() { - FastqRecord rec = new FastqRecord("@ERR091788.1 HSQ955_155:2:1101:1473:2037/1", + FastqRecord rec = new FastqRecord("ERR091788.1 HSQ955_155:2:1101:1473:2037/1", "GGGCANCCAGCAGCCCTCGGGGCTTCTCTGTTTATGGAGTAGCCATTCTCGTATCCTTCTACTTTCTTAAACTTTCTTTCACTTACAAAAAAATAGTGGA", - "+", + "", "<@@DD#2AFFHHHCCCCCCBBBBAC>:@CCCCCCBBBBAC>:@BFD=DE?B;@DBDED?DCD?BDDDBBBB + */ + FastqRecord rec = new FastqRecord("HWI-ST590:2:1201:12570:134058#0", + "AATAGTCCTAACGTTCTACATAACTTCAAGTAGTAAAATTCACCATCCTCT", + "", + ":BC8?ABCEBEB9CEBFB@BC;>BFD=DE?B;@DBDED?DCD?BDDDBBBB"); + + FastqSummaryReport report = new FastqSummaryReport(); + report.parseRecord(rec); + assertEquals(1, report.getRecordsParsed()); + assertEquals(1, report.instruments.get("HWI-ST590").intValue()); + assertTrue(report.flowCellIds.isEmpty()); + assertEquals(1, report.flowCellLanes.get("2").intValue()); + assertEquals(1, report.tileNumbers.get(1201).intValue()); + assertEquals(0, report.firstInPair.intValue()); + assertEquals(0, report.secondInPair.intValue()); + } + @Test + public void parseHeaderNoSpace2() { + /* + @V350046278L1C001R00100004433/2 
+CGCTGAAAATTGAAAGCCCGCTTGGGATAAGTGACATTAAGAACTGGCACCGACTGCAGAACCGCAATTTCCAGTTGACGCTAAGTGGGGGCTTATTTAGCACCCAGCTCTGTTTGCCAACACCCCCTGGGCATGAGAGCTCCCCAAGGG ++ +HGGCGEEGDHBBF(EH:7>>G@GH?G?F@?6>CBE?G???BBG<.F:E?CFD?@?A:#5E>5BE/>BFFD+$E,> + */ + FastqRecord rec = new FastqRecord("V350046278L1C001R00100004433/2", + "CGCTGAAAATTGAAAGCCCGCTTGGGATAAGTGACATTAAGAACTGGCACCGACTGCAGAACCGCAATTTCCAGTTGACGCTAAGTGGGGGCTTATTTAGCACCCAGCTCTGTTTGCCAACACCCCCTGGGCATGAGAGCTCCCCAAGGG", + "", + "HGGCGEEGDHBBF(EH:7>>G@GH?G?F@?6>CBE?G???BBG<.F:E?CFD?@?A:#5E>5BE/>BFFD+$E,>"); + + FastqSummaryReport report = new FastqSummaryReport(); + report.parseRecord(rec); + assertEquals(1, report.getRecordsParsed()); + assertTrue(report.instruments.isEmpty()); + assertTrue(report.flowCellIds.isEmpty()); + assertTrue(report.flowCellLanes.isEmpty()); + assertTrue(report.tileNumbers.isEmpty()); + assertEquals(0, report.firstInPair.intValue()); + assertEquals(0, report.secondInPair.intValue()); + } } diff --git a/qvisualise/src/org/qcmg/qvisualise/QVisualise.java b/qvisualise/src/org/qcmg/qvisualise/QVisualise.java index c9162c440..8de960ccd 100644 --- a/qvisualise/src/org/qcmg/qvisualise/QVisualise.java +++ b/qvisualise/src/org/qcmg/qvisualise/QVisualise.java @@ -6,19 +6,10 @@ */ package org.qcmg.qvisualise; -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileWriter; -import java.net.InetAddress; -import java.net.UnknownHostException; -import java.util.ArrayList; -import java.util.List; - import org.qcmg.common.log.QLogger; import org.qcmg.common.log.QLoggerFactory; import org.qcmg.common.model.ProfileType; import org.qcmg.common.util.FileUtils; -import org.qcmg.common.util.LoadReferencedClasses; import org.qcmg.qvisualise.report.HTMLReportGenerator; import org.qcmg.qvisualise.report.Report; import org.qcmg.qvisualise.report.ReportBuilder; @@ -27,54 +18,74 @@ import org.w3c.dom.Element; import org.w3c.dom.NodeList; +import java.io.BufferedWriter; +import java.io.File; +import 
java.io.FileWriter; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.List; + public class QVisualise { - - private static QLogger logger; - private static final String XML_EXTENTION = "xml"; - private static final String HTML_EXTENTION = "html"; + + private static QLogger logger; + private static final String XML_EXTENSION = "xml"; + private static final String HTML_EXTENSION = "html"; private static final String REPORT = "Report"; - + private String inputFile; - private String outputFile; + private String outputFile; private int exitStatus; - + + public static void examineFileSizeAndXmx(String inputFile) throws IOException { + long fileSize = Files.size(Paths.get(inputFile)); + long xmxSize = Runtime.getRuntime().maxMemory(); + logger.info("input file size: " + fileSize); + logger.info("supplied Xmx memory: " + xmxSize); + double ratio = (double)xmxSize / fileSize; + logger.info("memory / file size ratio: " + ratio); + if (ratio < 8) { + logger.warn("There may not be enough memory to load the xml input file. 
Please consider loading a smaller xml file or increasing the Xmx value."); + } + } + protected int engage() throws Exception { - Document doc = XmlReportReader.createDocumentFromFile(new File(inputFile)); - + + examineFileSizeAndXmx(inputFile); + + Document doc = XmlReportReader.createDocumentFromFile(new File(inputFile)); + if (null != doc) { - final List reportElements = new ArrayList(); - final List reports = new ArrayList(); - for (ProfileType type : ProfileType.values()) { + final List reportElements = new ArrayList<>(); + final List reports = new ArrayList<>(); + for (ProfileType type : ProfileType.values()) { if (null != doc.getElementsByTagName(type.getReportName() + REPORT).item(0)) reportElements.add(type); } - - for (ProfileType type : reportElements) { - String reportName = type.getReportName(); + + for (ProfileType type : reportElements) { + String reportName = type.getReportName(); // we could have more than 1 report of a particular type NodeList reportNL = doc.getElementsByTagName(reportName + REPORT); for (int i = 0 ; i < reportNL.getLength() ; i++) { -// Report report = ReportBuilder.buildReport(type, (Element) reportNL.item(i), i+1); Report report = ReportBuilder.buildReport(type, (Element) reportNL.item(i), i+1, (Element) doc.getElementsByTagName("qProfiler").item(0) ); - if (null != report && ! report.getTabs().isEmpty()) + if ( ! 
report.getTabs().isEmpty()) reports.add(report); else logger.info( "no HTML output generated by ReportBuilder.buildReport " + reportName + REPORT ); } } - + if (reports.isEmpty()) { logger.error("no qvisualise output has been generated"); exitStatus = 1; } else { HTMLReportGenerator reportGenerator = new HTMLReportGenerator(reports); String html = reportGenerator.generate(); - - BufferedWriter out = new BufferedWriter(new FileWriter(outputFile)); - try { + + try (BufferedWriter out = new BufferedWriter(new FileWriter(outputFile))) { out.write(html); - } finally { - out.close(); } } } else { @@ -89,15 +100,15 @@ protected int engage() throws Exception { /** * @param args - * @throws Exception + * @throws Exception */ public static void main(String[] args) throws Exception { QVisualise qp = new QVisualise(); int exitStatus = qp.setup(args); System.exit(exitStatus); } - - public int setup(String args[]) throws Exception{ + + public int setup(String [] args) throws Exception{ Options options = new Options(args); if (options.hasHelpOption()) { @@ -113,41 +124,29 @@ public int setup(String args[]) throws Exception{ } else { logger = QLoggerFactory.getLogger(QVisualise.class, options.getLog(), options.getLogLevel()); logger.logInitialExecutionStats("qvisualise", QVisualise.class.getPackage().getImplementationVersion(), args); - + // logger.tool("QVisualise called with following arguments: " + Arrays.deepToString(args)); - + // get list of file names inputFile = options.getInputFile(); outputFile = options.hasOutputOption() ? options.getOutputFile() : inputFile + ".html"; - + // check that input and output files are of the right type - if ( ! FileUtils.isFileTypeValid(inputFile, XML_EXTENTION)) + if ( ! FileUtils.isFileTypeValid(inputFile, XML_EXTENSION)) throw new QVisualiseException("UNSUPPORTED_FILE_TYPE", inputFile); - if ( ! FileUtils.isFileTypeValid(outputFile, HTML_EXTENTION)) + if ( ! 
FileUtils.isFileTypeValid(outputFile, HTML_EXTENSION)) throw new QVisualiseException("UNSUPPORTED_FILE_TYPE", outputFile); - + // now check that we can read the input and write to the output if ( ! FileUtils.canFileBeRead(inputFile)) throw new QVisualiseException("CANT_READ_INPUT_FILE", inputFile); if ( ! FileUtils.canFileBeWrittenTo(outputFile)) throw new QVisualiseException("CANT_WRITE_TO_OUTPUT_FILE", outputFile); - + // don't like empty input files if (FileUtils.isFileEmpty(inputFile)) throw new QVisualiseException("EMPTY_INPUT_FILE", inputFile); - - // setup proxy if running on qcmg-clustermk2 - try { - String host = InetAddress.getLocalHost().getHostName(); - if (host.startsWith("minion")) { - logger.info("setting proxy..."); - System.setProperty("http.proxyHost", "proxy.imb.uq.edu.au"); - System.setProperty("http.proxyPort", "3128"); - } - } catch (UnknownHostException e) { - logger.warn("could not set proxy"); - } - + logger.tool("running qVisualise with input file: " + inputFile + " and outputFile: " + outputFile); return engage(); } diff --git a/qvisualise/src/org/qcmg/qvisualise/report/ReportBuilder.java b/qvisualise/src/org/qcmg/qvisualise/report/ReportBuilder.java index b408863bb..7031b756c 100644 --- a/qvisualise/src/org/qcmg/qvisualise/report/ReportBuilder.java +++ b/qvisualise/src/org/qcmg/qvisualise/report/ReportBuilder.java @@ -6,20 +6,8 @@ */ package org.qcmg.qvisualise.report; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.LinkedHashMap; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.TreeMap; -import java.util.concurrent.atomic.AtomicLong; -import java.util.concurrent.atomic.AtomicLongArray; - +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; import org.qcmg.common.model.CigarStringComparator; import 
org.qcmg.common.model.MAPQMiniMatrix; import org.qcmg.common.model.ProfileType; @@ -35,8 +23,15 @@ import org.w3c.dom.Node; import org.w3c.dom.NodeList; +import java.util.*; +import java.util.Map.Entry; +import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.AtomicLongArray; + public class ReportBuilder { - + + public static QLogger log = QLoggerFactory.getLogger(ReportBuilder.class); + private static final String ISIZE = "TLEN"; private static final String UNMAPPED = "Unmapped"; private static final String DUPLICATE = "Duplicate"; @@ -49,117 +44,109 @@ public class ReportBuilder { private static final int MIN_REPORT_HEIGHT = 540; private static final int MAX_REPORT_WIDTH = 1400; private static final int MIN_REPORT_WIDTH = 800; - + private static final String BAD_READS_DESCRIPTION = Messages.getMessage("BAD_READS_DESCRIPTION"); private static final String BAD_QUALS_DESCRIPTION = Messages.getMessage("BAD_QUALS_DESCRIPTION"); private static final String LINE_LENGTH_DESCRIPTION = Messages.getMessage("LINE_LENGTH_DESCRIPTION"); private static final String TAG_MD_DESCRIPTION = Messages.getMessage("TAG_MD_DESCRIPTION"); private static final String SUMMARY_DESCRIPTION = Messages.getMessage("SUMMARY_DESCRIPTION"); private static final String SUMMARY_NOTES = Messages.getMessage("SUMMARY_NOTES"); - + public static Report buildReport(ProfileType type, Element reportElement, int reportNumberId, Element qProfilerElement) throws QVisualiseException { final Report report = buildReport( type, reportElement, reportNumberId); - + report.setRunBy( qProfilerElement.getAttribute("run_by_user") ); report.setRunOn( qProfilerElement.getAttribute("start_time") ); report.setVersion( qProfilerElement.getAttribute("version") ); - + return report; } public static Report buildReport(ProfileType type, Element reportElement, int reportNumberId) throws QVisualiseException { final String fileName = reportElement.getAttribute("file"); - + final String recordParsed = 
reportElement.getAttribute("records_parsed"); final String duplicateRecordCount = reportElement.getAttribute("duplicate_records"); - final Report report = new Report(type, fileName, recordParsed, duplicateRecordCount); + final Report report = new Report(type, fileName, recordParsed, duplicateRecordCount); reportID = reportNumberId; - + switch (type) { - case BAM: - createBamHeader(reportElement, report); - createSEQ(reportElement, report); - createQUALS(reportElement, report); - createTAGS(reportElement, report); - createISIZE(reportElement, report); - createRNM(reportElement, report); - createRNEXT(reportElement, report); - createCIGAR(reportElement, report); - createMAPQ(reportElement, report); - createFLAGS(reportElement, report); - createCoverage(reportElement, report); - createMatrix(reportElement, report); - createSummary(reportElement, report); - - break; - case QUAL: - for (ChartTab ct : buildMultiTabCycles(false,"Qual", reportElement, "qual", - "QualityByCycle", "BadQualsInReads", CycleDetailUtils.getQualFileCycle(), null, true)) { - report.addTab(ct); - } - break; - case FASTA: - for (ChartTab ct : buildMultiTabCycles(false,"Fasta", reportElement, "fasta", - "ColourByCycle", "BadColoursInReads", CycleDetailUtils.getTagCSNumericCycle(), CS_COLOURS, false)) { - report.addTab(ct); - } - break; - case FASTQ: - createFastqSummary(reportElement, report); - createSEQ(reportElement, report); - createQUALS(reportElement, report); - break; - case FA: - createFaSummary(reportElement, report); - break; - case MA: - for (ChartTab ct : buildMultiTabCycles(false,"Ma", reportElement, "ma", - "ColourByCycle", "BadBasesInReads", CycleDetailUtils.getTagCSNumericCycle(), CS_COLOURS, false)) { - report.addTab(ct); - } - break; + case BAM: + createBamHeader(reportElement, report); + createSEQ(reportElement, report); + createQUALS(reportElement, report); + createTAGS(reportElement, report); + createISIZE(reportElement, report); + createRNM(reportElement, report); + 
createRNEXT(reportElement, report); + createCIGAR(reportElement, report); + createMAPQ(reportElement, report); + createFLAGS(reportElement, report); + createCoverage(reportElement, report); + createMatrix(reportElement, report); + createSummary(reportElement, report); + + break; + case QUAL: + for (ChartTab ct : buildMultiTabCycles(false,"Qual", reportElement, "qual", + "QualityByCycle", "BadQualsInReads", CycleDetailUtils.getQualFileCycle(), null, true)) { + report.addTab(ct); + } + break; + case FASTA: + for (ChartTab ct : buildMultiTabCycles(false,"Fasta", reportElement, "fasta", + "ColourByCycle", "BadColoursInReads", CycleDetailUtils.getTagCSNumericCycle(), CS_COLOURS, false)) { + report.addTab(ct); + } + break; + case FASTQ: + createFastqSummary(reportElement, report); + createSEQ(reportElement, report); + createQUALS(reportElement, report); + break; + case FA: + createFaSummary(reportElement, report); + break; + case MA: + for (ChartTab ct : buildMultiTabCycles(false,"Ma", reportElement, "ma", + "ColourByCycle", "BadBasesInReads", CycleDetailUtils.getTagCSNumericCycle(), CS_COLOURS, false)) { + report.addTab(ct); + } + break; } return report; } - + private static void addEntryToSummaryMap(Element reportElement, String elementName, String mapEntryName, Map> summaryMap) { final NodeList nodeList = reportElement.getElementsByTagName(elementName); - if (null != nodeList) { - final Element element = (Element) nodeList.item(0); - if (null != element) { - Map sourceMap = new HashMap<>(); - QProfilerCollectionsUtils.populateTallyItemMap(element, sourceMap, false); - - for (Entry entry : sourceMap.entrySet()) { - // get map from summaryMap - Map map = summaryMap.get(mapEntryName); - if (null == map) { - map = new HashMap(); - summaryMap.put(mapEntryName, map); - } - map.put(entry.getKey(), entry.getValue()); - } - } else { - System.out.println("null " + elementName + " element"); + final Element element = (Element) nodeList.item(0); + if (null != element) { + Map 
sourceMap = new HashMap<>(); + QProfilerCollectionsUtils.populateTallyItemMap(element, sourceMap, false); + for (Entry entry : sourceMap.entrySet()) { + // get map from summaryMap + Map map = summaryMap.computeIfAbsent(mapEntryName, k -> new HashMap<>()); + map.put(entry.getKey(), entry.getValue()); } } else { - System.out.println("null " + elementName + " NL"); + log.warn("null " + elementName + " element"); } } - + private static void createFaSummary(Element reportElement, Report report) { - + // setup parent tab - report.addTab(addTop100Chart(reportElement, "KMERS", "Kmer", "kmer", "Top 100 6-mers seen in reference genome", 100, true)); + report.addTab(addTop100Chart(reportElement, "KMERS", "Kmer", "kmer", "Top 100 6-mers seen in reference genome", 100)); } + private static void createFastqSummary(Element reportElement, Report report) { - + // setup parent tab ChartTab parentCT = new ChartTab("Summary", "summ" + reportID); - + // table with instrument, run ids, flow cell ids, tile numbers, etc Map> summaryMap = new LinkedHashMap<>(); - + // instruments first addEntryToSummaryMap(reportElement, "INSTRUMENTS", "Instrument", summaryMap); addEntryToSummaryMap(reportElement, "RUN_IDS", "Run Id", summaryMap); @@ -168,180 +155,167 @@ private static void createFastqSummary(Element reportElement, Report report) { addEntryToSummaryMap(reportElement, "PAIR_INFO", "Pair", summaryMap); addEntryToSummaryMap(reportElement, "FILTER_INFO", "Filter", summaryMap); addEntryToSummaryMap(reportElement, "TILE_NUMBERS", "Tile Number", summaryMap); - - + + ChartTab ct = new ChartTab("Summary", "summ" + reportID); ct.setData(HTMLReportUtils.generateGoogleDataForTableStringMapPair(summaryMap, ct.getName())); ct.setChartInfo(HTMLReportUtils.generateGoogleSingleTable(ct.getName(), 0, 1300)); - + ct.setRenderingInfo(HTMLReportUtils.generateRenderingTableInfo(ct.getName(), 1, false)); - + parentCT.addChild(ct); - ChartTab indexesTab = addTop100Chart(reportElement, "INDEXES", "Index", "index", 
"Top 50 indexes seen in fastq sequencing reads", 50, true); + ChartTab indexesTab = addTop100Chart(reportElement, "INDEXES", "Index", "index", "Top 50 indexes seen in fastq sequencing reads", 50); if (null != indexesTab) { parentCT.addChild(indexesTab); } - + report.addTab(parentCT); } - - private static ChartTab addTop100Chart(Element reportElement, String nodeName, String charTitle, String chartId, String description, int number, boolean logScale) { + + private static ChartTab addTop100Chart(Element reportElement, String nodeName, String charTitle, String chartId, String description, int number) { final NodeList nodeList = reportElement.getElementsByTagName(nodeName); - if (null != nodeList) { - final Element element = (Element) nodeList.item(0); - if (null != element) { - - Map map = new HashMap<>(); - QProfilerCollectionsUtils.populateTallyItemMap(element, map, false); - List list = new ArrayList<>(); - - for (Entry entry : map.entrySet()) { - list.add(entry.getValue() + "-" + entry.getKey()); - } - Collections.sort(list, new Comparator() { - @Override - public int compare(String arg0, String arg1) { - //strip the number part out of the string - int arg0Tally = Integer.parseInt(arg0.substring(0, arg0.indexOf("-"))); - int arg1Tally = Integer.parseInt(arg1.substring(0, arg1.indexOf("-"))); - return arg1Tally - arg0Tally; - } - }); - - int magicNumber = Math.min(number, list.size()); - - Map top100Entries = new LinkedHashMap<>(); - for (int i = 0 ; i < magicNumber ; i++) { - String entry = list.get(i); - - int dashIndex = entry.indexOf("-"); - String key = entry.substring(dashIndex + 1); - AtomicLong al = new AtomicLong(Long.parseLong(entry.substring(0, dashIndex))); - - top100Entries.put(key, al); - } - - final ChartTab charTab = new ChartTab(charTitle, chartId); - charTab.setData(HTMLReportUtils.generateGoogleData(top100Entries, charTab.getName(), true)); - charTab.setChartInfo(HTMLReportUtils.generateGoogleChart(charTab.getName(), - charTitle, 1200, 
MIN_REPORT_HEIGHT, - HTMLReportUtils.COLUMN_CHART, logScale, false)); - charTab.setDescription(description + " (total number: " + map.size() + ")"); - - return charTab; - } else { - System.out.println("null " + nodeName + " Element"); + final Element element = (Element) nodeList.item(0); + if (null != element) { + + Map map = new HashMap<>(); + QProfilerCollectionsUtils.populateTallyItemMap(element, map, false); + List list = new ArrayList<>(); + + for (Entry entry : map.entrySet()) { + list.add(entry.getValue() + "-" + entry.getKey()); } + list.sort((arg0, arg1) -> { + //strip the number part out of the string + int arg0Tally = Integer.parseInt(arg0.substring(0, arg0.indexOf("-"))); + int arg1Tally = Integer.parseInt(arg1.substring(0, arg1.indexOf("-"))); + return arg1Tally - arg0Tally; + }); + + int magicNumber = Math.min(number, list.size()); + + Map top100Entries = new LinkedHashMap<>(); + for (int i = 0 ; i < magicNumber ; i++) { + String entry = list.get(i); + + int dashIndex = entry.indexOf("-"); + String key = entry.substring(dashIndex + 1); + AtomicLong al = new AtomicLong(Long.parseLong(entry.substring(0, dashIndex))); + + top100Entries.put(key, al); + } + + final ChartTab charTab = new ChartTab(charTitle, chartId); + charTab.setData(HTMLReportUtils.generateGoogleData(top100Entries, charTab.getName(), true)); + charTab.setChartInfo(HTMLReportUtils.generateGoogleChart(charTab.getName(), + charTitle, 1200, MIN_REPORT_HEIGHT, + HTMLReportUtils.COLUMN_CHART, true, false)); + charTab.setDescription(description + " (total number: " + map.size() + ")"); + + return charTab; } else { - System.out.println("null " + nodeName + "NL"); + System.out.println("null " + nodeName + " Element"); } return null; } - - + + private static void createSummary(Element reportElement, Report report) { - + final NodeList summaryNL = reportElement.getElementsByTagName("SUMMARY"); - - if (null != summaryNL) { - final Element summaryElement = (Element) summaryNL.item(0); - if (null != 
summaryElement) { - - Map summaryMap = new LinkedHashMap<>(); - NodeList summaryNodes = summaryElement.getChildNodes(); - - if (null != summaryNodes) { - for (int i = 0 ; i < summaryNodes.getLength() ; i++) { - - Node n = summaryNodes.item(i); - String nodeName = n.getNodeName(); - - final String startVBlock = "{v: '"; - final String endVBlock = "', p: {style: 'text-align: right'}}]}" ; - switch (nodeName) { - case "FirstInPairAveLength": - summaryMap.put("Average read length of first in pair reads", startVBlock + n.getAttributes().getNamedItem("value").getNodeValue()+ endVBlock); - break; - case "SecondInPairAveLength": - summaryMap.put("Average read length of second in pair reads", startVBlock + n.getAttributes().getNamedItem("value").getNodeValue()+ endVBlock); - break; - case "MDMismatchCycles": - int noOfCylces = Integer.parseInt(n.getAttributes().getNamedItem("value").getNodeValue()); - - String rag = "', p:{ style: 'text-align: right; background-color: "; - rag += (noOfCylces > 20) ? "tomato;'}}]}" : (noOfCylces > 10) ? "yellow;'}}]}" : "palegreen;'}}]}" ; - - summaryMap.put("Number of cycles with >1% mismatches", startVBlock + noOfCylces+ rag); - break; - case "Failed_Secondary_Supplementary": - summaryMap.put("Discarded reads (FailedVendorQuality, secondary, supplementary)", startVBlock + n.getAttributes().getNamedItem("value").getNodeValue()+ endVBlock); - break; - case "inputedReads": - summaryMap.put("Total inputed reads including counted and discarded reads", startVBlock + n.getAttributes().getNamedItem("value").getNodeValue()+ endVBlock); - break; - } - } - - ChartTab ct = new ChartTab("Summary", "summ" + reportID); - String str = HTMLReportUtils.generateGoogleDataForTableStringMap(summaryMap, ct.getName()+1, "Property", "Value" ); - - //add RG table - final NodeList isizeNL = reportElement.getElementsByTagName("ISIZE"); - final Element isizeElement = (isizeNL == null)? 
null : (Element) isizeNL.item(0); - - summaryMap = createRgMap( summaryElement, isizeElement ); //coding - String[] arr = summaryMap.remove("Read Group").split(","); //table header - arr[0] = ct.getName()+2; - str += HTMLReportUtils.generateGoogleDataForTableStringMap(summaryMap, arr ); - ct.setData(str); - - str = HTMLReportUtils.generateGoogleSingleTable(ct.getName() + 1, 0, 600); - str += HTMLReportUtils.generateGoogleSingleTable(ct.getName() + 2, 0, null); - ct.setChartInfo(str); - - str = "\n
" - +"

" - +"

" - + HTMLReportUtils.generateDescriptionButton(ct.getName()+1, SUMMARY_NOTES, "Summary Notes") - + HTMLReportUtils.generateDescriptionButton(ct.getName()+2, SUMMARY_DESCRIPTION, "Column Description") - + "
"; - ct.setRenderingInfo( str ); - - // add summary report to the front - List chartTabs = report.getTabs(); - chartTabs.add(0, ct); - report.setTabs(chartTabs); - } else { - System.out.println("summaryNodes was null"); + + final Element summaryElement = (Element) summaryNL.item(0); + if (null != summaryElement) { + + Map summaryMap = new LinkedHashMap<>(); + NodeList summaryNodes = summaryElement.getChildNodes(); + + for (int i = 0; i < summaryNodes.getLength(); i++) { + + Node n = summaryNodes.item(i); + String nodeName = n.getNodeName(); + + final String startVBlock = "{v: '"; + final String endVBlock = "', p: {style: 'text-align: right'}}]}"; + switch (nodeName) { + case "FirstInPairAveLength": + summaryMap.put("Average read length of first in pair reads", startVBlock + n.getAttributes().getNamedItem("value").getNodeValue() + endVBlock); + break; + case "SecondInPairAveLength": + summaryMap.put("Average read length of second in pair reads", startVBlock + n.getAttributes().getNamedItem("value").getNodeValue() + endVBlock); + break; + case "MDMismatchCycles": + int noOfCylces = Integer.parseInt(n.getAttributes().getNamedItem("value").getNodeValue()); + + String rag = "', p:{ style: 'text-align: right; background-color: "; + rag += (noOfCylces > 20) ? "tomato;'}}]}" : (noOfCylces > 10) ? 
"yellow;'}}]}" : "palegreen;'}}]}"; + + summaryMap.put("Number of cycles with >1% mismatches", startVBlock + noOfCylces + rag); + break; + case "Failed_Secondary_Supplementary": + summaryMap.put("Discarded reads (FailedVendorQuality, secondary, supplementary)", startVBlock + n.getAttributes().getNamedItem("value").getNodeValue() + endVBlock); + break; + case "inputedReads": + summaryMap.put("Total inputed reads including counted and discarded reads", startVBlock + n.getAttributes().getNamedItem("value").getNodeValue() + endVBlock); + break; } } + + ChartTab ct = new ChartTab("Summary", "summ" + reportID); + String str = HTMLReportUtils.generateGoogleDataForTableStringMap(summaryMap, ct.getName()+1, "Property", "Value" ); + + //add RG table + final NodeList isizeNL = reportElement.getElementsByTagName("ISIZE"); + final Element isizeElement = (Element) isizeNL.item(0); + + summaryMap = createRgMap( summaryElement, isizeElement ); //coding + String[] arr = summaryMap.remove("Read Group").split(","); //table header + arr[0] = ct.getName()+2; + str += HTMLReportUtils.generateGoogleDataForTableStringMap(summaryMap, arr ); + ct.setData(str); + + str = HTMLReportUtils.generateGoogleSingleTable(ct.getName() + 1, 0, 600); + str += HTMLReportUtils.generateGoogleSingleTable(ct.getName() + 2, 0, null); + ct.setChartInfo(str); + + str = "\n
" + +"

" + +"

" + + HTMLReportUtils.generateDescriptionButton(ct.getName()+1, SUMMARY_NOTES, "Summary Notes") + + HTMLReportUtils.generateDescriptionButton(ct.getName()+2, SUMMARY_DESCRIPTION, "Column Description") + + "
"; + ct.setRenderingInfo( str ); + + // add summary report to the front + List chartTabs = report.getTabs(); + chartTabs.add(0, ct); + report.setTabs(chartTabs); } } - + private static void createMatrix(Element reportElement, Report report) { - + final ChartTab matrixParent = new ChartTab("MAPQ Matricies"); - + //Matricies final NodeList matrixCMNL = reportElement.getElementsByTagName("MAPQMatrixCM"); final Element matrixCMElement = (Element) matrixCMNL.item(0); ChartTab ct = createMatrixChartTab(matrixCMElement, "CM Matrix", "cm"); if (null != ct) matrixParent.addChild(ct); - + NodeList matrixSMNL = reportElement.getElementsByTagName("MAPQMatrixSM"); Element matrixSMElement = (Element) matrixSMNL.item(0); ct = createMatrixChartTab(matrixSMElement, "SM Matrix", "sm"); if (null != ct) matrixParent.addChild(ct); - + NodeList matrixLengthNL = reportElement.getElementsByTagName("MAPQMatrixLength"); Element matrixLengthElement = (Element) matrixLengthNL.item(0); ct = createMatrixChartTab(matrixLengthElement, "Length Matix", "len"); if (null != ct) matrixParent.addChild(ct); - + if ( ! matrixParent.getChildren().isEmpty()) report.addTab(matrixParent); } @@ -367,42 +341,42 @@ private static void createFLAGS(Element reportElement, Report report) { final Element flagElement = (Element) flagNL.item(0); final ChartTab flagTab = new ChartTab( FLAG ); Map flags = getMapFromElement(flagElement); - - Map flagsKeyChange = new LinkedHashMap(); + + Map flagsKeyChange = new LinkedHashMap<>(); for (Entry entry : flags.entrySet()) { - String[] flagStirngArray = entry.getKey().split(", "); - flagsKeyChange.put((flagStirngArray.length > 1 ? flagStirngArray[1] + ", ": "") + flagStirngArray[0], entry.getValue()); + String[] flagStringArray = entry.getKey().split(", "); + flagsKeyChange.put((flagStringArray.length > 1 ? 
flagStringArray[1] + ", ": "") + flagStringArray[0], entry.getValue()); } - - flagTab.addChild(getChartTabFromMap( "Flag", "fl", HTMLReportUtils.BAR_CHART, true, flagsKeyChange) ); - + + flagTab.addChild(getChartTabFromMap( "Flag", "fl", HTMLReportUtils.BAR_CHART, flagsKeyChange) ); + // duplicates - final Map dupMap = new HashMap(); + final Map dupMap = new HashMap<>(); dupMap.put("d", "Duplicates"); final Map duplicateFlags = QProfilerCollectionsUtils.splitFlagTallyByDistinguisher(flags, dupMap, "Singletons"); - flagTab.addChild(getChartTabFromMap(DUPLICATE, "fld", HTMLReportUtils.PIE_CHART, true, duplicateFlags)); + flagTab.addChild(getChartTabFromMap(DUPLICATE, "fld", HTMLReportUtils.PIE_CHART, duplicateFlags)); // vendor check - final Map vendorMap = new HashMap(); + final Map vendorMap = new HashMap<>(); vendorMap.put("f", "Failed Vendor Check"); final Map failedFlags = QProfilerCollectionsUtils.splitFlagTallyByDistinguisher(flags, vendorMap, "Passed Vendor Check"); - flagTab.addChild(getChartTabFromMap("Vendor Check", "flf", HTMLReportUtils.PIE_CHART, true, failedFlags)); + flagTab.addChild(getChartTabFromMap("Vendor Check", "flf", HTMLReportUtils.PIE_CHART, failedFlags)); // first and second - final Map firstSecondMap = new HashMap(); + final Map firstSecondMap = new HashMap<>(); firstSecondMap.put("2", "Second"); firstSecondMap.put("1", "First"); final Map firstSecondFlags = QProfilerCollectionsUtils.splitFlagTallyByDistinguisher(flags, firstSecondMap, null); - flagTab.addChild(getChartTabFromMap("Read in Pair", "flfs", HTMLReportUtils.PIE_CHART, true, firstSecondFlags)); - - final Map mappedMap = new HashMap(); + flagTab.addChild(getChartTabFromMap("Read in Pair", "flfs", HTMLReportUtils.PIE_CHART, firstSecondFlags)); + + final Map mappedMap = new HashMap<>(); mappedMap.put("u", UNMAPPED); final Map mappedFlags = QProfilerCollectionsUtils.splitFlagTallyByDistinguisher(flags, mappedMap, "Mapped"); - flagTab.addChild(getChartTabFromMap(UNMAPPED, "flm", 
HTMLReportUtils.PIE_CHART, true, mappedFlags)); - - final Map primaryMap = new HashMap(); + flagTab.addChild(getChartTabFromMap(UNMAPPED, "flm", HTMLReportUtils.PIE_CHART, mappedFlags)); + + final Map primaryMap = new HashMap<>(); primaryMap.put("s", "secondary"); primaryMap.put("S", "Supplementary"); final Map primaryFlags = QProfilerCollectionsUtils.splitFlagTallyByDistinguisher(flags, primaryMap, "Primary"); - flagTab.addChild(getChartTabFromMap("Primary", "fls", HTMLReportUtils.PIE_CHART, true, primaryFlags)); + flagTab.addChild(getChartTabFromMap("Primary", "fls", HTMLReportUtils.PIE_CHART, primaryFlags)); report.addTab(flagTab); } @@ -432,108 +406,108 @@ private static void createISIZE(Element reportElement, Report report) throws QVi if (null != iSizeCT) report.addTab(iSizeCT); } - - //coverge for each chromosome and readGroup + + //coverage for each chromosome and readGroup private static void createRNM(Element reportElement, Report report) { final NodeList rnmNL = reportElement.getElementsByTagName("RNAME_POS"); final Element rnmElement = (Element) rnmNL.item(0); final NodeList nlTop = rnmElement.getElementsByTagName("RNAME"); - + /* * if we don't have the data in the xml, don't try and create a chart */ if (nlTop.getLength()<= 0) { return; } - + //get data from xml Map> contigMaps = new HashMap<>(); Map> rgCountsMaps = new HashMap<>(); - + List chromos = new ArrayList<>(); - List readGroups = null; - + List readGroups = null; + int cellingValue = 0; - for (int i = 0 , length = nlTop.getLength() ; i < length ; i++) { + for (int i = 0 , length = nlTop.getLength() ; i < length ; i++) { final Element nameElementTop = (Element) nlTop.item(i); String chromosome = nameElementTop.getAttribute("value"); int contigLength = Integer.parseInt(nameElementTop.getAttribute("maxPosition")); - + if(readGroups == null){ - readGroups = new LinkedList ( Arrays.asList(nameElementTop.getAttribute("readGroups").split(",")) ); + readGroups = new LinkedList<>( 
Arrays.asList(nameElementTop.getAttribute("readGroups").split(",")) ); readGroups.remove("unkown_readgroup_id"); readGroups.add(0, "Total"); } - - //viral have 6000 contig's lots of them big then 1M, it cause html can't show well on browers - //chrMT is small but special - if (null != chromosome && (contigLength > 50 * 1000 * 1000 || chromosome.toUpperCase().startsWith("CHR")) ){ + + //viral have 6000 contig's lots of them big then 1M, it causes html can't show well on browsers + //chrMT is small but special + if (contigLength > 50 * 1000 * 1000 || chromosome.toUpperCase().startsWith("CHR")){ chromos.add(chromosome); - + final NodeList nl = nameElementTop.getElementsByTagName("RangeTally"); final Element nameElement = (Element) nl.item(0); - final TreeMap map = (TreeMap) createRangeTallyMap(nameElement); + final TreeMap map = (TreeMap) createRangeTallyMap(nameElement); if ( ! map.isEmpty()) contigMaps.put(chromosome, map); - + final TreeMap map1 = ( TreeMap ) createRgCountMap(nameElement); - rgCountsMaps.put(chromosome, map1); + rgCountsMaps.put(chromosome, map1); if (cellingValue == 0) { cellingValue = Integer.parseInt(nameElementTop.getAttribute("visuallCellingValue")); } } - } - + } + //tab 1 StringBuilder dataSB = new StringBuilder(); - StringBuilder chartSB = new StringBuilder(); + StringBuilder chartSB = new StringBuilder(); String tabName = "rnmref"; for (Entry> map : contigMaps.entrySet()) { - String keyWithOutPeriods = map.getKey().replace(".",""); + String keyWithOutPeriods = map.getKey().replace(".",""); dataSB.append(HTMLReportUtils.generateGoogleData(map.getValue(), tabName + keyWithOutPeriods, false)); chartSB.append(HTMLReportUtils.generateGoogleScatterChart(tabName + keyWithOutPeriods, keyWithOutPeriods, 600, MIN_REPORT_HEIGHT, true)); } - ChartTab child1 = new ChartTab( "Coverage overall" , tabName); + ChartTab child1 = new ChartTab( "Coverage overall" , tabName); child1.setData(dataSB.toString()); child1.setChartInfo(chartSB.toString()); - 
Collections.sort(chromos, new ReferenceNameComparator()); - child1.setRenderingInfo(HTMLReportUtils.generateRenderingTableInfo(tabName , chromos, 2)); - - //tab 2 + chromos.sort(new ReferenceNameComparator()); + child1.setRenderingInfo(HTMLReportUtils.generateRenderingTableInfo(tabName , chromos, 2)); + + //tab 2 dataSB = new StringBuilder(); chartSB = new StringBuilder(); tabName = "rnmrg"; - - readGroups.remove(0); - + + assert readGroups != null; + readGroups.remove(0); + for (Entry> map : rgCountsMaps.entrySet()) { - String keyWithOutPeriods = map.getKey().replace(".",""); - TreeMap notTotalMap = new TreeMap(); + String keyWithOutPeriods = map.getKey().replace(".",""); + TreeMap notTotalMap = new TreeMap<>(); for(Entry entry: map.getValue().entrySet()){ AtomicLongArray rgArray = new AtomicLongArray(readGroups.size()); for(int j = 1; j <= readGroups.size(); j ++) { rgArray.addAndGet(j-1, entry.getValue().get(j)); } notTotalMap.putIfAbsent(entry.getKey(), rgArray); - } - - dataSB.append( HTMLReportUtils.generateGoogleaArrayToDataTable(notTotalMap, tabName + keyWithOutPeriods, false, readGroups, false ) ); - int max = cellingValue; - String extraOption = String.format(", vAxis: { viewWindowMode:'explicit', viewWindow:{ max: %d }}, fontSize:12, legend: {position: 'right', textStyle: {color: 'blue'}}, crosshair: {trigger: 'both'}, lineWidth: 2", max); - chartSB.append(HTMLReportUtils.generateGoogleChart(tabName + keyWithOutPeriods, keyWithOutPeriods, "$(window).width()", MIN_REPORT_HEIGHT/4, false, HTMLReportUtils.LINE_CHART, + } + + dataSB.append( HTMLReportUtils.generateGoogleaArrayToDataTable(notTotalMap, tabName + keyWithOutPeriods, false, readGroups, false ) ); + String extraOption = String.format(", vAxis: { viewWindowMode:'explicit', viewWindow:{ max: %d }}, fontSize:12, legend: {position: 'right', textStyle: {color: 'blue'}}, crosshair: {trigger: 'both'}, lineWidth: 2", cellingValue); + chartSB.append(HTMLReportUtils.generateGoogleChart(tabName + 
keyWithOutPeriods, keyWithOutPeriods, "$(window).width()", MIN_REPORT_HEIGHT/4, false, HTMLReportUtils.LINE_CHART, null, extraOption ) ); - + } - - ChartTab child2 = new ChartTab( "Coverage by readGroup" , tabName); + + ChartTab child2 = new ChartTab( "Coverage by readGroup" , tabName); child2.setData(dataSB.toString()); child2.setChartInfo(chartSB.toString()); - child2.setRenderingInfo(HTMLReportUtils.generateRenderingTableInfo(tabName, chromos, 1)); - - ChartTab parentCT = new ChartTab("RNAME"); + child2.setRenderingInfo(HTMLReportUtils.generateRenderingTableInfo(tabName, chromos, 1)); + + ChartTab parentCT = new ChartTab("RNAME"); parentCT.addChild(child1); - parentCT.addChild(child2); - report.addTab(parentCT); + parentCT.addChild(child2); + report.addTab(parentCT); } @@ -541,7 +515,7 @@ private static void createTAGS(Element reportElement, Report report) { // TAG final NodeList tagNL = reportElement.getElementsByTagName("TAG"); final Element tagElement = (Element) tagNL.item(0); - + //TAG-CS final NodeList tagCSNL = tagElement.getElementsByTagName("CS"); if (tagCSNL.getLength() > 1) { @@ -550,7 +524,7 @@ private static void createTAGS(Element reportElement, Report report) { report.addTab(ct); } } - + //TAG-CQ final NodeList tagCQNL = tagElement.getElementsByTagName("CQ"); if (tagCQNL.getLength() > 1) { @@ -560,22 +534,22 @@ private static void createTAGS(Element reportElement, Report report) { report.addTab(ct); } } - + //TAG-RG final NodeList tagRGNL = tagElement.getElementsByTagName("RG"); final Element tagRGElement = (Element) tagRGNL.item(0); if (tagRGElement.hasChildNodes()) report.addTab(generateTallyChartTab(tagRGElement, "TAG RG", "trg", HTMLReportUtils.BAR_CHART, true)); - + //TAG-ZM final NodeList tagZMNL = tagElement.getElementsByTagName("ZM"); final Element tagZMElement = (Element) tagZMNL.item(0); if (tagZMElement.hasChildNodes()) report.addTab(generateTallyChartTab(tagZMElement, "TAG ZM", "tzm", HTMLReportUtils.SCATTER_CHART, false)); - + 
//TAG-MD ChartTab mdCT = createMDChartTab(tagElement); - if (null != mdCT) report.addTab(mdCT); + if (null != mdCT) report.addTab(mdCT); } private static void createQUALS(Element reportElement, Report report) { @@ -591,83 +565,81 @@ public static void createSEQ(Element reportElement, Report report) { //SEQ final NodeList seqNL = reportElement.getElementsByTagName("SEQ"); final Element seqElement = (Element) seqNL.item(0); - + //SEQ show mainMainTab is true, so only one element on the list - List tabs = buildMultiTabCycles(true,"SEQ", seqElement, "s", "BaseByCycle", "BadBasesInReads", + List tabs = buildMultiTabCycles(true,"SEQ", seqElement, "s", "BaseByCycle", "BadBasesInReads", CycleDetailUtils.getSeqCycle(), SEQ_COLOURS, false); - + //add kmers tab ChartTab parentCT = tabs.get(0); /* * check to see if we have kmer data before heading down this path */ NodeList nl = seqElement.getElementsByTagName("mers1"); - if (null != nl && nl.getLength() > 0) { + if (nl.getLength() > 0) { parentCT.addChild( createKmersTab((Element) nl.item(0), "kmer_1" ) ); } nl = seqElement.getElementsByTagName("mers2"); - if (null != nl && nl.getLength() > 0) { + if (nl.getLength() > 0) { parentCT.addChild( createKmersTab((Element) nl.item(0), "kmer_2" ) ); } nl = seqElement.getElementsByTagName("mers3"); - if (null != nl && nl.getLength() > 0) { + if (nl.getLength() > 0) { parentCT.addChild( createKmersTab((Element) nl.item(0), "kmer_3" ) ); } nl = seqElement.getElementsByTagName("mers6"); - if (null != nl && nl.getLength() > 0) { + if (nl.getLength() > 0) { parentCT.addChild( createKmersTab((Element) nl.item(0), "kmer_6" ) ); } report.addTab(parentCT); - - + + } - + private static ChartTab createKmersTab(Element mersElement, String tabTitle ){ - - Map map = new TreeMap(); + + Map map = new TreeMap<>(); NodeList nl = mersElement.getElementsByTagName("CycleTally"); Element tallyElement = (Element) nl.item(0); - + List kmers = Arrays.asList( 
tallyElement.getAttribute("possibleValues").split(",")); - - nl = tallyElement.getElementsByTagName("Cycle"); + + nl = tallyElement.getElementsByTagName("Cycle"); for (int i = 0, size = nl.getLength() ; i < size ; i++) { - Element e = (Element) nl.item(i); + Element e = (Element) nl.item(i); String[] sValues = e.getAttribute("counts").split(","); long[] nValues = new long[sValues.length]; for(int j = 0; j < sValues.length; j ++) nValues[j] = Long.parseLong(sValues[j]); map.put(Integer.parseInt(e.getAttribute("value")), new AtomicLongArray( nValues)); - + } - + String dataSB = HTMLReportUtils.generateGoogleaArrayToDataTable(map, tabTitle, false, kmers, false); - String chartSB = HTMLReportUtils.generateGoogleChart(tabTitle, "Kmers Distribution", "$(window).width()", MAX_REPORT_HEIGHT, false, HTMLReportUtils.LINE_CHART, "Cycle", + String chartSB = HTMLReportUtils.generateGoogleChart(tabTitle, "Kmers Distribution", "$(window).width()", MAX_REPORT_HEIGHT, false, HTMLReportUtils.LINE_CHART, "Cycle", ", vAxis: { viewWindowMode:'explicit' }, fontSize:12, legend: { position: 'right', textStyle: { color: 'blue' } }, crosshair: { trigger: 'both' }, lineWidth: 2"); ChartTab ct = new ChartTab(tabTitle, tabTitle); ct.setData( dataSB); ct.setChartInfo( chartSB); - - return ct; + + return ct; } - + private static void createBamHeader(Element reportElement, Report report) { final NodeList headerNL = reportElement.getElementsByTagName("HEADER"); - if (null != headerNL) { - final Element headerElement = (Element) headerNL.item(0); - if (null != headerElement) { - Map> headerList = QProfilerCollectionsUtils.convertHeaderTextToMap(headerElement.getTextContent()); - if ( ! 
headerList.isEmpty()) { - ChartTab ct = new ChartTab("BAMHeader", "head" + reportID); - ct.setData(HTMLReportUtils.generateGoogleDataForTable(headerList, ct.getName())); - String str = ""; - for(int i = 1; i<= headerList.size(); i ++) - str += HTMLReportUtils.generateGoogleSingleTable(ct.getName() + i,null, null); - ct.setChartInfo(str); - ct.setRenderingInfo(HTMLReportUtils.generateRenderingTableInfo(ct.getName(), headerList.size(),false)); - report.addTab(ct); - } + final Element headerElement = (Element) headerNL.item(0); + if (null != headerElement) { + Map> headerList = QProfilerCollectionsUtils.convertHeaderTextToMap(headerElement.getTextContent()); + if ( ! headerList.isEmpty()) { + ChartTab ct = new ChartTab("BAMHeader", "head" + reportID); + ct.setData(HTMLReportUtils.generateGoogleDataForTable(headerList, ct.getName())); + String str = ""; + for(int i = 1; i<= headerList.size(); i ++) + str += HTMLReportUtils.generateGoogleSingleTable(ct.getName() + i,null, null); + ct.setChartInfo(str); + ct.setRenderingInfo(HTMLReportUtils.generateRenderingTableInfo(ct.getName(), headerList.size(),false)); + report.addTab(ct); } } } @@ -679,7 +651,7 @@ private static ChartTab createMDChartTab(Element tagElement) { ChartTab onePercentCT = null; ChartTab forwardCT = null; ChartTab reverseCT = null; - + //TAG-MD final NodeList tagMDNL = tagElement.getElementsByTagName("MD"); final Element tagMDElement = (Element) tagMDNL.item(0); @@ -688,7 +660,7 @@ private static ChartTab createMDChartTab(Element tagElement) { // get data final SummaryByCycle cycle = QProfilerCollectionsUtils.generateSummaryByCycleFromElement(tagMDElement, "MismatchByCycle"); final Map cyclePercentages = QProfilerCollectionsUtils.generatePercentagesMapFromElement(tagMDElement, "MismatchByCycle"); - + // create cycle tab mismatchCT = new ChartTab(MD, "tmd"); mismatchCT.setData(HTMLReportUtils.generateGoogleDataCycles(cycle, mismatchCT @@ -698,10 +670,10 @@ private static ChartTab createMDChartTab(Element 
tagElement) { HTMLReportUtils.BAR_CHART, false, true, cycle.getPossibleValues().size(), SEQ_COLOURS)); mismatchCT.setDescription(TAG_MD_DESCRIPTION); - - + + Map sortedPercentageMap = new TreeMap<>(); - + for (Entry entry : cyclePercentages.entrySet()) { String value = entry.getValue(); double percentage = Double.parseDouble(value.substring(0, value.length() - 1)); @@ -709,12 +681,12 @@ private static ChartTab createMDChartTab(Element tagElement) { sortedPercentageMap.put(entry.getKey(), percentage); } } - + onePercentCT = new ChartTab(MD + " 1 PERC", "tmd1pc"); onePercentCT.setData(HTMLReportUtils.generateGoogleData(sortedPercentageMap, onePercentCT.getName(),false, "Error Percentage", "Cycle")); onePercentCT.setChartInfo(HTMLReportUtils.generateGoogleSingleTable(onePercentCT.getName(), 0 , 400)); } - + //TAG-MD_forward final NodeList tagMDForward = tagElement.getElementsByTagName("MD_mutation_forward"); final Element tagMDFElement = (Element) tagMDForward.item(0); @@ -725,10 +697,10 @@ private static ChartTab createMDChartTab(Element tagElement) { final Element tagMDRElement = (Element) tagMDReverse.item(0); if (null != tagMDRElement && tagMDRElement.hasChildNodes()) reverseCT = generateTallyChartTab(tagMDRElement, "Mutation Reverse Strand", "mdr", HTMLReportUtils.BAR_CHART, true, true); - - + + // deal with all charts being null, or just the forward and reverse charts being null, in which case no need for parent ct - + if (null != mismatchCT && null != forwardCT && null != reverseCT) { parentCT = new ChartTab("TAG MD"); mismatchCT.setTitle("MD mismatch"); @@ -742,7 +714,7 @@ private static ChartTab createMDChartTab(Element tagElement) { } return parentCT; } - + private static ChartTab createISizeChartTab(Element element) throws QVisualiseException { ChartTab parentCT = null; String id = "is" + reportID; @@ -758,22 +730,22 @@ private static ChartTab createISizeChartTab(Element element) throws QVisualiseEx } } int noOfReadGroups = readGroups.size(); - + if 
(noOfReadGroups == 0) { throw new QVisualiseException("ISIZE_ERROR"); } - + TreeMap arrayMap = new TreeMap<>(); - + int counter = 0; for (String rg : readGroups) { final NodeList nl = element.getChildNodes(); for (int k = 0 ; k < nl.getLength() ; k++) { Node n = childNodes.item(k); - if (n.getNodeName().equals("ReadGroup") && n.getAttributes().getNamedItem("id").getNodeValue().equals(rg)) { + if (n.getNodeName().equals("ReadGroup") && n.getAttributes().getNamedItem("id").getNodeValue().equals(rg)) { final Element nameElement = (Element) nl.item(k); final TreeMap map = (TreeMap) createRangeTallyMap(nameElement); - + // add map data to arrayMap for (Entry entry : map.entrySet()) { // get entry from arrayMap @@ -786,44 +758,44 @@ private static ChartTab createISizeChartTab(Element element) throws QVisualiseEx } } } - + counter++; } - + if ( ! arrayMap.isEmpty()) { - + // decide if we need to create 2 tabs if (arrayMap.lastKey() > 1500) { // split into 3 tabs // create parent parentCT = new ChartTab(ISIZE); - + // add in the summary tab String tabTitle = "0 to 1500" + (noOfReadGroups == 1 ? 
"" : " - Summary"); String longTitle = ISIZE + ", 0 to 1500, summed across all read groups"; final ChartTab ct1Sum = new ChartTab(tabTitle, (id + idCounter++)); - + TreeMap summaryMap = new TreeMap<>(); for (Entry entry : arrayMap.entrySet()) { summaryMap.put(entry.getKey(), QProfilerCollectionsUtils.tallyArrayValues(entry.getValue())); } - ct1Sum.setData(HTMLReportUtils.generateGoogleData( summaryMap.subMap(0, true, 1500, false), ct1Sum.getName(), false)); - ct1Sum.setChartInfo(HTMLReportUtils.generateGoogleScatterChart(ct1Sum.getName(), longTitle, 1400, MAX_REPORT_HEIGHT, true)); + ct1Sum.setData(HTMLReportUtils.generateGoogleData(summaryMap.subMap(0, true, 1500, false), ct1Sum.getName(), false)); + ct1Sum.setChartInfo(HTMLReportUtils.generateGoogleScatterChart(ct1Sum.getName(), longTitle, 1400, MAX_REPORT_HEIGHT, true)); parentCT.addChild(ct1Sum); - + if (noOfReadGroups > 1) { - + // first tab shows 0 - 1500 longTitle = ISIZE + ", 0 to 1500, split by read group"; final ChartTab ct1All = new ChartTab("0 to 1500 - All", (id + idCounter++)); - + ct1All.setData( HTMLReportUtils.generateGoogleaArrayToDataTable((arrayMap).subMap(0, true, 1500, false), ct1All.getName(), false, readGroups, true )); - ct1All.setChartInfo(HTMLReportUtils.generateGoogleChart(ct1All.getName(), longTitle, 1400+"", MAX_REPORT_HEIGHT, true, HTMLReportUtils.SCATTER_CHART, + ct1All.setChartInfo(HTMLReportUtils.generateGoogleChart(ct1All.getName(), longTitle, 1400+"", MAX_REPORT_HEIGHT, true, HTMLReportUtils.SCATTER_CHART, null, ", legend: {position: 'right', textStyle: {color: 'blue', fontSize: 14}}, crosshair: {trigger: 'both'}, pointSize: 2, lineWidth: 1")); - + parentCT.addChild(ct1All); } - + // next tab shows 0 - 5000 tabTitle = "0 to 5000" + (noOfReadGroups == 1 ? 
"" : " - Summary"); longTitle = ISIZE + ", 0 to 5000, summed across all read groups"; @@ -833,18 +805,18 @@ private static ChartTab createISizeChartTab(Element element) throws QVisualiseEx ct2sum.getName(), false)); ct2sum.setChartInfo(HTMLReportUtils.generateGoogleScatterChart(ct2sum.getName(), longTitle, 1400, MAX_REPORT_HEIGHT, true)); - + parentCT.addChild(ct2sum); - - if (noOfReadGroups > 1) { - + + if (noOfReadGroups > 1) { + // tab shows 0 - 5000 longTitle = ISIZE + ", 0 to 5000, split by read group"; final ChartTab ct2All = new ChartTab("0 to 5000 - All", (id + idCounter++)); - ct2All.setData(HTMLReportUtils.generateGoogleaArrayToDataTable((arrayMap).subMap(0, true, 5000, false), ct2All.getName(), false, readGroups, true) ); + ct2All.setData(HTMLReportUtils.generateGoogleaArrayToDataTable((arrayMap).subMap(0, true, 5000, false), ct2All.getName(), false, readGroups, true) ); ct2All.setChartInfo(HTMLReportUtils.generateGoogleChart(ct2All.getName(),longTitle, 1400+"", MAX_REPORT_HEIGHT, true, HTMLReportUtils.SCATTER_CHART, null, ", legend: {position: 'right', textStyle: {color: 'blue', fontSize: 14}}, crosshair: {trigger: 'both'}, pointSize: 2, lineWidth: 1")); - + parentCT.addChild(ct2All); } } else { @@ -857,20 +829,20 @@ private static ChartTab createISizeChartTab(Element element) throws QVisualiseEx } return parentCT; } - + private static ChartTab createCigarChartTab(Element element) { final String title = "CIGAR"; final String id = "cig" + reportID; final ChartTab parentCT = new ChartTab(title); final int height = 800; - + final Map cigars = getMapFromElement(element); - - final Map cigarsD = new TreeMap(new CigarStringComparator()); - final Map cigarsI = new TreeMap(new CigarStringComparator()); - final Map cigarsH = new TreeMap(new CigarStringComparator()); - final Map cigarsS = new TreeMap(new CigarStringComparator()); - + + final Map cigarsD = new TreeMap<>(new CigarStringComparator()); + final Map cigarsI = new TreeMap<>(new CigarStringComparator()); 
+ final Map cigarsH = new TreeMap<>(new CigarStringComparator()); + final Map cigarsS = new TreeMap<>(new CigarStringComparator()); + for (String key : cigars.keySet()) { if (key.endsWith("D")) cigarsD.put(key, cigars.get(key)); @@ -880,9 +852,9 @@ else if (key.endsWith("H")) cigarsH.put(key, cigars.get(key)); else if (key.endsWith("S")) cigarsS.put(key, cigars.get(key)); - + } - + if ( ! cigarsD.isEmpty()) { final ChartTab ctd = new ChartTab("Deletions", id+"d"); ctd.setData(HTMLReportUtils.generateGoogleData( @@ -891,10 +863,10 @@ else if (key.endsWith("S")) int width = cigarsD.size() > 100 ? 1600 : 1000; ctd.setChartInfo(HTMLReportUtils.generateGoogleChart(ctd.getName(), title + ", Deletions", width, height, HTMLReportUtils.COLUMN_CHART, true, false)); - + parentCT.addChild(ctd); } - + if ( ! cigarsI.isEmpty()) { final ChartTab cti = new ChartTab("Insertions", id+"i"); cti.setData(HTMLReportUtils.generateGoogleData( @@ -903,10 +875,10 @@ else if (key.endsWith("S")) int width = cigarsI.size() > 100 ? 1600 : 1000; cti.setChartInfo(HTMLReportUtils.generateGoogleChart(cti.getName(), title + ", Insertions", width, height, HTMLReportUtils.COLUMN_CHART, true, false)); - + parentCT.addChild(cti); } - + if ( ! cigarsH.isEmpty()) { final ChartTab cth = new ChartTab("Hard clips", id+"h"); cth.setData(HTMLReportUtils.generateGoogleData( @@ -915,10 +887,10 @@ else if (key.endsWith("S")) int width = cigarsH.size() > 100 ? 1600 : 1000; cth.setChartInfo(HTMLReportUtils.generateGoogleChart(cth.getName(), title + ", Hard clips", width, height, HTMLReportUtils.COLUMN_CHART, true, false)); - + parentCT.addChild(cth); } - + if ( ! cigarsS.isEmpty()) { final ChartTab cts = new ChartTab("Soft clips", id+"s"); cts.setData(HTMLReportUtils.generateGoogleData( @@ -927,65 +899,65 @@ else if (key.endsWith("S")) int width = cigarsS.size() > 100 ? 
1600 : 1000; cts.setChartInfo(HTMLReportUtils.generateGoogleChart(cts.getName(), title + ", Soft clips", width, height, HTMLReportUtils.COLUMN_CHART, true, false)); - + parentCT.addChild(cts); } - + return parentCT; } - + private static ChartTab createCoverageChartTab(Element element) { final String title = "Coverage"; final String id = "cov" + reportID; - + ChartTab parentCT = null; - + int width = MAX_REPORT_WIDTH + 200; - + final Map tallys = generateLengthsTally(element, "ValueTally"); if ( ! tallys.isEmpty()) { - + parentCT = new ChartTab(title); - - TreeMap tallysTM = new TreeMap(tallys); - final int maxCoverageValue = tallysTM.lastKey().intValue(); - + + TreeMap tallysTM = new TreeMap<>(tallys); + final int maxCoverageValue = tallysTM.lastKey(); + // want 2 tabs - standard and cumulative - + // standard // 0 - 100 no binning final ChartTab ct = new ChartTab("0-100", id+"s"); ct.setData(HTMLReportUtils.generateGoogleData(tallysTM.headMap(100, true), ct.getName(), true)); ct.setChartInfo(HTMLReportUtils.generateGoogleChart(ct.getName(), title + " - Standard, 0-100", width, MAX_REPORT_HEIGHT, HTMLReportUtils.COLUMN_CHART, true, false)); - + parentCT.addChild(ct); - + // do 0-1000, 0-10000, etc.. 
int multiplier = 100; while (true) { - + if (multiplier < maxCoverageValue) { - // set multplier to the next level + // set multiplier to the next level multiplier *= 10; - + final Map binMap = QProfilerCollectionsUtils.convertMapIntoBinnedMap( tallysTM.headMap(multiplier, true) , multiplier / 100, true); - + final ChartTab ctBin10 = new ChartTab("0-" + multiplier + " (binned by " + multiplier/100 + ")", id+"s"+multiplier/100); - + ctBin10.setData(HTMLReportUtils.generateGoogleData(binMap, ctBin10.getName(), true)); ctBin10.setChartInfo(HTMLReportUtils.generateGoogleChart(ctBin10.getName(), title + " - Standard, 0-" + multiplier + " (binned by " + multiplier/100 + ")", width, MAX_REPORT_HEIGHT, HTMLReportUtils.COLUMN_CHART, true, false)); - + parentCT.addChild(ctBin10); } else { break; } } - + // cumulative - Map cumulativeTallys = new TreeMap(); + Map cumulativeTallys = new TreeMap<>(); long count = 0; int i = 0; for (Entry entry : tallys.entrySet()) { @@ -993,23 +965,23 @@ private static ChartTab createCoverageChartTab(Element element) { if (i++ % 10 == 0) cumulativeTallys.put(entry.getKey(), new AtomicLong(count)); } - + ChartTab ctCumulative = new ChartTab("Cumulative", id+"c"); ctCumulative.setData(HTMLReportUtils.generateGoogleData(cumulativeTallys, ctCumulative.getName(), false)); ctCumulative.setChartInfo(HTMLReportUtils.generateGoogleScatterChart(ctCumulative.getName(), title + " - Cumulative", width, MAX_REPORT_HEIGHT, false)); - + parentCT.addChild(ctCumulative); } return parentCT; } - + private static ChartTab createMatrixChartTab(Element element, String title, String idSuffix) { String id = "mx" + reportID; - Map tallys = generateMatrixCollection(element, "ValueTally"); - - - ChartTab ct = null; + Map tallys = generateMatrixCollection(element); + + + ChartTab ct = null; if ( ! 
tallys.isEmpty()) { ct = new ChartTab(title, id+idSuffix); ct.setData(HTMLReportUtils.generateGoogleMatrixData(tallys, ct.getName(), true)); @@ -1018,69 +990,67 @@ private static ChartTab createMatrixChartTab(Element element, String title, Stri } return ct; } - + private static Map createRangeTallyMap(Element element) { - Map map = new TreeMap(); + Map map = new TreeMap<>(); final NodeList nl = element.getElementsByTagName("RangeTallyItem"); for (int i = 0, size = nl.getLength() ; i < size ; i++) { Element e = (Element) nl.item(i); - + int start = Integer.parseInt(e.getAttribute("start")); int end = Integer.parseInt(e.getAttribute("end")); Integer key = Math.round((float)(start + end) / 2); - + map.put(key, new AtomicLong(Integer.parseInt(e.getAttribute("count")))); } - + return map; } - - private static Map< Integer, AtomicLongArray > createRgCountMap(Element element) { -// private static Map createRgCountMap(Element element) { - //Map map = new TreeMap(); - Map map = new TreeMap(); + + private static Map< Integer, AtomicLongArray > createRgCountMap(Element element) { + Map map = new TreeMap<>(); final NodeList nl = element.getElementsByTagName("RangeTallyItem"); for (int i = 0, size = nl.getLength() ; i < size ; i++) { Element e = (Element) nl.item(i); - + int start = Integer.parseInt(e.getAttribute("start")); int end = Integer.parseInt(e.getAttribute("end")); Integer key = Math.round((float)(start + end) / 2); - + String[] sValues = e.getAttribute("rgCount").split(","); long[] nValues = new long[sValues.length+1]; for(int j = 0; j < sValues.length-1; j ++) nValues[j+1] = Long.parseLong(sValues[j].replace(" ", "")); - nValues[0] = Long.parseLong(e.getAttribute("count")); //total coverage - + nValues[0] = Long.parseLong(e.getAttribute("count")); //total coverage + map.put(key, new AtomicLongArray( nValues)); } - + return map; } - - private static ChartTab generateTallyChartTab(Element element, String name, - String id, String chartType, boolean isValueString) { + 
+ private static ChartTab generateTallyChartTab(Element element, String name, + String id, String chartType, boolean isValueString) { return generateTallyChartTab(element, name, id, chartType, isValueString, false); } - - private static ChartTab generateTallyChartTab(Element element, String name, - String id, String chartType, boolean isValueString, boolean turnOffLogScale) { + + private static ChartTab generateTallyChartTab(Element element, String name, + String id, String chartType, boolean isValueString, boolean turnOffLogScale) { Map cycleCount = getMapFromElement(element); return getChartTabFromMap(name, id, chartType, isValueString, cycleCount, turnOffLogScale); } - + private static ChartTab getChartTabFromMap(String name, String id, - String chartType, boolean isValueString, - Map cycleCount) { - return getChartTabFromMap(name, id, chartType, isValueString, cycleCount, false); + String chartType, + Map cycleCount) { + return getChartTabFromMap(name, id, chartType, true, cycleCount, false); } private static ChartTab getChartTabFromMap(String name, String id, - String chartType, boolean isValueString, - Map cycleCount, boolean turnOffLogScale) { - + String chartType, boolean isValueString, + Map cycleCount, boolean turnOffLogScale) { + ChartTab ct = new ChartTab(name, id + reportID); int width = MIN_REPORT_WIDTH; int height = MIN_REPORT_HEIGHT; @@ -1092,41 +1062,41 @@ private static ChartTab getChartTabFromMap(String name, String id, width = MAX_REPORT_WIDTH - 100; height = MAX_REPORT_HEIGHT + 300; } - } - + } + ct.setData(HTMLReportUtils.generateGoogleData(cycleCount, ct.getName(), isValueString)); - + if (HTMLReportUtils.SCATTER_CHART.equals(chartType)) { - + ct.setChartInfo(HTMLReportUtils.generateGoogleScatterChart( - ct.getName(), name + " Tally", width, height, ! turnOffLogScale)); - + ct.getName(), name + " Tally", width, height, ! 
turnOffLogScale)); + } else { - + ct.setChartInfo(HTMLReportUtils.generateGoogleChart( ct.getName(), name + " Tally", width, height, chartType, ! turnOffLogScale, false)); } - + return ct; } private static Map getMapFromElement(Element element) { - final Map cycleCount = new LinkedHashMap(); - + final Map cycleCount = new LinkedHashMap<>(); + // get ValueTally final NodeList nl = element.getElementsByTagName("ValueTally"); final Element nameElement = (Element) nl.item(0); - + QProfilerCollectionsUtils.populateTallyItemMap(nameElement, cycleCount, false); return cycleCount; } private static Map generateLengthsTally(Element tabElement, - String name) { - - final Map lengths = new LinkedHashMap(); + String name) { + + final Map lengths = new LinkedHashMap<>(); if (null != tabElement) { NodeList nl = tabElement.getElementsByTagName(name); //FIXME hack to get around the "NEW" tag added to lengths @@ -1134,41 +1104,40 @@ private static Map generateLengthsTally(Element tabElement, nl = tabElement.getElementsByTagName(name + "NEW"); } Element nameElement = (Element) nl.item(0); - + QProfilerCollectionsUtils.populateTallyItemMap(nameElement, lengths, true); } return lengths; } - - private static Map generateMatrixCollection(Element tabElement, - String name) { - - final Map lengths = new LinkedHashMap(); - + + private static Map generateMatrixCollection(Element tabElement) { + + final Map lengths = new LinkedHashMap<>(); + if (null != tabElement) { - NodeList nl = tabElement.getElementsByTagName(name); + NodeList nl = tabElement.getElementsByTagName("ValueTally"); //FIXME hack to get around the "NEW" tag added to lengths if (nl.getLength() == 0) { - nl = tabElement.getElementsByTagName(name + "NEW"); + nl = tabElement.getElementsByTagName("ValueTally" + "NEW"); } Element nameElement = (Element) nl.item(0); - + QProfilerCollectionsUtils.populateMatrixMap(nameElement, lengths); } return lengths; } private static Map generateValueTally(Element tabElement, - String name) { + 
String name) { final NodeList nl = tabElement.getElementsByTagName(name); final Element nameElement = (Element) nl.item(0); return generateLengthsTally(nameElement, "ValueTally"); } - + private static List buildMultiTabCycles(boolean showMainTab, String mainTabName, - Element tabElement, String id, String cycleName, - String badReadName, List columns, String[] colours, boolean isQualData) { - + Element tabElement, String id, String cycleName, + String badReadName, List columns, String[] colours, boolean isQualData) { + final SummaryByCycle cycle = QProfilerCollectionsUtils.generateSummaryByCycleFromElement(tabElement, cycleName); final Map lengths = generateLengthsTally(tabElement, @@ -1181,10 +1150,10 @@ private static List buildMultiTabCycles(boolean showMainTab, String ma } private static List createCyclesTabFromCollections(boolean showMainTab, String mainTabName, - String id, List columns, String[] colours, - SummaryByCycle cycle, Map lengths, - Map badReads, boolean isQualData) { - + String id, List columns, String[] colours, + SummaryByCycle cycle, Map lengths, + Map badReads, boolean isQualData) { + // create cycle tab final ChartTab cycleCT = new ChartTab("Cycles", id + "c"); cycleCT.setData(HTMLReportUtils.generateGoogleDataCycles(cycle, cycleCT @@ -1206,19 +1175,19 @@ private static List createCyclesTabFromCollections(boolean showMai MIN_REPORT_HEIGHT, HTMLReportUtils.COLUMN_CHART, true, false)); lineLengthCT.setDescription(LINE_LENGTH_DESCRIPTION); } - // + // int badReadsWidth = badReads.size() > 25 ? 
1200 : 950; // create bad reads final ChartTab badReadsCT = new ChartTab("Bad Reads", id + "br"); badReadsCT.setData(HTMLReportUtils.generateGoogleData(badReads, badReadsCT.getName(), true)); badReadsCT.setChartInfo(HTMLReportUtils.generateGoogleChart(badReadsCT - .getName(), mainTabName + " Bad Reads", badReadsWidth, MIN_REPORT_HEIGHT, + .getName(), mainTabName + " Bad Reads", badReadsWidth, MIN_REPORT_HEIGHT, HTMLReportUtils.COLUMN_CHART, true, false)); badReadsCT.setDescription(isQualData ? BAD_QUALS_DESCRIPTION : BAD_READS_DESCRIPTION); - List tabs = new ArrayList(); - + List tabs = new ArrayList<>(); + if (showMainTab) { final ChartTab main = new ChartTab(mainTabName); main.addChild(cycleCT); @@ -1232,108 +1201,109 @@ private static List createCyclesTabFromCollections(boolean showMai tabs.add(lineLengthCT); tabs.add(badReadsCT); } - + return tabs; } + //xu code - private static Map createRgMap( Element summaryElement, Element isizeElement ) { - - Map duplicateMap = new HashMap<>(); + private static Map createRgMap( Element summaryElement, Element isizeElement ) { + + Map duplicateMap = new HashMap<>(); Map maxLengthMap = new HashMap<>(); Map aveLengthMap = new HashMap<>(); Map totalReadsMap = new LinkedHashMap<>(); - Map unmappedMap = new HashMap<>(); - Map nonCanonicalMap = new HashMap<>(); + Map unmappedMap = new HashMap<>(); + Map nonCanonicalMap = new HashMap<>(); Map isizeMap = new HashMap<>(); Map hardClipMap = new HashMap<>(); Map softClipMap = new HashMap<>(); Map overlapMap = new HashMap<>(); Map lostMap = new HashMap<>(); Map trimmedMap = new HashMap<>(); - + //isize - NodeList isizeNodes = isizeElement.getElementsByTagName("ReadGroup"); - if (null != isizeNodes) - for (int i = 0 ; i < isizeNodes.getLength() ; i++){ - String rg = isizeNodes.item(i).getAttributes().getNamedItem("id").getNodeValue(); - String modal = isizeNodes.item(i).getAttributes().getNamedItem("ModalISize").getNodeValue(); - isizeMap.put( rg, modal); - } - + NodeList isizeNodes = 
isizeElement.getElementsByTagName("ReadGroup"); + for (int i = 0 ; i < isizeNodes.getLength() ; i++){ + String rg = isizeNodes.item(i).getAttributes().getNamedItem("id").getNodeValue(); + String modal = isizeNodes.item(i).getAttributes().getNamedItem("ModalISize").getNodeValue(); + isizeMap.put( rg, modal); + } + //reads information - NodeList readsChildren = ( (Element) summaryElement.getElementsByTagName("Reads").item(0) ).getElementsByTagName("ReadGroup"); - - int rgNum = (null != readsChildren)? readsChildren.getLength() : 0; - for (int i = 0 ; i < rgNum ; i++){ + NodeList readsChildren = ( (Element) summaryElement.getElementsByTagName("Reads").item(0) ).getElementsByTagName("ReadGroup"); + + int rgNum = readsChildren.getLength(); + for (int i = 0 ; i < rgNum ; i++){ String rg = readsChildren.item(i).getAttributes().getNamedItem("id").getNodeValue(); - //a NodeList of all descendant Elements - NodeList rgNodes = ((Element) readsChildren.item(i)).getElementsByTagName("*"); - + //a NodeList of all descendant Elements + NodeList rgNodes = ((Element) readsChildren.item(i)).getElementsByTagName("*"); + for(int j = 0; j < rgNodes.getLength(); j ++){ String nodeName = rgNodes.item(j).getNodeName(); NamedNodeMap nodeMap = rgNodes.item(j).getAttributes(); String percentage = (nodeMap.getNamedItem("percentage") != null )? nodeMap.getNamedItem("percentage").getNodeValue() : - (nodeMap.getNamedItem("basePercentage") != null )? nodeMap.getNamedItem("basePercentage").getNodeValue() : null; + (nodeMap.getNamedItem("basePercentage") != null )? 
nodeMap.getNamedItem("basePercentage").getNodeValue() : null; switch (nodeName) { case "duplicate": duplicateMap.put(rg, percentage); break; - case "unmapped" : unmappedMap.put(rg,percentage); break; - case "nonCanonicalPair" : nonCanonicalMap.put(rg, percentage); ; break; - case "softClip" : softClipMap.put(rg, percentage); break; - case "hardClip" : hardClipMap.put(rg, percentage); break; - case "overlap" : overlapMap.put(rg, percentage) ; break; - case "trimmedBase" : trimmedMap.put(rg, percentage) ; break; + case "unmapped" : unmappedMap.put(rg,percentage); break; + case "nonCanonicalPair" : nonCanonicalMap.put(rg, percentage); break; + case "softClip" : softClipMap.put(rg, percentage); break; + case "hardClip" : hardClipMap.put(rg, percentage); break; + case "overlap" : overlapMap.put(rg, percentage) ; break; + case "trimmedBase" : trimmedMap.put(rg, percentage) ; break; case "overall" : { - maxLengthMap.put(rg, nodeMap.getNamedItem("maxLength").getNodeValue()); + maxLengthMap.put(rg, nodeMap.getNamedItem("maxLength").getNodeValue()); aveLengthMap.put(rg, nodeMap.getNamedItem("aveLength").getNodeValue()); totalReadsMap.put(rg, nodeMap.getNamedItem("countedReads").getNodeValue()); - lostMap.put(rg, nodeMap.getNamedItem("lostBases").getNodeValue()); - }; break; - } - } - } - + lostMap.put(rg, nodeMap.getNamedItem("lostBases").getNodeValue()); + } + break; + } + } + } + Map summaryMap = new LinkedHashMap<>(); final String startVBlock = "{v: '"; final String endVBlock = "', p: {style: 'text-align: right'}}" ; - final String finalVBlock = "]}"; - + final String finalVBlock = "]}"; + //add header line summaryMap.put("Read Group", "TableName,Read Group,Read Count,Average
Read
Length,Max
Read
Length,Mode
TLEN,Unmapped
Reads,Non-canonical
ReadPair,Duplicate
Reads," + "Within
ReadPair
Overlap,Soft
Clipping
(CIGAR),Hard
Clipping
(CIGAR),Adaptor
Trimming,Total
Bases
Lost"); - - String overallEle = null; + + String overallEle = null; for ( String rg : totalReadsMap.keySet()) { - String lostColor = endVBlock; - try{ - float lost = Float.valueOf(lostMap.get(rg).replace("%", "").trim()); - String color = (lost > 40)? "tomato":"yellow"; + String lostColor = endVBlock; + try{ + float lost = Float.parseFloat(lostMap.get(rg).replace("%", "").trim()); + String color = (lost > 40)? "tomato":"yellow"; if (lost < 20) { color = "palegreen"; } lostColor = "', p: {style: 'text-align: right; background-color:" + color +";'}}" ; - }catch(NumberFormatException e){ } //do nothing - - StringBuilder ele = new StringBuilder(startVBlock).append(totalReadsMap.get(rg)).append(endVBlock) + } catch (NumberFormatException ignored){ } //do nothing + + StringBuilder ele = new StringBuilder(startVBlock).append(totalReadsMap.get(rg)).append(endVBlock) .append(",").append(startVBlock).append(aveLengthMap.get(rg)).append(endVBlock ) - .append(",").append(startVBlock).append(maxLengthMap.get(rg)).append(endVBlock ) - .append(",").append(startVBlock).append((isizeMap.get(rg) == null ? "-" : isizeMap.get(rg) )).append(endVBlock) + .append(",").append(startVBlock).append(maxLengthMap.get(rg)).append(endVBlock) + .append(",").append(startVBlock).append((isizeMap.get(rg) == null ? 
"-" : isizeMap.get(rg) )).append(endVBlock) .append(",").append(startVBlock).append( unmappedMap.get(rg)).append( endVBlock ) - .append(",").append(startVBlock).append(nonCanonicalMap.get(rg)).append(endVBlock) - .append(",").append(startVBlock).append(duplicateMap.get(rg)).append(endVBlock) + .append(",").append(startVBlock).append(nonCanonicalMap.get(rg)).append(endVBlock) + .append(",").append(startVBlock).append(duplicateMap.get(rg)).append(endVBlock) .append(",").append(startVBlock).append(overlapMap.get(rg)).append( endVBlock ) .append(",").append(startVBlock).append(softClipMap.get(rg)).append(endVBlock ) - .append(",").append(startVBlock).append(hardClipMap.get(rg)).append( endVBlock) + .append(",").append(startVBlock).append(hardClipMap.get(rg)).append( endVBlock) .append(",").append(startVBlock).append(trimmedMap.get(rg)).append(endVBlock) .append(",").append(startVBlock).append(lostMap.get(rg)).append(lostColor).append(finalVBlock); if( ! rg.equals("overall")) { - summaryMap.put(rg, ele.toString()); + summaryMap.put(rg, ele.toString()); } else { - overallEle = ele.toString(); + overallEle = ele.toString(); } } summaryMap.put("overall", overallEle); return summaryMap; - } + } } diff --git a/qvisualise/src/org/qcmg/qvisualise/util/QProfilerCollectionsUtils.java b/qvisualise/src/org/qcmg/qvisualise/util/QProfilerCollectionsUtils.java index 95e6c5de1..fa3198610 100644 --- a/qvisualise/src/org/qcmg/qvisualise/util/QProfilerCollectionsUtils.java +++ b/qvisualise/src/org/qcmg/qvisualise/util/QProfilerCollectionsUtils.java @@ -18,13 +18,17 @@ import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicLongArray; +import org.qcmg.common.log.QLogger; +import org.qcmg.common.log.QLoggerFactory; import org.qcmg.common.model.MAPQMiniMatrix; import org.qcmg.common.util.Constants; import org.w3c.dom.Element; import org.w3c.dom.NodeList; public class QProfilerCollectionsUtils { - + + public static QLogger log = 
QLoggerFactory.getLogger(QProfilerCollectionsUtils.class); + /** * Splits the flags collection * @param @@ -36,62 +40,62 @@ public class QProfilerCollectionsUtils { @SuppressWarnings("unchecked") public static Map splitFlagTallyByDistinguisher(Map flags, Map distinguisherMap, T nonDistDesc) { Map splitFlags = new LinkedHashMap<>(); - - // loop through flags, if contains the specified value, add to value + + // loop through flags, if contains the specified value, add to value for (Entry entry : flags.entrySet()) { final String[] flagSplit = ((String)entry.getKey()).split(", "); - + String flagSummaryString = ""; if (flagSplit.length == 2) { flagSummaryString = flagSplit[1]; } - - + + T value = null; for (Entry distMapEntry : distinguisherMap.entrySet()) { if (flagSummaryString.contains(distMapEntry.getKey())) { value = distMapEntry.getValue(); } } - + if (null == value) value = null != nonDistDesc ? nonDistDesc : (T) "Other"; - + final AtomicLong currentCount = splitFlags.get(value); - + if (null == currentCount) { splitFlags.put(value, new AtomicLong(entry.getValue().get())); } else { currentCount.addAndGet(entry.getValue().get()); } - + } - + return splitFlags; } - + public static SummaryByCycle generateSummaryByCycleFromElement(Element element, String name) { ConcurrentMap> tally = new ConcurrentHashMap<>(); final NodeList nl = element.getElementsByTagName(name); final Element nameElement = (Element) nl.item(0); - + if (null != nameElement) { - // now get the cycletally underneath.. + // now get the cycle tally underneath.. 
if (nameElement.hasChildNodes()) { Element cycleTallyElement = (Element) nameElement.getElementsByTagName("CycleTally").item(0); - + // get the cycles final NodeList cycles = cycleTallyElement.getElementsByTagName("Cycle"); for (int i = 0, size = cycles.getLength() ; i < size ; i++) { if (cycles.item(i) instanceof Element) { final Element cycleElement = (Element) cycles.item(i); - - // get tallyitems - ConcurrentMap cycleCount = new ConcurrentHashMap(); + + // get tally items + ConcurrentMap cycleCount = new ConcurrentHashMap<>(); populateTallyItemMap(cycleElement, cycleCount, false); - + tally.put(Integer.parseInt(cycleElement.getAttribute("value")), cycleCount); } @@ -99,32 +103,32 @@ public static SummaryByCycle generateSummaryByCycleFromElement(Element el } } - return new SummaryByCycle(tally); + return new SummaryByCycle<>(tally); } - + public static Map generatePercentagesMapFromElement(Element element, - String name) { - - ConcurrentMap tally = new ConcurrentHashMap(); + String name) { + + ConcurrentMap tally = new ConcurrentHashMap<>(); final NodeList nl = element.getElementsByTagName(name); final Element nameElement = (Element) nl.item(0); - - + + if (null != nameElement) { - // now get the cycletally underneath.. + // now get the cycle tally underneath.. 
if (nameElement.hasChildNodes()) { Element cycleTallyElement = (Element) nameElement.getElementsByTagName("CycleTally").item(0); - + // get the cycles final NodeList cycles = cycleTallyElement.getElementsByTagName("Cycle"); for (int i = 0, size = cycles.getLength() ; i < size ; i++) { if (cycles.item(i) instanceof Element) { final Element cycleElement = (Element) cycles.item(i); - - // get tallyitems + + // get tally items final NodeList tallyItemsNL = cycleElement.getElementsByTagName("TallyItem"); - - if (null != tallyItemsNL && tallyItemsNL.item(0) instanceof Element) { + + if (tallyItemsNL.item(0) instanceof Element) { Element tallyItemElement = (Element) tallyItemsNL.item(0); if (tallyItemElement.getAttribute("percent").length() > 0) { String percent = tallyItemElement.getAttribute("percent"); @@ -138,41 +142,45 @@ public static Map generatePercentagesMapFromElement(Element ele } return tally; } - + @SuppressWarnings("unchecked") public static void populateTallyItemMap(Element cycleElement, - Map map, boolean isInteger) { + Map map, boolean isInteger) { if (null != cycleElement) { - + final NodeList tallyItemsNL = cycleElement.getElementsByTagName("TallyItem"); - - for (int j = 0, size = tallyItemsNL.getLength() ; j < size ; j++) { - if (tallyItemsNL.item(j) instanceof Element) { - Element tallyItemElement = (Element) tallyItemsNL.item(j); - if (tallyItemElement - .getAttribute("count").length() > 0) { - - final long count = Long.parseLong(tallyItemElement - .getAttribute("count")); - - map.put(isInteger ? 
(T) Integer.valueOf(tallyItemElement.getAttribute("value")) - : (T) tallyItemElement.getAttribute("value") , new AtomicLong(count)); - + if (tallyItemsNL.getLength() < 100000) { + for (int j = 0, size = tallyItemsNL.getLength(); j < size; j++) { + if (tallyItemsNL.item(j) instanceof Element) { + Element tallyItemElement = (Element) tallyItemsNL.item(j); + if (tallyItemElement + .getAttribute("count").length() > 0) { + + final long count = Long.parseLong(tallyItemElement + .getAttribute("count")); + + map.put(isInteger ? (T) Integer.valueOf(tallyItemElement.getAttribute("value")) + : (T) tallyItemElement.getAttribute("value"), new AtomicLong(count)); + + } } } + } else { + log.warn(cycleElement.getTagName() + " has too many elements: " + tallyItemsNL.getLength() + " - will leave out of report"); + } } } - + public static void populateMatrixMap(Element cycleElement, - Map map) { + Map map) { if (null != cycleElement) { - NodeList tallyItemsNL = cycleElement.getElementsByTagName("TallyItem"); + NodeList tallyItemsNL = cycleElement.getElementsByTagName("TallyItem"); for (int j = 0, size = tallyItemsNL.getLength() ; j < size ; j++) { Element tallyItemElement = (Element) tallyItemsNL.item(j); long count = Long.parseLong(tallyItemElement .getAttribute("count")); - + //TODO must be a better way of extracting the matrix details from the string String matrix = tallyItemElement.getAttribute("value"); if (matrix.startsWith("MAPQMiniMatrix")) { @@ -181,14 +189,14 @@ public static void populateMatrixMap(Element cycleElement, int mapQ = Integer.parseInt(m[0].substring(m[0].indexOf("=")+1)); int value = Integer.parseInt(m[1].substring(m[1].indexOf("=")+1)); MAPQMiniMatrix mmm = new MAPQMiniMatrix(mapQ, value); - + map.put(mmm, new AtomicLong(count)); } - + } } } - + public static AtomicLong tallyArrayValues(AtomicLongArray array) { long l = 0; for (int i = 0 , len = array.length() ; i < len ; i++) { @@ -198,17 +206,17 @@ public static AtomicLong tallyArrayValues(AtomicLongArray 
array) { } /** - * Converts a map containing values and their corresponding counts into a Map that bins the values - * into ranges determined by the supplied noOfBins value and the max value in the original map, + * Converts a map containing values and their corresponding counts into a Map that bins the values + * into ranges determined by the supplied noOfBins value and the max value in the original map, * and the values are the summed values of all entries in the original map that fall within the range *
* Note that the supplied Map needs to be of type TreeMap as the method relies on the map being ordered. *
- * The returned Map contains a string as its key which corresponds to the range (eg. 0-100) + * The returned Map contains a string as its key which corresponds to the range (e.g. 0-100) *
* Note it is assumed that the lowest key value is 0
* ie. this will not work when there are negative values in the original map - * + * * @param map TreeMap map containing Integer keys and values, whose values are to be binned * @param binSize int corresponding to the number of bins that are required. The range each bin will have is dependent on the max number * @return Map of String, Integer pairs relating to the range, and number within that range @@ -220,21 +228,21 @@ public static Map convertMapIntoBinnedMap(Map(map.size() / binSize); - + + binnedMap = new LinkedHashMap<>(map.size() / binSize); + // get max number from map - map contains absolute values so getting the last key should be safe int maxValue = ((SortedMap) map).lastKey(); int minValue = ((SortedMap) map).firstKey(); - + long count = 0; int fromPosition = startFromZero ? 0 : minValue; for (int i = fromPosition ; i <= maxValue ; i++) { - + AtomicLong mi = map.get(i); if (null != mi) count += mi.get(); - + if ((i+1) % binSize == 0 || i == maxValue) { // add count to binnedMap binnedMap.put(fromPosition + " - " + i, new AtomicLong(count)); @@ -246,24 +254,24 @@ public static Map convertMapIntoBinnedMap(Map> convertHeaderTextToMap(String headerText) { if (null == headerText) return Collections.emptyMap(); - + String [] params = headerText.split("\n"); - + Map> results = new LinkedHashMap<>(8,0.9f); - + //init the map for order String[] heads = {"Header","Sequence","Read Group","Program","Comments","Other"}; for(String key : heads) { results.put(key, null); } - + if (params.length > 1) { for (String param : params) { - + String key; if (param.startsWith(Constants.HEADER_PREFIX)) { key = "Header"; @@ -307,17 +315,17 @@ public static Map> convertHeaderTextToMap(String headerText } } } - + //remove entries in map that came from the heads array and that have null values - for (String key : heads) { - if (results.get(key) == null) { - results.remove(key); - } - } - + for (String key : heads) { + if (results.get(key) == null) { + results.remove(key); + } + } + 
return results; } - + private static void addDataToList(Map> map, String key, String data) { map.computeIfAbsent(key, v -> new ArrayList<>()).add(data); } From 3353fd1143cfcefe30f29be9e44c9f15ddf7a974 Mon Sep 17 00:00:00 2001 From: Oliver Holmes Date: Mon, 17 Jul 2023 15:35:07 +1000 Subject: [PATCH 2/2] refactor(qprofiler qvisualise): adding comments and tweaking logging as per reviewer suggestions --- .../src/org/qcmg/qprofiler/fastq/FastqSummaryReport.java | 6 +++--- qvisualise/src/org/qcmg/qvisualise/QVisualise.java | 7 ++++++- .../qcmg/qvisualise/util/QProfilerCollectionsUtils.java | 5 +++-- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/qprofiler/src/org/qcmg/qprofiler/fastq/FastqSummaryReport.java b/qprofiler/src/org/qcmg/qprofiler/fastq/FastqSummaryReport.java index 386aa387f..93bfd0b87 100644 --- a/qprofiler/src/org/qcmg/qprofiler/fastq/FastqSummaryReport.java +++ b/qprofiler/src/org/qcmg/qprofiler/fastq/FastqSummaryReport.java @@ -292,18 +292,18 @@ void parseFiveElementHeaderWithSpaces(String [] params) { // split by space String [] firstElementParams = params[0].split(" "); if (firstElementParams.length != 2) { - throw new UnsupportedOperationException("Incorrect header format encountered in parseFiveElementHeader. Expected '@ERR091788.3104 HSQ955_155:2:1101:13051:2071/2' but received: " + Arrays.deepToString(params)); + throw new IllegalArgumentException("Incorrect header format encountered in parseFiveElementHeaderWithSpaces. Expected a space (e.g. @ERR091788.3104 HSQ955_155) in the first element in the array, but received: " + Arrays.deepToString(params)); } String [] machineAndReadPosition = firstElementParams[0].split("\\."); if (machineAndReadPosition.length != 2) { - throw new UnsupportedOperationException("Incorrect header format encountered in parseFiveElementHeader. 
Expected '@ERR091788.3104 HSQ955_155:2:1101:13051:2071/2' but received: " + Arrays.deepToString(params)); + throw new IllegalArgumentException("Incorrect header format encountered in parseFiveElementHeaderWithSpaces. Expected a single dot (e.g. @ERR091788.3104 HSQ955_155) in the first part of the first element in the array, but received: " + Arrays.deepToString(params)); } updateMap(instruments, machineAndReadPosition[0]); String [] flowCellAndRunId = firstElementParams[1].split("_"); if (flowCellAndRunId.length != 2) { - throw new UnsupportedOperationException("Incorrect header format encountered in parseFiveElementHeader. Expected '@ERR091788.3104 HSQ955_155:2:1101:13051:2071/2' but received: " + Arrays.deepToString(params)); + throw new IllegalArgumentException("Incorrect header format encountered in parseFiveElementHeaderWithSpaces. Expected a single underscore (e.g. @ERR091788.3104 HSQ955_155) in the second part of the first element in the array but received: " + Arrays.deepToString(params)); } updateMap(flowCellIds, flowCellAndRunId[0]); diff --git a/qvisualise/src/org/qcmg/qvisualise/QVisualise.java b/qvisualise/src/org/qcmg/qvisualise/QVisualise.java index 8de960ccd..bbc134114 100644 --- a/qvisualise/src/org/qcmg/qvisualise/QVisualise.java +++ b/qvisualise/src/org/qcmg/qvisualise/QVisualise.java @@ -45,8 +45,13 @@ public static void examineFileSizeAndXmx(String inputFile) throws IOException { logger.info("supplied Xmx memory: " + xmxSize); double ratio = (double)xmxSize / fileSize; logger.info("memory / file size ratio: " + ratio); + /* + Warn the user when the xml file size to Xmx setting ratio is below a certain point + (8 in testing performed using java 8 oracle JVM) + as this could result in OOM exceptions + */ if (ratio < 8) { - logger.warn("There may not be enough memory to load the xml input file. Please consider loading a smaller xml file or increasing the Xmx value."); + logger.warn("There may not be enough memory to load the xml input file. 
Please consider visualising a smaller xml file or increasing the Xmx value."); } } diff --git a/qvisualise/src/org/qcmg/qvisualise/util/QProfilerCollectionsUtils.java b/qvisualise/src/org/qcmg/qvisualise/util/QProfilerCollectionsUtils.java index fa3198610..ca25dcc9d 100644 --- a/qvisualise/src/org/qcmg/qvisualise/util/QProfilerCollectionsUtils.java +++ b/qvisualise/src/org/qcmg/qvisualise/util/QProfilerCollectionsUtils.java @@ -27,6 +27,7 @@ public class QProfilerCollectionsUtils { + public static final int MAX_TALLY_ITEMS_LENGTH = 100000; public static QLogger log = QLoggerFactory.getLogger(QProfilerCollectionsUtils.class); /** @@ -149,7 +150,7 @@ public static void populateTallyItemMap(Element cycleElement, if (null != cycleElement) { final NodeList tallyItemsNL = cycleElement.getElementsByTagName("TallyItem"); - if (tallyItemsNL.getLength() < 100000) { + if (tallyItemsNL.getLength() < MAX_TALLY_ITEMS_LENGTH) { for (int j = 0, size = tallyItemsNL.getLength(); j < size; j++) { if (tallyItemsNL.item(j) instanceof Element) { Element tallyItemElement = (Element) tallyItemsNL.item(j); @@ -166,7 +167,7 @@ public static void populateTallyItemMap(Element cycleElement, } } } else { - log.warn(cycleElement.getTagName() + " has too many elements: " + tallyItemsNL.getLength() + " - will leave out of report"); + log.warn(cycleElement.getTagName() + " has too many elements: " + tallyItemsNL.getLength() + " - will leave out of report (maximum number of elements set to: " + MAX_TALLY_ITEMS_LENGTH + ")"); } }