Skip to content

Commit 0968b8c

Browse files
authored
Merge pull request #391 from AdamaJava/nanno_contig_sort
fix(nanno): default comparator for ChrPosition incorrectly sorting contig names
2 parents bd2a768 + 3509518 commit 0968b8c

File tree

8 files changed

+197
-165
lines changed

8 files changed

+197
-165
lines changed

qannotate/test/au/edu/qimr/qannotate/nanno/AnnotateTest.java

Lines changed: 14 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,7 @@ public void jsonInputs() throws IOException {
3434
createJsonInputs(inputJson, annotationSource, "blah", false, 3, 4, true);
3535

3636
AnnotationInputs ais = AnnotateUtils.getInputs(inputJson.getAbsolutePath());
37-
assertTrue(ais != null);
38-
assert ais != null;
37+
assertNotNull(ais);
3938
assertEquals(1, ais.getInputs().size());
4039

4140
List<AnnotationSource> sources = new ArrayList<>();
@@ -57,7 +56,7 @@ public void jsonInputsTSVMissingHeader() throws IOException {
5756
AnnotateUtils.populateAnnotationSources(ais, new ArrayList<>());
5857
Assert.fail();
5958
} catch (IllegalArgumentException iae) {
60-
assertEquals(true, iae.getMessage().contains("No headers for AnnotationSourceTSV!"));
59+
assertTrue(iae.getMessage().contains("No headers for AnnotationSourceTSV!"));
6160
}
6261

6362
/*
@@ -68,7 +67,7 @@ public void jsonInputsTSVMissingHeader() throws IOException {
6867
AnnotateUtils.populateAnnotationSources(ais, new ArrayList<>());
6968
Assert.fail();
7069
} catch (IllegalArgumentException iae) {
71-
assertEquals(true, iae.getMessage().contains("No headers for AnnotationSourceTSV!"));
70+
assertTrue(iae.getMessage().contains("No headers for AnnotationSourceTSV!"));
7271
}
7372

7473
/*
@@ -79,7 +78,7 @@ public void jsonInputsTSVMissingHeader() throws IOException {
7978
AnnotateUtils.populateAnnotationSources(ais, new ArrayList<>());
8079
Assert.fail();
8180
} catch (IllegalArgumentException iae) {
82-
assertEquals(true, iae.getMessage().contains("Could not find requested fields (blah) in header"));
81+
assertTrue(iae.getMessage().contains("Could not find requested fields (blah) in header"));
8382
}
8483
}
8584

@@ -169,8 +168,8 @@ public void endToEnd() throws IOException {
169168
* check output file
170169
*/
171170
List<String> lines = Files.readAllLines(Paths.get(outputFile.getAbsolutePath()));
172-
assertEquals(true, lines.contains("chr1 655650 A C C 0/1 17,15 M "));
173-
assertEquals(true, lines.contains("chr1 655652 A T T 1/1 0,46 M "));
171+
assertTrue(lines.contains("chr1 655650 A C C 0/1 17,15 M "));
172+
assertTrue(lines.contains("chr1 655652 A T T 1/1 0,46 M "));
174173

175174
}
176175

@@ -211,7 +210,7 @@ public void endToEndSnpEff() throws IOException {
211210
assertEquals("chr1\t889689\tG\tA\tA,C\t1/2\t22,4,18\tOR4F16-SAMD11\tENSG00000284662.2-ENSG00000187634.13\tintergenic_region\tintergenic_region\t\t\t\tMODIFIER\tn.889689G>A\t\t\t\t\t\"GENE\"+(\"889689G>A\"|\"889689G->A\"|\"889689G-->A\"|\"889689G/A\")", lines.get(lines.size() - 4));
212211
assertEquals("chr1\t889689\tG\tC\tA,C\t1/2\t22,4,18\tOR4F16-SAMD11\tENSG00000284662.2-ENSG00000187634.13\tintergenic_region\tintergenic_region\t\t\t\tMODIFIER\tn.889689G>C\t\t\t\t\t\"GENE\"+(\"889689G>C\"|\"889689G->C\"|\"889689G-->C\"|\"889689G/C\")", lines.get(lines.size() - 3));
213212
assertEquals("chr1\t1130186\tCAAAAAA\tC\tC,CAAAAAAAAAAAAAA\t1/2\t0,3,2\tC1orf159-TTLL10\tENSG00000131591.18-ENSG00000162571.14\tintergenic_region\tintergenic_region\t\t\t\tMODIFIER\tn.1130187_1130192delAAAAAA\t\t\t\t\t", lines.get(lines.size() - 2));
214-
assertEquals("chr1\t1130186\tCAAAAAA\tCAAAAAAAAAAAAAA\tC,CAAAAAAAAAAAAAA\t1/2\t0,3,2\tC1orf159-TTLL10\tENSG00000131591.18-ENSG00000162571.14\tintergenic_region\tintergenic_region\t\t\t\tMODIFIER\tn.1130192_1130193insAAAAAAAA\t\t\t\t\t", lines.get(lines.size() - 1));
213+
assertEquals("chr1\t1130186\tCAAAAAA\tCAAAAAAAAAAAAAA\tC,CAAAAAAAAAAAAAA\t1/2\t0,3,2\tC1orf159-TTLL10\tENSG00000131591.18-ENSG00000162571.14\tintergenic_region\tintergenic_region\t\t\t\tMODIFIER\tn.1130192_1130193insAAAAAAAA\t\t\t\t\t", lines.getLast());
215214
}
216215

217216
private int executeTest(File inputVcf, File inputJson, File outputFile, File log) throws IOException {
@@ -253,7 +252,7 @@ public static void createJsonInputs(File jsonFile, File annotationFile, String a
253252
"}"
254253
);
255254

256-
try (BufferedWriter out = new BufferedWriter(new FileWriter(jsonFile));) {
255+
try (BufferedWriter out = new BufferedWriter(new FileWriter(jsonFile))) {
257256
for (String line : data) {
258257
out.write(line + "\n");
259258
}
@@ -297,13 +296,13 @@ public void loadJSONInputs() throws IOException {
297296
"}"
298297
);
299298

300-
try (BufferedWriter out = new BufferedWriter(new FileWriter(inputJson));) {
299+
try (BufferedWriter out = new BufferedWriter(new FileWriter(inputJson))) {
301300
for (String line : data) {
302301
out.write(line + "\n");
303302
}
304303
}
305304
AnnotationInputs ais = AnnotateUtils.getInputs(inputJson.getAbsolutePath());
306-
assertEquals(true, ais.isIncludeSearchTerm());
305+
assertTrue(ais.isIncludeSearchTerm());
307306
assertEquals("test1,test2,test3", ais.getAdditionalEmptyFields());
308307
assertEquals(3, ais.getAnnotationSourceThreadCount());
309308
assertEquals("field_1,field_2,field_3", ais.getOutputFieldOrder());
@@ -324,7 +323,7 @@ static void createVcf(File f, List<String> records) throws IOException {
324323
// "chr1 60781981 rs5015226 T G . . FLANK=ACCAAGTGCCT;DP=53;FS=0.000;MQ=60.00;QD=31.16;SOR=0.730;IN=1,2;DB;HOM=0,CGAAAACCAAgTGCCTGCATT;EFF=intergenic_region(MODIFIER||||||||||1) GT:AD:CCC:CCM:DP:EOR:FF:FT:GQ:INF:NNS:OABS:QL 1/1:0,46:Germline:34:47:G0[]1[]:G5;T1:PASS:.:.:42:C0[0]1[12];G23[40.04]23[36.83]:. 1/1:0,57:Germline:34:57:G2[]0[]:G3:PASS:.:.:51:G24[39]33[39.18]:. 1/1:0,53:Germline:34:53:.:.:PASS:99:.:.:.:2418.77 1/1:0,60:Germline:34:60:.:.:PASS:99:.:.:.:2749.77"
325324
);
326325

327-
try (BufferedWriter out = new BufferedWriter(new FileWriter(f));) {
326+
try (BufferedWriter out = new BufferedWriter(new FileWriter(f))) {
328327
for (String line : data) {
329328
out.write(line + "\n");
330329
}
@@ -343,7 +342,7 @@ static void createSnpEFfAnnotationFile(File f, boolean addHeader) throws IOExcep
343342
"chr1 889689 . G A,C 679.1 PASS AC=1,1;AF=0.500,0.500;AN=2;BaseQRankSum=-2.586e+00;DP=63;ExcessHet=0.0000;FS=2.623;MLEAC=1,1;MLEAF=0.500,0.500;MQ=52.67;MQRankSum=-1.696e+00;QD=15.43;ReadPosRankSum=-9.520e-01;SOR=0.984;ANN=A|intergenic_region|MODIFIER|OR4F16-SAMD11|ENSG00000284662.2-ENSG00000187634.13|intergenic_region|ENSG00000284662.2-ENSG00000187634.13|||n.889689G>A||||||,C|intergenic_region|MODIFIER|OR4F16-SAMD11|ENSG00000284662.2-ENSG00000187634.13|intergenic_region|ENSG00000284662.2-ENSG00000187634.13|||n.889689G>C|||||| GT:AD:DP:GQ:PL 1/2:22,4,18:44:99:696,580,775,165,0,111".replaceAll("\\s+", "\t"),
344343
"chr1 1130186 . CAAAAAA C,CAAAAAAAAAAAAAA 125.02 HardFiltered AC=1,1;AF=0.500,0.500;AN=2;DP=17;ExcessHet=0.0000;FS=0.000;MLEAC=1,1;MLEAF=0.500,0.500;MQ=59.72;QD=25.00;SOR=3.611;ANN=C|intergenic_region|MODIFIER|C1orf159-TTLL10|ENSG00000131591.18-ENSG00000162571.14|intergenic_region|ENSG00000131591.18-ENSG00000162571.14|||n.1130187_1130192delAAAAAA||||||,CAAAAAAAAAAAAAA|intergenic_region|MODIFIER|C1orf159-TTLL10|ENSG00000131591.18-ENSG00000162571.14|intergenic_region|ENSG00000131591.18-ENSG00000162571.14|||n.1130192_1130193insAAAAAAAA|||||| GT:AD:DP:GQ:PL 1/2:0,3,2:5:67:142,74,232,72,0,67".replaceAll("\\s+", "\t"));
345344

346-
try (BufferedWriter out = new BufferedWriter(new FileWriter(f));) {
345+
try (BufferedWriter out = new BufferedWriter(new FileWriter(f))) {
347346
if (addHeader) {
348347
out.write(header + "\n");
349348
}
@@ -368,7 +367,7 @@ static void createAnnotationFile(File f, boolean addHeader) throws IOException {
368367
"1 655652 A T M L . 1 65565 1 55428 1 OR4F56 ENSG00000186092 ENST00000641515 ENSP00000493376 A0A2U3U0J3 A0A2U3U0J3_HUMAN . . . . c.1A>C p.Met1?"
369368
);
370369

371-
try (BufferedWriter out = new BufferedWriter(new FileWriter(f));) {
370+
try (BufferedWriter out = new BufferedWriter(new FileWriter(f))) {
372371
if (addHeader) {
373372
out.write(header + "\n");
374373
}
@@ -415,7 +414,7 @@ static void createAnnotationFileIncorrectOrder(File f) throws IOException {
415414
"6 655652 A G M L . 1 65565 1 55428 1 OR4F5 ENSG00000186092 ENST00000641515 ENSP00000493376 A0A2U3U0J3 A0A2U3U0J3_HUMAN . . . . c.1A>C p.Met1?"
416415
);
417416

418-
try (BufferedWriter out = new BufferedWriter(new FileWriter(f));) {
417+
try (BufferedWriter out = new BufferedWriter(new FileWriter(f))) {
419418
out.write(header + "\n");
420419
for (String line : data) {
421420
out.write(line + "\n");

qcommon/src/org/qcmg/common/model/ReferenceNameComparator.java

Lines changed: 63 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -9,50 +9,82 @@
99
import java.io.Serial;
1010
import java.io.Serializable;
1111
import java.util.Comparator;
12+
import java.util.Map;
1213

1314
public class ReferenceNameComparator implements Comparator<String> , Serializable {
1415

1516
@Serial
1617
private static final long serialVersionUID = 3528840046906334666L;
17-
18-
private static final char CHR = 'c';
19-
private static final char GL = 'G';
18+
19+
private static final Map<String, Integer> STANDARD_CONTIGS = Map.ofEntries(
20+
// Numeric chromosomes
21+
Map.entry("1", 1),
22+
Map.entry("2", 2),
23+
Map.entry("3", 3),
24+
Map.entry("4", 4),
25+
Map.entry("5", 5),
26+
Map.entry("6", 6),
27+
Map.entry("7", 7),
28+
Map.entry("8", 8),
29+
Map.entry("9", 9),
30+
Map.entry("10", 10),
31+
Map.entry("11", 11),
32+
Map.entry("12", 12),
33+
Map.entry("13", 13),
34+
Map.entry("14", 14),
35+
Map.entry("15", 15),
36+
Map.entry("16", 16),
37+
Map.entry("17", 17),
38+
Map.entry("18", 18),
39+
Map.entry("19", 19),
40+
Map.entry("20", 20),
41+
Map.entry("21", 21),
42+
Map.entry("22", 22),
43+
// Special chromosomes
44+
Map.entry("X", 23),
45+
Map.entry("Y", 24),
46+
Map.entry("M", 25),
47+
Map.entry("MT", 25)
48+
);
2049

2150
@Override
2251
public int compare(String s1, String s2) {
23-
24-
final char s1FirstChar = s1.charAt(0);
25-
final char s2FirstChar = s2.charAt(0);
26-
if (CHR == s1FirstChar && CHR == s2FirstChar && isNumeric(s1.charAt(3)) && isNumeric(s2.charAt(3))) {
27-
// chr comparison - only want to compare numeric chr values here, not X,Y,MT
28-
if (s1.length() < 6 && s2.length() < 6) {
29-
return Integer.valueOf(s1.substring(3)).compareTo(Integer.valueOf(s2.substring(3)));
30-
} else {
31-
// need to cater for long chr names eg chr17_ctg5_hap1
32-
String s1Int = s1.length() < 6 ? s1.substring(3) : getIntegerFromString(s1.substring(3));
33-
String s2Int = s2.length() < 6 ? s2.substring(3) : getIntegerFromString(s2.substring(3));
34-
35-
if (s1Int.equals(s2Int)) {
36-
return s1.compareToIgnoreCase(s2);
37-
} else {
38-
return Integer.valueOf(s1Int).compareTo(Integer.valueOf(s2Int));
52+
53+
// Early equality check
54+
if (s1.equals(s2)) return 0;
55+
56+
// Strip "chr" prefix if present
57+
s1 = s1.startsWith("chr") ? s1.substring(3) : s1;
58+
s2 = s2.startsWith("chr") ? s2.substring(3) : s2;
59+
60+
// Check equality after stripping prefix
61+
if (s1.equals(s2)) return 0;
62+
63+
int s1Index = STANDARD_CONTIGS.getOrDefault(s1, 0);
64+
int s2Index = STANDARD_CONTIGS.getOrDefault(s2, 0);
65+
// Handle special chromosomes
66+
if (s1Index != 0 && s2Index != 0) {
67+
return Integer.compare(s1Index, s2Index);
68+
} else if (s1Index != 0) {
69+
return -1;
70+
} else if (s2Index != 0) {
71+
return 1;
72+
}
73+
74+
if (s1.length() > 2 && s2.length() > 2) {
75+
if (Character.isDigit(s1.charAt(0)) && Character.isDigit(s2.charAt(0))) {
76+
int i1 = Integer.parseInt(getIntegerFromString(s1));
77+
int i2 = Integer.parseInt(getIntegerFromString(s2));
78+
if (i1 != i2) {
79+
return Integer.compare(i1, i2);
3980
}
4081
}
41-
42-
} else if (GL == s1FirstChar && GL == s2FirstChar) {
43-
// GL comparison - use float as we have 'GL000192.1' values
44-
return Float.compare(Float.parseFloat(s1.substring(2)) , Float.parseFloat(s2.substring(2)));
45-
} else if (isNumeric(s1FirstChar) && isNumeric(s2FirstChar)) {
46-
return Integer.compare(Integer.parseInt(s1), Integer.parseInt(s2));
47-
} else {
48-
return s1.compareToIgnoreCase(s2);
4982
}
83+
84+
// Default string comparison for all other cases
85+
return s1.compareToIgnoreCase(s2);
5086
}
5187

52-
private boolean isNumeric(char ch) {
53-
return Character.isDigit(ch);
54-
}
55-
5688
private String getIntegerFromString(String string) {
5789
// assume number is at beginning of string
5890
// eg 17_ctg5_hap1

qcommon/test/org/qcmg/common/model/ChrPositionRefAltTest.java

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,7 @@ public void testComparator() {
5353
positions.add(cp3);
5454

5555
positions.sort(null);
56-
assertEquals(cp3, positions.get(0));
57-
assertEquals(cp1, positions.get(1));
58-
assertEquals(cp2, positions.get(2));
56+
assertEquals(cp2, positions.getLast());
5957

6058
// empty collection asn re-populate
6159
positions.clear();

qcommon/test/org/qcmg/common/model/ChrPositionTest.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,9 @@ public void testComparator() {
4747
positions.add(cp3);
4848

4949
Collections.sort(positions);
50-
assertEquals(cp3, positions.get(0));
51-
assertEquals(cp1, positions.get(1));
52-
assertEquals(cp2, positions.get(2));
50+
// assertEquals(cp3, positions.get(0));
51+
// assertEquals(cp1, positions.get(1));
52+
assertEquals(cp2, positions.getLast());
5353

5454
// empty collection asn re-populate
5555
positions.clear();

qcommon/test/org/qcmg/common/model/ReferenceNameComparatorTest.java

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -80,11 +80,11 @@ public void compareNonChr() {
8080
if (i < 23) {
8181
Assert.assertEquals(i, Integer.parseInt(key));
8282
} else if (i == 23) {
83-
Assert.assertEquals("MT", key);
84-
} else if (i == 24) {
8583
Assert.assertEquals("X", key);
86-
} else if (i == 25) {
84+
} else if (i == 24) {
8785
Assert.assertEquals("Y", key);
86+
} else if (i == 25) {
87+
Assert.assertEquals("MT", key);
8888
}
8989
i++;
9090
}
@@ -146,7 +146,7 @@ public void testCompareMix() {
146146
map.put("chr14", 1);
147147

148148
assertEquals("chr1", map.firstKey());
149-
assertEquals("chrY", map.lastKey());
149+
assertEquals("chrMT", map.lastKey());
150150
}
151151

152152
@Test
@@ -167,7 +167,7 @@ public void testCompareMix2() {
167167
map.put("chr14", 1);
168168

169169
assertEquals("chr2", map.firstKey());
170-
assertEquals("chrY", map.lastKey());
170+
assertEquals("chrMT", map.lastKey());
171171
}
172172

173173
@Test
@@ -188,7 +188,7 @@ public void testCompareWithRandom() {
188188
map.put("chr14", 1);
189189

190190
assertEquals("chr1", map.firstKey());
191-
assertEquals("chrY", map.lastKey());
191+
assertEquals("chrMT", map.lastKey());
192192

193193
}
194194

@@ -216,9 +216,9 @@ public void testWithLongContigName() {
216216

217217
assertEquals("chr1", map.firstKey());
218218
Assert.assertTrue(map.containsKey("chr1_ctg5_hap1"));
219-
Assert.assertTrue(map.containsKey("chr17_ctg5_hap1"));
219+
Assert.assertTrue(map.containsKey("chrMT"));
220220
Assert.assertTrue(map.containsKey("chr17"));
221-
assertEquals("chrY", map.lastKey());
221+
assertEquals("chr17_ctg5_hap1", map.lastKey());
222222
}
223223

224224
@Test
@@ -258,7 +258,13 @@ public void testNewReferenceChrNames() {
258258
map.put("chr10", 1);
259259
assertEquals(4, map.size());
260260

261+
map.put("chrM", 1);
262+
map.put("chrY", 1);
263+
map.put("chrX", 1);
264+
261265
assertEquals("chr1", map.firstKey());
262266
assertEquals("chr10_gl000191_random", map.lastKey());
267+
map.put("chrXX", 1);
268+
assertEquals("chrXX", map.lastKey());
263269
}
264270
}

qmule/test/org/qcmg/qmule/WiggleFromPileupTest.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -159,8 +159,7 @@ public void testIsPositionInBaitMultipleGffMultipleChromosomes() {
159159
gffs.add(gff6);
160160
Iterator<Gff3Record> iter = gffs.iterator();
161161

162-
Assert.assertFalse(WiggleFromPileup.isPositionInBait("chr0", 0, iter, iter.next()));
163-
Assert.assertFalse(WiggleFromPileup.isPositionInBait("chr1", 0, iter, gff1));
162+
Assert.assertFalse(WiggleFromPileup.isPositionInBait("chr1", 0, iter, iter.next()));
164163

165164
assertTrue(WiggleFromPileup.isPositionInBait("chr1", 1, iter, gff1));
166165
assertTrue(WiggleFromPileup.isPositionInBait("chr1", 10, iter, gff1));

0 commit comments

Comments
 (0)