Skip to content

Commit 583c61f

Browse files
authored
Fix ArrayIndexOutOfBoundsException on second pdf import (#4426)
* Fix ArrayIndexOutOfBoundsException on second pdf import: The variable formerly known as i is a global variable which had the value -1 after the first run and therefore caused an exception on the second run * add changelog and fix * add test
1 parent 11a62dd commit 583c61f

File tree

3 files changed

+58
-38
lines changed

3 files changed

+58
-38
lines changed

CHANGELOG.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ We refer to [GitHub issues](https://github.com/JabRef/jabref/issues) by using `#
3131
- Files without a defined external file type are now directly opened with the default application of the operating system
3232
- We streamlined the process to rename and move files by removing the confirmation dialogs.
3333
- We removed the redundant new lines of markings and wrapped the summary in the File annotation tab. [#3823](https://github.com/JabRef/jabref/issues/3823)
34-
- We add auto url formatting when user paste link to URL field in entry editor. [#254](https://github.com/koppor/jabref/issues/254)
34+
- We added automatic URL formatting when the user pastes a link into the URL field in the entry editor. [koppor#254](https://github.com/koppor/jabref/issues/254)
3535
- We added a minimal height for the entry editor so that it can no longer be hidden by accident. [#4279](https://github.com/JabRef/jabref/issues/4279)
3636
- We added a new keyboard shortcut so that the entry editor can be closed by <kbd>Ctrl</kbd> + <kbd>E</kbd>. [#4222](https://github.com/JabRef/jabref/issues/4222)
3737
- We added an option in the preference dialog box that allows the user to pick the dark or light theme. [#4130](https://github.com/JabRef/jabref/issues/4130)
@@ -74,8 +74,8 @@ We refer to [GitHub issues](https://github.com/JabRef/jabref/issues) by using `#
7474
- We fixed an issue where files added via the "Attach file" contextmenu of an entry were not made relative. [#4201](https://github.com/JabRef/jabref/issues/4201) and [#4241](https://github.com/JabRef/jabref/issues/4241)
7575
- We fixed an issue where author list parser can't generate bibtex for Chinese author. [#4169](https://github.com/JabRef/jabref/issues/4169)
7676
- We fixed an issue where the list of XMP Exclusion fields in the preferences was not saved. [#4072](https://github.com/JabRef/jabref/issues/4072)
77-
- We fixed an issue where the ArXiv Fetcher did not support HTTP URLs [#4367](https://github.com/JabRef/jabref/pull/4367)
78-
77+
- We fixed an issue where the ArXiv Fetcher did not support HTTP URLs [koppor#328](https://github.com/koppor/jabref/issues/328)
78+
- We fixed an issue where only one PDF file could be imported [#4422](https://github.com/JabRef/jabref/issues/4422)
7979

8080

8181

src/main/java/org/jabref/logic/importer/fileformat/PdfContentImporter.java

Lines changed: 34 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -45,13 +45,14 @@ public class PdfContentImporter extends Importer {
4545
// input lines into several lines
4646
private String[] lines;
4747
// current index in lines
48-
private int i;
48+
private int lineIndex;
4949
private String curString;
5050
private String year;
5151

5252

5353
public PdfContentImporter(ImportFormatPreferences importFormatPreferences) {
5454
this.importFormatPreferences = importFormatPreferences;
55+
5556
}
5657
/**
5758
* Removes all non-letter characters at the end
@@ -225,17 +226,19 @@ public ParserResult importDatabase(Path filePath, Charset defaultEncoding) {
225226
// the different lines are joined into one and thereby separated by " "
226227
lines = firstPageContents.split(System.lineSeparator());
227228

229+
lineIndex = 0; // lineIndex is a field and keeps its value from the previous import (-1); reset it to zero to prevent an ArrayIndexOutOfBoundsException on the second run
230+
228231
proceedToNextNonEmptyLine();
229-
if (i >= lines.length) {
232+
if (lineIndex >= lines.length) {
230233
// PDF could not be parsed or is empty
231234
// return empty list
232235
return new ParserResult();
233236
}
234237

235238
// we start at the current line
236-
curString = lines[i];
239+
curString = lines[lineIndex];
237240
// lineIndex might get incremented later and curString modified, too
238-
i = i + 1;
241+
lineIndex = lineIndex + 1;
239242

240243
String author;
241244
String editor = null;
@@ -279,10 +282,10 @@ public ParserResult importDatabase(Path filePath, Charset defaultEncoding) {
279282

280283
// after title: authors
281284
author = null;
282-
while ((i < lines.length) && !"".equals(lines[i])) {
285+
while ((lineIndex < lines.length) && !"".equals(lines[lineIndex])) {
283286
// author names are unlikely to be split across different lines
284287
// treat them line by line
285-
curString = streamlineNames(lines[i]);
288+
curString = streamlineNames(lines[lineIndex]);
286289
if (author == null) {
287290
author = curString;
288291
} else {
@@ -292,38 +295,38 @@ public ParserResult importDatabase(Path filePath, Charset defaultEncoding) {
292295
author = author.concat(" and ").concat(curString);
293296
}
294297
}
295-
i++;
298+
lineIndex++;
296299
}
297300
curString = "";
298-
i++;
301+
lineIndex++;
299302

300303
// then, abstract and keywords follow
301-
while (i < lines.length) {
302-
curString = lines[i];
304+
while (lineIndex < lines.length) {
305+
curString = lines[lineIndex];
303306
if ((curString.length() >= "Abstract".length()) && "Abstract".equalsIgnoreCase(curString.substring(0, "Abstract".length()))) {
304307
if (curString.length() == "Abstract".length()) {
305308
// only word "abstract" found -- skip line
306309
curString = "";
307310
} else {
308311
curString = curString.substring("Abstract".length() + 1).trim().concat(System.lineSeparator());
309312
}
310-
i++;
313+
lineIndex++;
311314
// fillCurStringWithNonEmptyLines() cannot be used as that uses " " as line separator
312315
// whereas we need linebreak as separator
313-
while ((i < lines.length) && !"".equals(lines[i])) {
314-
curString = curString.concat(lines[i]).concat(System.lineSeparator());
315-
i++;
316+
while ((lineIndex < lines.length) && !"".equals(lines[lineIndex])) {
317+
curString = curString.concat(lines[lineIndex]).concat(System.lineSeparator());
318+
lineIndex++;
316319
}
317320
abstractT = curString.trim();
318-
i++;
321+
lineIndex++;
319322
} else if ((curString.length() >= "Keywords".length()) && "Keywords".equalsIgnoreCase(curString.substring(0, "Keywords".length()))) {
320323
if (curString.length() == "Keywords".length()) {
321324
// only word "Keywords" found -- skip line
322325
curString = "";
323326
} else {
324327
curString = curString.substring("Keywords".length() + 1).trim();
325328
}
326-
i++;
329+
lineIndex++;
327330
fillCurStringWithNonEmptyLines();
328331
keywords = removeNonLettersAtEnd(curString);
329332
} else {
@@ -340,18 +343,18 @@ public ParserResult importDatabase(Path filePath, Charset defaultEncoding) {
340343
}
341344
}
342345

343-
i++;
346+
lineIndex++;
344347
proceedToNextNonEmptyLine();
345348
}
346349
}
347350

348-
i = lines.length - 1;
351+
lineIndex = lines.length - 1;
349352

350353
// last block: DOI, detailed information
351354
// sometimes, this information is in the third last block etc...
352355
// therefore, read until the beginning of the file
353356

354-
while (i >= 0) {
357+
while (lineIndex >= 0) {
355358
readLastBlock();
356359
// lineIndex now points to the block before or is -1
357360
// curString contains the last block, separated by " "
@@ -522,8 +525,8 @@ private void extractYear() {
522525
* proceed to next non-empty line
523526
*/
524527
private void proceedToNextNonEmptyLine() {
525-
while ((i < lines.length) && "".equals(lines[i].trim())) {
526-
i++;
528+
while ((lineIndex < lines.length) && "".equals(lines[lineIndex].trim())) {
529+
lineIndex++;
527530
}
528531
}
529532

@@ -540,16 +543,16 @@ private void proceedToNextNonEmptyLine() {
540543
private void fillCurStringWithNonEmptyLines() {
541544
// ensure that curString does not end with " "
542545
curString = curString.trim();
543-
while ((i < lines.length) && !"".equals(lines[i])) {
544-
String curLine = lines[i].trim();
546+
while ((lineIndex < lines.length) && !"".equals(lines[lineIndex])) {
547+
String curLine = lines[lineIndex].trim();
545548
if (!"".equals(curLine)) {
546549
if (!curString.isEmpty()) {
547550
// insert separating space if necessary
548551
curString = curString.concat(" ");
549552
}
550-
curString = curString.concat(lines[i]);
553+
curString = curString.concat(lines[lineIndex]);
551554
}
552-
i++;
555+
lineIndex++;
553556
}
554557

555558
proceedToNextNonEmptyLine();
@@ -563,22 +566,22 @@ private void fillCurStringWithNonEmptyLines() {
563566
* invariant before/after: lineIndex points to the line before the last handled block
564567
*/
565568
private void readLastBlock() {
566-
while ((i >= 0) && "".equals(lines[i].trim())) {
567-
i--;
569+
while ((lineIndex >= 0) && "".equals(lines[lineIndex].trim())) {
570+
lineIndex--;
568571
}
569572
// lineIndex is now at the end of a block
570573

571-
int end = i;
574+
int end = lineIndex;
572575

573576
// find beginning
574-
while ((i >= 0) && !"".equals(lines[i])) {
575-
i--;
577+
while ((lineIndex >= 0) && !"".equals(lines[lineIndex])) {
578+
lineIndex--;
576579
}
577580
// lineIndex is now the line before the beginning of the block
578581
// this fulfills the invariant
579582

580583
curString = "";
581-
for (int j = i + 1; j <= end; j++) {
584+
for (int j = lineIndex + 1; j <= end; j++) {
582585
curString = curString.concat(lines[j].trim());
583586
if (j != end) {
584587
curString = curString.concat(" ");

src/test/java/org/jabref/logic/importer/fileformat/PdfContentImporterTest.java

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
package org.jabref.logic.importer.fileformat;
22

3-
import java.net.URISyntaxException;
43
import java.nio.charset.StandardCharsets;
54
import java.nio.file.Path;
65
import java.nio.file.Paths;
@@ -10,6 +9,8 @@
109
import org.jabref.logic.importer.ImportFormatPreferences;
1110
import org.jabref.logic.util.StandardFileType;
1211
import org.jabref.model.entry.BibEntry;
12+
import org.jabref.model.entry.BibtexEntryTypes;
13+
import org.jabref.model.entry.FieldName;
1314

1415
import org.junit.jupiter.api.BeforeEach;
1516
import org.junit.jupiter.api.Test;
@@ -34,14 +35,30 @@ public void testsGetExtensions() {
3435
@Test
3536
public void testGetDescription() {
3637
assertEquals(
37-
"PdfContentImporter parses data of the first page of the PDF and creates a BibTeX entry. Currently, Springer and IEEE formats are supported.",
38-
importer.getDescription());
38+
"PdfContentImporter parses data of the first page of the PDF and creates a BibTeX entry. Currently, Springer and IEEE formats are supported.",
39+
importer.getDescription());
3940
}
4041

4142
@Test
42-
public void doesNotHandleEncryptedPdfs() throws URISyntaxException {
43+
public void doesNotHandleEncryptedPdfs() throws Exception {
4344
Path file = Paths.get(PdfContentImporter.class.getResource("/pdfs/encrypted.pdf").toURI());
4445
List<BibEntry> result = importer.importDatabase(file, StandardCharsets.UTF_8).getDatabase().getEntries();
4546
assertEquals(Collections.emptyList(), result);
4647
}
48+
49+
@Test
50+
public void importTwiceWorksAsExpected() throws Exception {
51+
Path file = Paths.get(PdfContentImporter.class.getResource("/pdfs/minimal.pdf").toURI());
52+
List<BibEntry> result = importer.importDatabase(file, StandardCharsets.UTF_8).getDatabase().getEntries();
53+
54+
BibEntry expected = new BibEntry(BibtexEntryTypes.INPROCEEDINGS);
55+
expected.setField(FieldName.AUTHOR, "1 ");
56+
expected.setField(FieldName.TITLE, "Hello World");
57+
58+
List<BibEntry> resultSecondImport = importer.importDatabase(file, StandardCharsets.UTF_8).getDatabase().getEntries();
59+
assertEquals(Collections.singletonList(expected), result);
60+
assertEquals(Collections.singletonList(expected), resultSecondImport);
61+
62+
}
63+
4764
}

0 commit comments

Comments
 (0)