Skip to content

Commit 020cc97

Browse files
Improved detection of long DOIs within text (#7260)
* Improved detection of long DOIs within text. Fixes #7256. * Fix checkstyle. Signed-off-by: Dominik Voigt <[email protected]> Co-authored-by: Nikolaus Koopmann <[email protected]>
1 parent 27864e9 commit 020cc97

File tree

2 files changed

+12
-1
lines changed
  • src
    • main/java/org/jabref/model/entry/identifier
    • test/java/org/jabref/model/entry/identifier

2 files changed

+12
-1
lines changed

src/main/java/org/jabref/model/entry/identifier/DOI.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,7 @@ public class DOI implements Identifier {
4545
+ "10" // directory indicator
4646
+ "(?:\\.[0-9]+)+" // registrant codes
4747
+ "[/:]" // divider
48-
+ "(?:[^\\s]+)" // suffix alphanumeric without space
48+
+ "(?:[^\\s,;]+[^,;(\\.\\s)])" // suffix: one or more characters other than whitespace/","/";", then a final character that is also none of "."/"("/")" (so the suffix is at least two characters long)
4949
+ ")"; // end group \1
5050

5151
// Regex (Short DOI)

src/test/java/org/jabref/model/entry/identifier/DOITest.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,12 +165,23 @@ private static Stream<Arguments> testData() {
165165
// findDoiInsideArbitraryText
166166
Arguments.of("10.1006/jmbi.1998.2354",
167167
DOI.findInText("other stuff 10.1006/jmbi.1998.2354 end").get().getDOI()),
168+
Arguments.of("10.1007/s10549-018-4743-9",
169+
DOI.findInText("Breast Cancer Res Treat. 2018 July ; 170(1): 77–87. doi:10.1007/s10549-018-4743-9. ").get().getDOI()),
170+
Arguments.of("10.1007/s10549-018-4743-9",
171+
DOI.findInText("Breast Cancer Res Treat. 2018 July ; 170(1): 77–87. doi:10.1007/s10549-018-4743-9, ").get().getDOI()),
172+
Arguments.of("10.1007/s10549-018-4743-9",
173+
DOI.findInText("Breast Cancer Res Treat. 2018 July ; 170(1): 77–87. doi:10.1007/s10549-018-4743-9;something else").get().getDOI()),
174+
Arguments.of("10.1007/s10549-018-4743-9.1234",
175+
DOI.findInText("bla doi:10.1007/s10549-018-4743-9.1234 with . in doi").get().getDOI()),
168176

169177
// findShortDoiInsideArbitraryText
170178
Arguments.of("10/12ab", DOI.findInText("other stuff doi:10/12ab end").get().getDOI()),
171179
Arguments.of("10/12ab", DOI.findInText("other stuff /urn:doi:10/12ab end").get().getDOI()),
172180
Arguments.of("10%12ab", DOI.findInText("other stuff doi:10%12ab end").get().getDOI()),
173181
Arguments.of("10%12ab", DOI.findInText("other stuff /doi:10%12ab end").get().getDOI()),
182+
Arguments.of("10%12ab", DOI.findInText("other stuff /doi:10%12ab, end").get().getDOI()),
183+
Arguments.of("10%12ab", DOI.findInText("other stuff /doi:10%12ab. end").get().getDOI()),
184+
Arguments.of("10%12ab", DOI.findInText("other stuff /doi:10%12ab; end").get().getDOI()),
174185
Arguments.of("10/1234", DOI.findInText("10/B(C)/15 \n" +
175186
" \n" +
176187
"10:51 \n" +

0 commit comments

Comments
 (0)