Skip to content

Commit 075901a

Browse files
authored
Fix arXiv fetcher tests (#7686)
* Add some code comments and remove some empty lines * Fix tests (arXiv does not use https URLs) * Fix https * Readability fixes
1 parent 0363863 commit 075901a

File tree

3 files changed

+18
-25
lines changed

3 files changed

+18
-25
lines changed

src/main/java/org/jabref/logic/importer/fetcher/ArXiv.java

Lines changed: 16 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
import javax.xml.parsers.DocumentBuilderFactory;
1717
import javax.xml.parsers.ParserConfigurationException;
1818

19-
import org.jabref.logic.cleanup.CleanupJob;
2019
import org.jabref.logic.cleanup.EprintCleanup;
2120
import org.jabref.logic.help.HelpFile;
2221
import org.jabref.logic.importer.FetcherException;
@@ -80,7 +79,6 @@ public Optional<URL> findFullText(BibEntry entry) throws IOException {
8079
.map(Optional::get)
8180
.findFirst();
8281
pdfUrl.ifPresent(url -> LOGGER.info("Fulltext PDF found @ arXiv."));
83-
8482
return pdfUrl;
8583
} catch (FetcherException e) {
8684
LOGGER.warn("arXiv API request failed", e);
@@ -117,11 +115,12 @@ private Optional<ArXivEntry> searchForEntryById(String id) throws FetcherExcepti
117115
}
118116
}
119117

120-
private List<ArXivEntry> searchForEntries(BibEntry entry) throws FetcherException {
121-
entry = (BibEntry) entry.clone();
122-
CleanupJob cleanupJob = new EprintCleanup();
123-
cleanupJob.cleanup(entry);
124-
// 1. Eprint
118+
private List<ArXivEntry> searchForEntries(BibEntry originalEntry) throws FetcherException {
119+
// We need to clone the entry, because we modify it by a cleanup job.
120+
final BibEntry entry = (BibEntry) originalEntry.clone();
121+
122+
// 1. Check for Eprint
123+
new EprintCleanup().cleanup(entry);
125124
Optional<String> identifier = entry.getField(StandardField.EPRINT);
126125
if (StringUtil.isNotBlank(identifier)) {
127126
try {
@@ -133,26 +132,21 @@ private List<ArXivEntry> searchForEntries(BibEntry entry) throws FetcherExceptio
133132
}
134133

135134
// 2. DOI and other fields
136-
String query;
137-
138-
Optional<String> doi = entry.getField(StandardField.DOI).flatMap(DOI::parse).map(DOI::getNormalized);
139-
if (doi.isPresent()) {
140-
// Search for an entry in the ArXiv which is linked to the doi
141-
query = "doi:" + doi.get();
142-
} else {
143-
Optional<String> authorQuery = entry.getField(StandardField.AUTHOR).map(author -> "au:" + author);
144-
Optional<String> titleQuery = entry.getField(StandardField.TITLE).map(title -> "ti:" + StringUtil.ignoreCurlyBracket(title));
145-
query = OptionalUtil.toList(authorQuery, titleQuery).stream().collect(Collectors.joining("+AND+"));
146-
}
147-
135+
String query = entry.getField(StandardField.DOI)
136+
.flatMap(DOI::parse)
137+
.map(DOI::getNormalized)
138+
.map(doiString -> "doi:" + doiString)
139+
.orElseGet(() -> {
140+
Optional<String> authorQuery = entry.getField(StandardField.AUTHOR).map(author -> "au:" + author);
141+
Optional<String> titleQuery = entry.getField(StandardField.TITLE).map(title -> "ti:" + StringUtil.ignoreCurlyBracket(title));
142+
return String.join("+AND+", OptionalUtil.toList(authorQuery, titleQuery));
143+
});
148144
Optional<ArXivEntry> arxivEntry = searchForEntry(query);
149-
150145
if (arxivEntry.isPresent()) {
151146
// Check if entry is a match
152147
StringSimilarity match = new StringSimilarity();
153148
String arxivTitle = arxivEntry.get().title.orElse("");
154149
String entryTitle = StringUtil.ignoreCurlyBracket(entry.getField(StandardField.TITLE).orElse(""));
155-
156150
if (match.isSimilar(arxivTitle, entryTitle)) {
157151
return OptionalUtil.toList(arxivEntry);
158152
}
@@ -175,7 +169,7 @@ private List<ArXivEntry> queryApi(String searchQuery, List<ArXivIdentifier> ids,
175169

176170
/**
177171
* Queries the API.
178-
*
172+
* <p>
179173
* If only {@code searchQuery} is given, then the API will return results for each article that matches the query.
180174
* If only {@code ids} is given, then the API will return results for each article in the list.
181175
* If both {@code searchQuery} and {@code ids} are given, then the API will return each article in

src/main/java/org/jabref/model/strings/StringUtil.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -654,7 +654,6 @@ public static String boldHTML(String input) {
654654
* Return string enclosed in HTML bold tags if not null, otherwise return alternative text in HTML bold tags
655655
*/
656656
public static String boldHTML(String input, String alternative) {
657-
658657
if (input == null) {
659658
return "<b>" + alternative + "</b>";
660659
}

src/test/java/org/jabref/logic/importer/fetcher/ArXivTest.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ void findFullTextByTitle() throws IOException {
9999
void findFullTextByTitleWithCurlyBracket() throws IOException {
100100
entry.setField(StandardField.TITLE, "Machine versus {Human} {Attention} in {Deep} {Reinforcement} {Learning} {Tasks}");
101101

102-
assertEquals(Optional.of(new URL("https://arxiv.org/pdf/2010.15942v2")), fetcher.findFullText(entry));
102+
assertEquals(Optional.of(new URL("http://arxiv.org/pdf/2010.15942v2")), fetcher.findFullText(entry));
103103
}
104104

105105
@Test
@@ -131,7 +131,7 @@ void findFullTextByTitleWithCurlyBracketAndPartOfAuthor() throws IOException {
131131
entry.setField(StandardField.TITLE, "Machine versus {Human} {Attention} in {Deep} {Reinforcement} {Learning} {Tasks}");
132132
entry.setField(StandardField.AUTHOR, "Zhang, Ruohan and Guo");
133133

134-
assertEquals(Optional.of(new URL("https://arxiv.org/pdf/2010.15942v2")), fetcher.findFullText(entry));
134+
assertEquals(Optional.of(new URL("http://arxiv.org/pdf/2010.15942v2")), fetcher.findFullText(entry));
135135
}
136136

137137
@Test

0 commit comments

Comments
 (0)