Skip to content

Commit c7bcaa7

Browse files
committed
Fixes #2574: Add logic for new ScienceDirect pages
1 parent 4da74e5 commit c7bcaa7

File tree

2 files changed

+33
-7
lines changed

2 files changed

+33
-7
lines changed

src/main/java/org/jabref/logic/importer/fetcher/ScienceDirect.java

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@ public class ScienceDirect implements FulltextFetcher {
3636
@Override
3737
public Optional<URL> findFullText(BibEntry entry) throws IOException {
3838
Objects.requireNonNull(entry);
39-
Optional<URL> pdfLink = Optional.empty();
4039

4140
// Try unique DOI first
4241
Optional<DOI> doi = entry.getField(FieldName.DOI).flatMap(DOI::build);
@@ -46,21 +45,35 @@ public Optional<URL> findFullText(BibEntry entry) throws IOException {
4645
try {
4746
String sciLink = getUrlByDoi(doi.get().getDOI());
4847

48+
// scrape the web page not as mobile client!
4949
if (!sciLink.isEmpty()) {
50-
// Retrieve PDF link
51-
Document html = Jsoup.connect(sciLink).ignoreHttpErrors(true).get();
50+
Document html = Jsoup.connect(sciLink)
51+
.userAgent("Mozilla/5.0 (Windows; U; WindowsNT 5.1; en-US; rv1.8.1.6) Gecko/20070725 Firefox/2.0.0.6")
52+
.referrer("http://www.google.com")
53+
.ignoreHttpErrors(true).get();
54+
55+
// Retrieve PDF link (old page)
5256
Element link = html.getElementById("pdfLink");
5357

5458
if (link != null) {
55-
LOGGER.info("Fulltext PDF found @ ScienceDirect.");
56-
pdfLink = Optional.of(new URL(link.attr("pdfurl")));
59+
LOGGER.info("Fulltext PDF found @ ScienceDirect (old page).");
60+
Optional<URL> pdfLink = Optional.of(new URL(link.attr("pdfurl")));
61+
return pdfLink;
62+
}
63+
// Retrieve PDF link (new page)
64+
String url = html.getElementsByClass("pdf-download-btn-link").attr("href");
65+
66+
if (url != null) {
67+
LOGGER.info("Fulltext PDF found @ ScienceDirect (new page).");
68+
Optional<URL> pdfLink = Optional.of(new URL("http://www.sciencedirect.com" + url));
69+
return pdfLink;
5770
}
5871
}
5972
} catch(UnirestException e) {
6073
LOGGER.warn("ScienceDirect API request failed", e);
6174
}
6275
}
63-
return pdfLink;
76+
return Optional.empty();
6477
}
6578

6679
private String getUrlByDoi(String doi) throws UnirestException {

src/test/java/org/jabref/logic/importer/fetcher/ScienceDirectTest.java

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ public void doiNotPresent() throws IOException {
3838
}
3939

4040
@Test
41-
public void findByDOI() throws IOException {
41+
public void findByDOIOldPage() throws IOException {
4242
// CI server is blocked
4343
Assume.assumeFalse(DevEnvironment.isCIServer());
4444

@@ -50,6 +50,19 @@ public void findByDOI() throws IOException {
5050
);
5151
}
5252

53+
@Test
54+
public void findByDOINewPage() throws IOException {
55+
// CI server is blocked
56+
Assume.assumeFalse(DevEnvironment.isCIServer());
57+
58+
entry.setField("doi", "10.1016/j.aasri.2014.09.002");
59+
60+
Assert.assertEquals(
61+
Optional.of(new URL("http://www.sciencedirect.com/science/article/pii/S2212671614001024/pdf?md5=4e2e9a369b4d5b3db5100aba599bef8b&pid=1-s2.0-S2212671614001024-main.pdf")),
62+
finder.findFullText(entry)
63+
);
64+
}
65+
5366
@Test
5467
public void notFoundByDOI() throws IOException {
5568
// CI server is blocked

0 commit comments

Comments
 (0)