diff --git a/src/main/java/org/jabref/logic/importer/fetcher/ScienceDirect.java b/src/main/java/org/jabref/logic/importer/fetcher/ScienceDirect.java
index dd8868c3082..3ebda1c0bbd 100644
--- a/src/main/java/org/jabref/logic/importer/fetcher/ScienceDirect.java
+++ b/src/main/java/org/jabref/logic/importer/fetcher/ScienceDirect.java
@@ -1,9 +1,11 @@
package org.jabref.logic.importer.fetcher;
import java.io.IOException;
+import java.net.MalformedURLException;
import java.net.URL;
import java.util.Objects;
import java.util.Optional;
+import java.util.stream.Collectors;
import org.jabref.logic.importer.FulltextFetcher;
import org.jabref.logic.net.URLDownload;
@@ -21,78 +23,102 @@
import kong.unirest.json.JSONObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Node;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
- * FulltextFetcher implementation that attempts to find a PDF URL at ScienceDirect. See https://dev.elsevier.com/
+ * FulltextFetcher implementation that attempts to find a PDF URL at ScienceDirect.
+ * See https://dev.elsevier.com/.
*/
public class ScienceDirect implements FulltextFetcher {
private static final Logger LOGGER = LoggerFactory.getLogger(ScienceDirect.class);
- private static final String API_URL = "http://api.elsevier.com/content/article/doi/";
+ private static final String API_URL = "https://api.elsevier.com/content/article/doi/";
private static final String API_KEY = new BuildInfo().scienceDirectApiKey;
@Override
public Optional findFullText(BibEntry entry) throws IOException {
Objects.requireNonNull(entry);
- // Try unique DOI first
Optional doi = entry.getField(StandardField.DOI).flatMap(DOI::parse);
+ if (doi.isEmpty()) {
+ // Full text fetching works only if a DOI is present
+ return Optional.empty();
+ }
- if (doi.isPresent()) {
- // Available in catalog?
- try {
- String sciLink = getUrlByDoi(doi.get().getDOI());
-
- // scrape the web page not as mobile client!
- if (!sciLink.isEmpty()) {
- Document html = Jsoup.connect(sciLink)
- .userAgent(URLDownload.USER_AGENT)
- .referrer("http://www.google.com")
- .ignoreHttpErrors(true).get();
-
- // Retrieve PDF link from meta data (most recent)
- Elements metaLinks = html.getElementsByAttributeValue("name", "citation_pdf_url");
-
- if (!metaLinks.isEmpty()) {
- String link = metaLinks.first().attr("content");
- return Optional.of(new URL(link));
- }
-
- URL url = new URL(sciLink);
- String protocol = url.getProtocol();
- String authority = url.getAuthority();
-
- Optional fullLinkToPdf = html
- .getElementsByAttributeValue("type", "application/json")
- .stream()
- .flatMap(element -> element.getElementsByTag("script").stream())
- // get the text element
- .map(element -> element.childNode(0))
- .map(element -> element.toString())
- .map(text -> new JSONObject(text))
- .filter(json -> json.has("article"))
- .map(json -> json.getJSONObject("article"))
- .filter(json -> json.has("pdfDownload"))
- .map(json -> json.getJSONObject("pdfDownload"))
- .filter(json -> json.has("linkToPdf"))
- .map(json -> json.getString("linkToPdf"))
- .map(linkToPdf -> String.format("%s://%s%s", protocol, authority, linkToPdf))
- .findAny();
- if (fullLinkToPdf.isPresent()) {
- LOGGER.info("Fulltext PDF found at ScienceDirect.");
- // new URL may through "MalformedURLException", thus using "isPresent()" above and ".get()"
- Optional pdfLink = Optional.of(new URL(fullLinkToPdf.get()));
- return pdfLink;
- }
- }
- } catch (UnirestException e) {
- LOGGER.warn("ScienceDirect API request failed", e);
- }
+ String urlFromDoi = getUrlByDoi(doi.get().getDOI());
+ if (urlFromDoi.isEmpty()) {
+ return Optional.empty();
+ }
+ // Scrape the web page as desktop client (not as mobile client!)
+ Document html = Jsoup.connect(urlFromDoi)
+ .userAgent(URLDownload.USER_AGENT)
+ .referrer("https://www.google.com")
+ .ignoreHttpErrors(true)
+ .get();
+
+ // Retrieve the PDF link from the "citation_pdf_url" meta tag (most recent)
+ Elements metaLinks = html.getElementsByAttributeValue("name", "citation_pdf_url");
+ if (!metaLinks.isEmpty()) {
+ String link = metaLinks.first().attr("content");
+ return Optional.of(new URL(link));
+ }
+
+ // Fallback (no "citation_pdf_url" meta tag): use the ScienceDirect web page that presents the article as HTML.
+ // This page embeds JSON data (in elements of type "application/json") that contains the information needed to build the link to the PDF.
+ // Example page: https://www.sciencedirect.com/science/article/pii/S1674775515001079
+
+ Optional pdfDownloadOptional = html
+ .getElementsByAttributeValue("type", "application/json")
+ .stream()
+ .flatMap(element -> element.getElementsByTag("script").stream())
+ // The first DOM child of the script element is the script content itself (the embedded JSON, represented as HTML text); it is parsed as JSON below
+ .map(element -> element.childNode(0))
+ .map(Node::toString)
+ .map(JSONObject::new)
+ .filter(json -> json.has("article"))
+ .map(json -> json.getJSONObject("article"))
+ .filter(json -> json.has("pdfDownload"))
+ .map(json -> json.getJSONObject("pdfDownload"))
+ .findAny();
+
+ if (pdfDownloadOptional.isEmpty()) {
+ LOGGER.debug("No 'pdfDownload' key found in JSON information");
+ return Optional.empty();
+ }
+
+ JSONObject pdfDownload = pdfDownloadOptional.get();
+
+ String fullLinkToPdf;
+ if (pdfDownload.has("linkToPdf")) {
+ String linkToPdf = pdfDownload.getString("linkToPdf");
+ URL url = new URL(urlFromDoi);
+ fullLinkToPdf = String.format("%s://%s%s", url.getProtocol(), url.getAuthority(), linkToPdf);
+ } else if (pdfDownload.has("urlMetadata")) {
+ JSONObject urlMetadata = pdfDownload.getJSONObject("urlMetadata");
+ JSONObject queryParamsObject = urlMetadata.getJSONObject("queryParams");
+ String queryParameters = queryParamsObject.keySet().stream()
+ .map(key -> String.format("%s=%s", key, queryParamsObject.getString(key)))
+ .collect(Collectors.joining("&"));
+ fullLinkToPdf = String.format("https://www.sciencedirect.com/%s/%s%s?%s",
+ urlMetadata.getString("path"),
+ urlMetadata.getString("pii"),
+ urlMetadata.getString("pdfExtension"),
+ queryParameters);
+ } else {
+ LOGGER.debug("No suitable data in JSON information");
+ return Optional.empty();
+ }
+
+ LOGGER.info("Fulltext PDF found at ScienceDirect at {}.", fullLinkToPdf);
+ try {
+ return Optional.of(new URL(fullLinkToPdf));
+ } catch (MalformedURLException e) {
+ LOGGER.error("malformed URL", e);
+ return Optional.empty();
}
- return Optional.empty();
}
@Override
@@ -110,7 +136,9 @@ private String getUrlByDoi(String doi) throws UnirestException {
.asJson();
JSONObject json = jsonResponse.getBody().getObject();
- JSONArray links = json.getJSONObject("full-text-retrieval-response").getJSONObject("coredata").getJSONArray("link");
+ JSONArray links = json.getJSONObject("full-text-retrieval-response")
+ .getJSONObject("coredata")
+ .getJSONArray("link");
for (int i = 0; i < links.length(); i++) {
JSONObject link = links.getJSONObject(i);
diff --git a/src/test/java/org/jabref/logic/importer/fetcher/ScienceDirectTest.java b/src/test/java/org/jabref/logic/importer/fetcher/ScienceDirectTest.java
index 95bcf8b6358..7c955863f24 100644
--- a/src/test/java/org/jabref/logic/importer/fetcher/ScienceDirectTest.java
+++ b/src/test/java/org/jabref/logic/importer/fetcher/ScienceDirectTest.java
@@ -28,7 +28,7 @@ void setUp() {
@Test
@DisabledOnCIServer("CI server is blocked")
- void findByDOIOldPage() throws IOException {
+ void findByDoiOldPage() throws IOException {
entry.setField(StandardField.DOI, "10.1016/j.jrmge.2015.08.004");
assertEquals(
@@ -39,7 +39,7 @@ void findByDOIOldPage() throws IOException {
@Test
@DisabledOnCIServer("CI server is blocked")
- void findByDOINewPage() throws IOException {
+ void findByDoiNewPage() throws IOException {
entry.setField(StandardField.DOI, "10.1016/j.aasri.2014.09.002");
assertEquals(
@@ -62,7 +62,7 @@ void findByDoiWorksForBoneArticle() throws IOException {
@Test
@DisabledOnCIServer("CI server is blocked")
- void notFoundByDOI() throws IOException {
+ void notFoundByDoi() throws IOException {
entry.setField(StandardField.DOI, "10.1016/j.aasri.2014.0559.002");
assertEquals(Optional.empty(), finder.findFullText(entry));