JabRef · Siedlerchr · May 4, 2021 · May 2, 2021 · May 2, 2021 · May 2, 2021
diff --git a/src/main/java/org/jabref/logic/importer/fetcher/ScienceDirect.java b/src/main/java/org/jabref/logic/importer/fetcher/ScienceDirect.java
@@ -1,9 +1,11 @@
 package org.jabref.logic.importer.fetcher;
 
 import java.io.IOException;
+import java.net.MalformedURLException;
 import java.net.URL;
 import java.util.Objects;
 import java.util.Optional;
+import java.util.stream.Collectors;
 
 import org.jabref.logic.importer.FulltextFetcher;
 import org.jabref.logic.net.URLDownload;
@@ -21,78 +23,118 @@
 import kong.unirest.json.JSONObject;
 import org.jsoup.Jsoup;
 import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Node;
 import org.jsoup.select.Elements;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 /**
- * FulltextFetcher implementation that attempts to find a PDF URL at ScienceDirect. See <a href="https://dev.elsevier.com/">https://dev.elsevier.com/</a>
+ * FulltextFetcher implementation that attempts to find a PDF URL at <a href="https://www.sciencedirect.com/">ScienceDirect</a>.
+ * See <a href="https://dev.elsevier.com/">https://dev.elsevier.com/</a>.
  */
 public class ScienceDirect implements FulltextFetcher {
     private static final Logger LOGGER = LoggerFactory.getLogger(ScienceDirect.class);
 
-    private static final String API_URL = "http://api.elsevier.com/content/article/doi/";
+    private static final String API_URL = "https://api.elsevier.com/content/article/doi/";
     private static final String API_KEY = new BuildInfo().scienceDirectApiKey;
 
+    /**
+     * Tries to find the full text PDF of the given entry at ScienceDirect.
+     * The DOI of the entry is resolved to an article page, which is then scraped for a link to the PDF.
+     *
+     * @param entry the entry to look up; must not be null
+     * @return the URL of the PDF, or an empty Optional if the entry has no parsable DOI,
+     *         the API request fails, or the article page offers no usable PDF link
+     * @throws IOException if the article page cannot be fetched or its meta data contains a malformed URL
+     */
     @Override
     public Optional<URL> findFullText(BibEntry entry) throws IOException {
         Objects.requireNonNull(entry);
 
-        // Try unique DOI first
         Optional<DOI> doi = entry.getField(StandardField.DOI).flatMap(DOI::parse);
+        if (doi.isEmpty()) {
+            // Full text fetching works only if a DOI is present
+            return Optional.empty();
+        }
 
-        if (doi.isPresent()) {
-            // Available in catalog?
-            try {
-                String sciLink = getUrlByDoi(doi.get().getDOI());
-
-                // scrape the web page not as mobile client!
-                if (!sciLink.isEmpty()) {
-                    Document html = Jsoup.connect(sciLink)
-                                         .userAgent(URLDownload.USER_AGENT)
-                                         .referrer("http://www.google.com")
-                                         .ignoreHttpErrors(true).get();
-
-                    // Retrieve PDF link from meta data (most recent)
-                    Elements metaLinks = html.getElementsByAttributeValue("name", "citation_pdf_url");
-
-                    if (!metaLinks.isEmpty()) {
-                        String link = metaLinks.first().attr("content");
-                        return Optional.of(new URL(link));
-                    }
-
-                    URL url = new URL(sciLink);
-                    String protocol = url.getProtocol();
-                    String authority = url.getAuthority();
-
-                    Optional<String> fullLinkToPdf = html
-                            .getElementsByAttributeValue("type", "application/json")
-                            .stream()
-                            .flatMap(element -> element.getElementsByTag("script").stream())
-                            // get the text element
-                            .map(element -> element.childNode(0))
-                            .map(element -> element.toString())
-                            .map(text -> new JSONObject(text))
-                            .filter(json -> json.has("article"))
-                            .map(json -> json.getJSONObject("article"))
-                            .filter(json -> json.has("pdfDownload"))
-                            .map(json -> json.getJSONObject("pdfDownload"))
-                            .filter(json -> json.has("linkToPdf"))
-                            .map(json -> json.getString("linkToPdf"))
-                            .map(linkToPdf -> String.format("%s://%s%s", protocol, authority, linkToPdf))
-                            .findAny();
-                    if (fullLinkToPdf.isPresent()) {
-                        LOGGER.info("Fulltext PDF found at ScienceDirect.");
-                        // new URL may through "MalformedURLException", thus using "isPresent()" above and ".get()"
-                        Optional<URL> pdfLink = Optional.of(new URL(fullLinkToPdf.get()));
-                        return pdfLink;
-                    }
-                }
-            } catch (UnirestException e) {
-                LOGGER.warn("ScienceDirect API request failed", e);
-            }
+        String urlFromDoi;
+        try {
+            urlFromDoi = getUrlByDoi(doi.get().getDOI());
+        } catch (UnirestException e) {
+            // UnirestException is unchecked: without this handler a failed API request would escape to the caller
+            LOGGER.warn("ScienceDirect API request failed", e);
+            return Optional.empty();
+        }
+        if (urlFromDoi.isEmpty()) {
+            return Optional.empty();
+        }
+        // Scrape the web page as desktop client (not as mobile client!)
+        Document html = Jsoup.connect(urlFromDoi)
+                             .userAgent(URLDownload.USER_AGENT)
+                             .referrer("https://www.google.com")
+                             .ignoreHttpErrors(true)
+                             .get();
+
+        // Retrieve PDF link from meta data (most recent)
+        Elements metaLinks = html.getElementsByAttributeValue("name", "citation_pdf_url");
+        if (!metaLinks.isEmpty()) {
+            String link = metaLinks.first().attr("content");
+            return Optional.of(new URL(link));
+        }
+
+        // We use the ScienceDirect web page which contains the article (presented using HTML).
+        // This page contains the link to the PDF in some JavaScript code embedded in the web page.
+        // Example page: https://www.sciencedirect.com/science/article/pii/S1674775515001079
+
+        Optional<JSONObject> pdfDownloadOptional = html
+                .getElementsByAttributeValue("type", "application/json")
+                .stream()
+                .flatMap(element -> element.getElementsByTag("script").stream())
+                // The first DOM child of the script element is the script itself (represented as HTML text)
+                .map(element -> element.childNode(0))
+                .map(Node::toString)
+                .map(JSONObject::new)
+                .filter(json -> json.has("article"))
+                .map(json -> json.getJSONObject("article"))
+                .filter(json -> json.has("pdfDownload"))
+                .map(json -> json.getJSONObject("pdfDownload"))
+                .findAny();
+
+        if (pdfDownloadOptional.isEmpty()) {
+            LOGGER.debug("No 'pdfDownload' key found in JSON information");
+            return Optional.empty();
+        }
+
+        JSONObject pdfDownload = pdfDownloadOptional.get();
+
+        String fullLinkToPdf;
+        if (pdfDownload.has("linkToPdf")) {
+            String linkToPdf = pdfDownload.getString("linkToPdf");
+            URL url = new URL(urlFromDoi);
+            fullLinkToPdf = String.format("%s://%s%s", url.getProtocol(), url.getAuthority(), linkToPdf);
+        } else if (pdfDownload.has("urlMetadata")) {
+            JSONObject urlMetadata = pdfDownload.getJSONObject("urlMetadata");
+            JSONObject queryParamsObject = urlMetadata.getJSONObject("queryParams");
+            String queryParameters = queryParamsObject.keySet().stream()
+                                                      .map(key -> String.format("%s=%s", key, queryParamsObject.getString(key)))
+                                                      .collect(Collectors.joining("&"));
+            fullLinkToPdf = String.format("https://www.sciencedirect.com/%s/%s%s?%s",
+                    urlMetadata.getString("path"),
+                    urlMetadata.getString("pii"),
+                    urlMetadata.getString("pdfExtension"),
+                    queryParameters);
+        } else {
+            LOGGER.debug("No suitable data in JSON information");
+            return Optional.empty();
+        }
+
+        LOGGER.info("Fulltext PDF found at ScienceDirect at {}.", fullLinkToPdf);
+        try {
+            return Optional.of(new URL(fullLinkToPdf));
+        } catch (MalformedURLException e) {
+            LOGGER.error("malformed URL", e);
+            return Optional.empty();
         }
-        return Optional.empty();
     }
 
     @Override
@@ -110,7 +152,9 @@ private String getUrlByDoi(String doi) throws UnirestException {
                                                          .asJson();
 
             JSONObject json = jsonResponse.getBody().getObject();
-            JSONArray links = json.getJSONObject("full-text-retrieval-response").getJSONObject("coredata").getJSONArray("link");
+            JSONArray links = json.getJSONObject("full-text-retrieval-response")
+                                  .getJSONObject("coredata")
+                                  .getJSONArray("link");
 
             for (int i = 0; i < links.length(); i++) {
                 JSONObject link = links.getJSONObject(i);

diff --git a/src/test/java/org/jabref/logic/importer/fetcher/ScienceDirectTest.java b/src/test/java/org/jabref/logic/importer/fetcher/ScienceDirectTest.java
@@ -28,7 +28,7 @@ void setUp() {
 
     @Test
     @DisabledOnCIServer("CI server is blocked")
-    void findByDOIOldPage() throws IOException {
+    void findByDoiOldPage() throws IOException {
         entry.setField(StandardField.DOI, "10.1016/j.jrmge.2015.08.004");
 
         assertEquals(
@@ -39,7 +39,7 @@ void findByDOIOldPage() throws IOException {
 
     @Test
     @DisabledOnCIServer("CI server is blocked")
-    void findByDOINewPage() throws IOException {
+    void findByDoiNewPage() throws IOException {
         entry.setField(StandardField.DOI, "10.1016/j.aasri.2014.09.002");
 
         assertEquals(
@@ -62,7 +62,7 @@ void findByDoiWorksForBoneArticle() throws IOException {
 
     @Test
     @DisabledOnCIServer("CI server is blocked")
-    void notFoundByDOI() throws IOException {
+    void notFoundByDoi() throws IOException {
         entry.setField(StandardField.DOI, "10.1016/j.aasri.2014.0559.002");
 
         assertEquals(Optional.empty(), finder.findFullText(entry));