@@ -36,7 +36,6 @@ public class ScienceDirect implements FulltextFetcher {
3636 @ Override
3737 public Optional <URL > findFullText (BibEntry entry ) throws IOException {
3838 Objects .requireNonNull (entry );
39- Optional <URL > pdfLink = Optional .empty ();
4039
4140 // Try unique DOI first
4241 Optional <DOI > doi = entry .getField (FieldName .DOI ).flatMap (DOI ::build );
@@ -46,21 +45,35 @@ public Optional<URL> findFullText(BibEntry entry) throws IOException {
4645 try {
4746 String sciLink = getUrlByDoi (doi .get ().getDOI ());
4847
48+ // scrape the web page not as mobile client!
4949 if (!sciLink .isEmpty ()) {
50- // Retrieve PDF link
51- Document html = Jsoup .connect (sciLink ).ignoreHttpErrors (true ).get ();
50+ Document html = Jsoup .connect (sciLink )
51+ .userAgent ("Mozilla/5.0 (Windows; U; WindowsNT 5.1; en-US; rv1.8.1.6) Gecko/20070725 Firefox/2.0.0.6" )
52+ .referrer ("http://www.google.com" )
53+ .ignoreHttpErrors (true ).get ();
54+
55+ // Retrieve PDF link (old page)
5256 Element link = html .getElementById ("pdfLink" );
5357
5458 if (link != null ) {
55- LOGGER .info ("Fulltext PDF found @ ScienceDirect." );
56- pdfLink = Optional .of (new URL (link .attr ("pdfurl" )));
59+ LOGGER .info ("Fulltext PDF found @ ScienceDirect (old page)." );
60+ Optional <URL > pdfLink = Optional .of (new URL (link .attr ("pdfurl" )));
61+ return pdfLink ;
62+ }
63+ // Retrieve PDF link (new page); Elements.attr() returns "" (never null) when nothing matches
64+ String url = html .getElementsByClass ("pdf-download-btn-link" ).attr ("href" );
65+
66+ if (!url .isEmpty ()) {
67+ LOGGER .info ("Fulltext PDF found @ ScienceDirect (new page)." );
68+ Optional <URL > pdfLink = Optional .of (new URL ("http://www.sciencedirect.com" + url ));
69+ return pdfLink ;
5770 }
5871 }
5972 } catch (UnirestException e ) {
6073 LOGGER .warn ("ScienceDirect API request failed" , e );
6174 }
6275 }
63- return pdfLink ;
76+ return Optional . empty () ;
6477 }
6578
6679 private String getUrlByDoi (String doi ) throws UnirestException {
0 commit comments