Skip to content

Commit b19c3e4

Browse files
authored
Enable automated cross library search using a cross library query lan… (#7124)
* Enable automated cross library search using a cross library query language. Signed-off-by: Dominik Voigt <[email protected]> * Pull Global upward through constructor. * Pull Globals and ImportFormatPreferences up through constructor Signed-off-by: Dominik Voigt <[email protected]> * Integrate requested changes and fix architecture tests by correcting test classes Signed-off-by: Dominik Voigt <[email protected]> * Remove unused imports Signed-off-by: Dominik Voigt <[email protected]>
1 parent 5ca3d0d commit b19c3e4

29 files changed

+1636
-21
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ to the page field for cases where the page numbers are missing. [#7019](https://
2323
- We added a new formatter to output shorthand month format. [#6579](https://github.com/JabRef/jabref/issues/6579)
2424
- We added support for the new Microsoft Edge browser in all platforms. [#7056](https://github.com/JabRef/jabref/pull/7056)
2525
- We reintroduced emacs/bash-like keybindings. [#6017](https://github.com/JabRef/jabref/issues/6017)
26+
- We added a feature to provide automated cross library search using a cross library query language. This provides support for the search step of systematic literature reviews (SLRs). [koppor#369](https://github.com/koppor/jabref/issues/369)
2627

2728
### Changed
2829

build.gradle

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,8 @@ dependencies {
139139
exclude group: 'org.apache.lucene', module: 'lucene-sandbox'
140140
}
141141

142+
implementation group: 'org.eclipse.jgit', name: 'org.eclipse.jgit', version: '5.9.0.202009080501-r'
143+
142144
implementation group: 'org.mariadb.jdbc', name: 'mariadb-java-client', version: '2.7.0'
143145

144146
implementation 'org.postgresql:postgresql:42.2.18'

src/main/java/module-info.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,4 +91,5 @@
9191
requires com.h2database.mvstore;
9292
requires lucene.queryparser;
9393
requires lucene.core;
94+
requires org.eclipse.jgit;
9495
}

src/main/java/org/jabref/gui/JabRefFrame.java

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -815,7 +815,9 @@ private MenuBar createMenu() {
815815
new SeparatorMenuItem(),
816816

817817
factory.createMenuItem(StandardActions.SEND_AS_EMAIL, new SendAsEMailAction(dialogService, stateManager)),
818-
pushToApplicationMenuItem
818+
pushToApplicationMenuItem,
819+
new SeparatorMenuItem(),
820+
factory.createMenuItem(StandardActions.START_SYSTEMATIC_LITERATURE_REVIEW, new StartLiteratureReviewAction(this, Globals.getFileUpdateMonitor(), Globals.prefs.getWorkingDir(), Globals.TASK_EXECUTOR))
819821
);
820822

821823
SidePaneComponent webSearch = sidePaneManager.getComponent(SidePaneType.WEB_SEARCH);
@@ -992,7 +994,7 @@ public void addParserResult(ParserResult parserResult, boolean focusPanel) {
992994
* This method causes all open LibraryTabs to set up their tables anew. When called from PreferencesDialogViewModel,
993995
* this updates to the new settings.
994996
* We need to notify all tabs about the changes to avoid problems when changing the column set.
995-
* */
997+
*/
996998
public void setupAllTables() {
997999
tabbedPane.getTabs().forEach(tab -> {
9981000
LibraryTab libraryTab = (LibraryTab) tab;
@@ -1013,7 +1015,7 @@ private ContextMenu createTabContextMenu(KeyBindingRepository keyBindingReposito
10131015
new SeparatorMenuItem(),
10141016
factory.createMenuItem(StandardActions.OPEN_DATABASE_FOLDER, new OpenDatabaseFolder()),
10151017
factory.createMenuItem(StandardActions.OPEN_CONSOLE, new OpenConsoleAction(stateManager))
1016-
);
1018+
);
10171019

10181020
return contextMenu;
10191021
}
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
package org.jabref.gui;
2+
3+
import java.io.IOException;
4+
import java.nio.file.Path;
5+
import java.util.Optional;
6+
7+
import org.jabref.gui.actions.SimpleCommand;
8+
import org.jabref.gui.importer.actions.OpenDatabaseAction;
9+
import org.jabref.gui.util.BackgroundTask;
10+
import org.jabref.gui.util.FileDialogConfiguration;
11+
import org.jabref.gui.util.TaskExecutor;
12+
import org.jabref.logic.crawler.Crawler;
13+
import org.jabref.logic.importer.ParseException;
14+
import org.jabref.logic.l10n.Localization;
15+
import org.jabref.model.entry.BibEntryTypesManager;
16+
import org.jabref.model.util.FileUpdateMonitor;
17+
import org.jabref.preferences.JabRefPreferences;
18+
19+
import org.eclipse.jgit.api.errors.GitAPIException;
20+
import org.slf4j.Logger;
21+
import org.slf4j.LoggerFactory;
22+
23+
public class StartLiteratureReviewAction extends SimpleCommand {
24+
private static final Logger LOGGER = LoggerFactory.getLogger(StartLiteratureReviewAction.class);
25+
private final JabRefFrame frame;
26+
private final DialogService dialogService;
27+
private final FileUpdateMonitor fileUpdateMonitor;
28+
private final Path workingDirectory;
29+
private final TaskExecutor taskExecutor;
30+
31+
public StartLiteratureReviewAction(JabRefFrame frame, FileUpdateMonitor fileUpdateMonitor, Path standardWorkingDirectory, TaskExecutor taskExecutor) {
32+
this.frame = frame;
33+
this.dialogService = frame.getDialogService();
34+
this.fileUpdateMonitor = fileUpdateMonitor;
35+
this.workingDirectory = getInitialDirectory(standardWorkingDirectory);
36+
this.taskExecutor = taskExecutor;
37+
}
38+
39+
@Override
40+
public void execute() {
41+
FileDialogConfiguration fileDialogConfiguration = new FileDialogConfiguration.Builder()
42+
.withInitialDirectory(workingDirectory)
43+
.build();
44+
45+
Optional<Path> studyDefinitionFile = dialogService.showFileOpenDialog(fileDialogConfiguration);
46+
if (studyDefinitionFile.isEmpty()) {
47+
// Do nothing if selection was canceled
48+
return;
49+
}
50+
final Crawler crawler;
51+
try {
52+
crawler = new Crawler(studyDefinitionFile.get(), fileUpdateMonitor, JabRefPreferences.getInstance().getImportFormatPreferences(), JabRefPreferences.getInstance().getSavePreferences(), new BibEntryTypesManager());
53+
} catch (IOException | ParseException | GitAPIException e) {
54+
LOGGER.error("Error during reading of study definition file.", e);
55+
dialogService.showErrorDialogAndWait(Localization.lang("Error during reading of study definition file."), e);
56+
return;
57+
}
58+
BackgroundTask.wrap(() -> {
59+
crawler.performCrawl();
60+
return 0; // Return any value to make this a callable instead of a runnable. This allows throwing exceptions.
61+
})
62+
.onFailure(e -> {
63+
LOGGER.error("Error during persistence of crawling results.");
64+
dialogService.showErrorDialogAndWait(Localization.lang("Error during persistence of crawling results."), e);
65+
})
66+
.onSuccess(unused -> new OpenDatabaseAction(frame).openFile(Path.of(studyDefinitionFile.get().getParent().toString(), "studyResult.bib"), true))
67+
.executeWith(taskExecutor);
68+
}
69+
70+
/**
71+
* @return Path of current panel database directory or the standard working directory
72+
*/
73+
private Path getInitialDirectory(Path standardWorkingDirectory) {
74+
if (frame.getBasePanelCount() == 0) {
75+
return standardWorkingDirectory;
76+
} else {
77+
Optional<Path> databasePath = frame.getCurrentLibraryTab().getBibDatabaseContext().getDatabasePath();
78+
return databasePath.map(Path::getParent).orElse(standardWorkingDirectory);
79+
}
80+
}
81+
}

src/main/java/org/jabref/gui/actions/StandardActions.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ public enum StandardActions implements Action {
8888
PARSE_LATEX(Localization.lang("Search for citations in LaTeX files..."), IconTheme.JabRefIcons.LATEX_CITATIONS),
8989
NEW_SUB_LIBRARY_FROM_AUX(Localization.lang("New sublibrary based on AUX file") + "...", Localization.lang("New BibTeX sublibrary") + Localization.lang("This feature generates a new library based on which entries are needed in an existing LaTeX document."), IconTheme.JabRefIcons.NEW),
9090
WRITE_XMP(Localization.lang("Write XMP metadata to PDFs"), Localization.lang("Will write XMP metadata to the PDFs linked from selected entries."), KeyBinding.WRITE_XMP),
91+
START_SYSTEMATIC_LITERATURE_REVIEW(Localization.lang("Start systematic literature review")),
9192
OPEN_DATABASE_FOLDER(Localization.lang("Reveal in file explorer")),
9293
OPEN_FOLDER(Localization.lang("Open folder"), Localization.lang("Open folder"), KeyBinding.OPEN_FOLDER),
9394
OPEN_FILE(Localization.lang("Open file"), Localization.lang("Open file"), IconTheme.JabRefIcons.FILE, KeyBinding.OPEN_FILE),
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
package org.jabref.logic.crawler;
2+
3+
import java.io.IOException;
4+
import java.nio.file.Path;
5+
import java.util.List;
6+
7+
import org.jabref.logic.crawler.git.GitHandler;
8+
import org.jabref.logic.exporter.SavePreferences;
9+
import org.jabref.logic.importer.ImportFormatPreferences;
10+
import org.jabref.logic.importer.ParseException;
11+
import org.jabref.model.entry.BibEntryTypesManager;
12+
import org.jabref.model.study.QueryResult;
13+
import org.jabref.model.study.Study;
14+
import org.jabref.model.util.FileUpdateMonitor;
15+
16+
import org.eclipse.jgit.api.errors.GitAPIException;
17+
18+
/**
19+
* This class provides a service for SLR support by conducting an automated search and persistance
20+
* of studies using the queries and E-Libraries specified in the provided study definition file.
21+
*
22+
* It composes a StudyRepository for repository management,
23+
* and a StudyFetcher that manages the crawling over the selected E-Libraries.
24+
*/
25+
public class Crawler {
26+
private final StudyRepository studyRepository;
27+
private final StudyFetcher studyFetcher;
28+
29+
/**
30+
* Creates a crawler for retrieving studies from E-Libraries
31+
*
32+
* @param studyDefinitionFile The path to the study definition file that contains the list of targeted E-Libraries and used cross-library queries
33+
*/
34+
public Crawler(Path studyDefinitionFile, FileUpdateMonitor fileUpdateMonitor, ImportFormatPreferences importFormatPreferences, SavePreferences savePreferences, BibEntryTypesManager bibEntryTypesManager) throws IllegalArgumentException, IOException, ParseException, GitAPIException {
35+
Path studyRepositoryRoot = studyDefinitionFile.getParent();
36+
studyRepository = new StudyRepository(studyRepositoryRoot, new GitHandler(studyRepositoryRoot), importFormatPreferences, fileUpdateMonitor, savePreferences, bibEntryTypesManager);
37+
Study study = studyRepository.getStudy();
38+
LibraryEntryToFetcherConverter libraryEntryToFetcherConverter = new LibraryEntryToFetcherConverter(study.getActiveLibraryEntries(), importFormatPreferences);
39+
this.studyFetcher = new StudyFetcher(libraryEntryToFetcherConverter.getActiveFetchers(), study.getSearchQueryStrings());
40+
}
41+
42+
/**
43+
* This methods performs the crawling of the active libraries defined in the study definition file.
44+
* This method also persists the results in the same folder the study definition file is stored in.
45+
*
46+
* @throws IOException Thrown if a problem occurred during the persistence of the result.
47+
*/
48+
public void performCrawl() throws IOException, GitAPIException {
49+
List<QueryResult> results = studyFetcher.crawl();
50+
studyRepository.persist(results);
51+
}
52+
}
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
package org.jabref.logic.crawler;
2+
3+
import java.util.List;
4+
import java.util.Objects;
5+
import java.util.Set;
6+
import java.util.stream.Collectors;
7+
8+
import org.jabref.logic.importer.ImportFormatPreferences;
9+
import org.jabref.logic.importer.SearchBasedFetcher;
10+
import org.jabref.logic.importer.WebFetchers;
11+
import org.jabref.model.entry.BibEntry;
12+
import org.jabref.model.entry.field.UnknownField;
13+
14+
import static org.jabref.model.entry.types.SystematicLiteratureReviewStudyEntryType.LIBRARY_ENTRY;
15+
16+
/**
17+
* Converts library entries from the given study into their corresponding fetchers.
18+
*/
19+
class LibraryEntryToFetcherConverter {
20+
private final List<BibEntry> libraryEntries;
21+
private final ImportFormatPreferences importFormatPreferences;
22+
23+
public LibraryEntryToFetcherConverter(List<BibEntry> libraryEntries, ImportFormatPreferences importFormatPreferences) {
24+
this.libraryEntries = libraryEntries;
25+
this.importFormatPreferences = importFormatPreferences;
26+
}
27+
28+
/**
29+
* Returns a list of instances of all active library fetchers.
30+
*
31+
* A fetcher is considered active if there exists an library entry of the library the fetcher is associated with that is enabled.
32+
*
33+
* @return Instances of all active fetchers defined in the study definition.
34+
*/
35+
public List<SearchBasedFetcher> getActiveFetchers() {
36+
return getFetchersFromLibraryEntries(this.libraryEntries);
37+
}
38+
39+
/**
40+
* Transforms a list of libraryEntries into a list of SearchBasedFetcher instances.
41+
*
42+
* @param libraryEntries List of entries
43+
* @return List of fetcher instances
44+
*/
45+
private List<SearchBasedFetcher> getFetchersFromLibraryEntries(List<BibEntry> libraryEntries) {
46+
return libraryEntries.parallelStream()
47+
.filter(bibEntry -> bibEntry.getType().getName().equals(LIBRARY_ENTRY.getName()))
48+
.map(this::createFetcherFromLibraryEntry)
49+
.filter(Objects::nonNull)
50+
.collect(Collectors.toList());
51+
}
52+
53+
/**
54+
* Transforms a library entry into a SearchBasedFetcher instance. This only works if the library entry specifies a supported fetcher.
55+
*
56+
* @param libraryEntry the entry that will be converted
57+
* @return An instance of the fetcher defined by the library entry.
58+
*/
59+
private SearchBasedFetcher createFetcherFromLibraryEntry(BibEntry libraryEntry) {
60+
Set<SearchBasedFetcher> searchBasedFetchers = WebFetchers.getSearchBasedFetchers(importFormatPreferences);
61+
String libraryNameFromFetcher = libraryEntry.getField(new UnknownField("name")).orElse("");
62+
return searchBasedFetchers.stream()
63+
.filter(searchBasedFetcher -> searchBasedFetcher.getName().toLowerCase().equals(libraryNameFromFetcher.toLowerCase()))
64+
.findAny()
65+
.orElse(null);
66+
}
67+
}
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
package org.jabref.logic.crawler;
2+
3+
import java.util.ArrayList;
4+
import java.util.List;
5+
import java.util.Objects;
6+
import java.util.stream.Collectors;
7+
8+
import org.jabref.logic.importer.FetcherException;
9+
import org.jabref.logic.importer.PagedSearchBasedFetcher;
10+
import org.jabref.logic.importer.SearchBasedFetcher;
11+
import org.jabref.model.database.BibDatabase;
12+
import org.jabref.model.entry.BibEntry;
13+
import org.jabref.model.study.FetchResult;
14+
import org.jabref.model.study.QueryResult;
15+
16+
import org.slf4j.Logger;
17+
import org.slf4j.LoggerFactory;
18+
19+
/**
20+
* Delegates the search of the provided set of targeted E-Libraries with the provided queries to the E-Library specific fetchers,
21+
* and aggregates the results returned by the fetchers by query and E-Library.
22+
*/
23+
class StudyFetcher {
24+
private static final Logger LOGGER = LoggerFactory.getLogger(StudyFetcher.class);
25+
private static final int MAX_AMOUNT_OF_RESULTS_PER_FETCHER = 100;
26+
27+
private final List<SearchBasedFetcher> activeFetchers;
28+
private final List<String> searchQueries;
29+
30+
StudyFetcher(List<SearchBasedFetcher> activeFetchers, List<String> searchQueries) throws IllegalArgumentException {
31+
this.searchQueries = searchQueries;
32+
this.activeFetchers = activeFetchers;
33+
}
34+
35+
/**
36+
* Each Map Entry contains the results for one search term for all libraries.
37+
* Each entry of the internal map contains the results for a given library.
38+
* If any library API is not available, its corresponding entry is missing from the internal map.
39+
*/
40+
public List<QueryResult> crawl() {
41+
return searchQueries.parallelStream()
42+
.map(this::getQueryResult)
43+
.collect(Collectors.toList());
44+
}
45+
46+
private QueryResult getQueryResult(String searchQuery) {
47+
return new QueryResult(searchQuery, performSearchOnQuery(searchQuery));
48+
}
49+
50+
/**
51+
* Queries all Databases on the given searchQuery.
52+
*
53+
* @param searchQuery The query the search is performed for.
54+
* @return Mapping of each fetcher by name and all their retrieved publications as a BibDatabase
55+
*/
56+
private List<FetchResult> performSearchOnQuery(String searchQuery) {
57+
return activeFetchers.parallelStream()
58+
.map(fetcher -> performSearchOnQueryForFetcher(searchQuery, fetcher))
59+
.filter(Objects::nonNull)
60+
.collect(Collectors.toList());
61+
}
62+
63+
private FetchResult performSearchOnQueryForFetcher(String searchQuery, SearchBasedFetcher fetcher) {
64+
try {
65+
List<BibEntry> fetchResult = new ArrayList<>();
66+
if (fetcher instanceof PagedSearchBasedFetcher) {
67+
int pages = ((int) Math.ceil(((double) MAX_AMOUNT_OF_RESULTS_PER_FETCHER) / ((PagedSearchBasedFetcher) fetcher).getPageSize()));
68+
for (int page = 0; page < pages; page++) {
69+
fetchResult.addAll(((PagedSearchBasedFetcher) fetcher).performSearchPaged(searchQuery, page).getContent());
70+
}
71+
} else {
72+
fetchResult = fetcher.performSearch(searchQuery);
73+
}
74+
return new FetchResult(fetcher.getName(), new BibDatabase(fetchResult));
75+
} catch (FetcherException e) {
76+
LOGGER.warn(String.format("%s API request failed", fetcher.getName()), e);
77+
return null;
78+
}
79+
}
80+
}

0 commit comments

Comments
 (0)