Skip to content

Commit ea8ccb3

Browse files
johannes-mannerkoppor
authored and committed
Improve Dublin Core (#3710)
This fixes #938 - Reading and writing multiple DublinCore entries works: XMPUtilWriter supports multiple metadata entries in DublinCore and a single entry in the PDDocumentInformation. If you want to test the reading of multiple entries, the PDF file JabRef_multipleMetaEntries.pdf contains three metadata entries in DublinCore for testing locally. - Removed too much code when refactoring the XMPUtil. Non-XMP metadata are also relevant when retrieving org.apache.pdfbox.pdmodel.PDDocumentInformation. - Update pdfbox and fontbox from 1.8.13 to 2.0.8 and migrate from jempbox to xmpbox. See pull #1096. - Refactor extraction from DublinCoreSchema. - The tests cover the most important use cases, which include reading and writing metadata from PDF files. Both formats, DublinCore and PDMetadata (which is not XMP metadata), are tested. - Separated XMPUtils into a reader and a writer utility class. - Add meaningful names in DublinCoreExtractor and use StringUtils.isNullOrEmpty. - Log exception in XMPUtilShared.
1 parent e9e7bcc commit ea8ccb3

39 files changed

+1389
-3268
lines changed

build.gradle

Lines changed: 3 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -90,10 +90,9 @@ dependencies {
9090
compile 'com.jgoodies:jgoodies-common:1.8.1'
9191
compile 'com.jgoodies:jgoodies-forms:1.9.0'
9292

93-
// update to 2.0.x is not possible - see https://github.com/JabRef/jabref/pull/1096#issuecomment-208857517
94-
compile 'org.apache.pdfbox:pdfbox:1.8.13'
95-
compile 'org.apache.pdfbox:fontbox:1.8.13'
96-
compile 'org.apache.pdfbox:jempbox:1.8.13'
93+
compile 'org.apache.pdfbox:pdfbox:2.0.8'
94+
compile 'org.apache.pdfbox:fontbox:2.0.8'
95+
compile 'org.apache.pdfbox:xmpbox:2.0.8'
9796

9897
// required for reading write-protected PDFs - see https://github.com/JabRef/jabref/pull/942#issuecomment-209252635
9998
compile 'org.bouncycastle:bcprov-jdk15on:1.59'
@@ -216,16 +215,6 @@ dependencyUpdates.resolutionStrategy = {
216215
selection.reject("Cannot be upgraded to version 2")
217216
}
218217
}
219-
withModule("org.apache.pdfbox:fontbox") { ComponentSelection selection ->
220-
if (selection.candidate.version ==~ /2.*/) {
221-
selection.reject("update to 2.0.x is not possible - see https://github.com/JabRef/jabref/pull/1096#issuecomment-208857517")
222-
}
223-
}
224-
withModule("org.apache.pdfbox:pdfbox") { ComponentSelection selection ->
225-
if (selection.candidate.version ==~ /2.*/) {
226-
selection.reject("update to 2.0.x is not possible - see https://github.com/JabRef/jabref/pull/1096#issuecomment-208857517")
227-
}
228-
}
229218
withModule("mysql:mysql-connector-java") { ComponentSelection selection ->
230219
if (selection.candidate.version ==~ /[6-9].*/) {
231220
selection.reject("http://dev.mysql.com/downloads/connector/j/ lists the version 5.* as last stable version.")

src/main/java/org/jabref/cli/XMPUtilMain.java

Lines changed: 41 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
11
package org.jabref.cli;
22

3-
import java.io.File;
43
import java.io.FileReader;
54
import java.io.IOException;
65
import java.io.StringWriter;
7-
import java.nio.charset.StandardCharsets;
6+
import java.nio.file.Paths;
87
import java.util.Collection;
98
import java.util.List;
109
import java.util.Optional;
@@ -17,14 +16,15 @@
1716
import org.jabref.logic.importer.ImportFormatPreferences;
1817
import org.jabref.logic.importer.ParserResult;
1918
import org.jabref.logic.importer.fileformat.BibtexParser;
20-
import org.jabref.logic.xmp.XMPPreferences;
21-
import org.jabref.logic.xmp.XMPUtil;
19+
import org.jabref.logic.xmp.XmpPreferences;
20+
import org.jabref.logic.xmp.XmpUtilReader;
21+
import org.jabref.logic.xmp.XmpUtilWriter;
2222
import org.jabref.model.database.BibDatabaseMode;
2323
import org.jabref.model.entry.BibEntry;
2424
import org.jabref.preferences.JabRefPreferences;
2525

26-
import org.apache.jempbox.impl.XMLUtil;
27-
import org.apache.jempbox.xmp.XMPMetadata;
26+
import org.apache.xmpbox.XMPMetadata;
27+
import org.apache.xmpbox.xml.XmpSerializer;
2828

2929
public class XMPUtilMain {
3030

@@ -62,18 +62,16 @@ public static void main(String[] args) throws IOException, TransformerException
6262
Globals.prefs = JabRefPreferences.getInstance();
6363
}
6464

65-
XMPPreferences xmpPreferences = Globals.prefs.getXMPPreferences();
65+
XmpPreferences xmpPreferences = Globals.prefs.getXMPPreferences();
6666
ImportFormatPreferences importFormatPreferences = Globals.prefs.getImportFormatPreferences();
6767

68-
switch (args.length) {
69-
case 0:
68+
int argsLength = args.length;
69+
if (argsLength == 0) {
7070
usage();
71-
break;
72-
case 1:
73-
71+
} else if (argsLength == 1) {
7472
if (args[0].endsWith(".pdf")) {
7573
// Read from pdf and write as BibTex
76-
List<BibEntry> l = XMPUtil.readXMP(new File(args[0]), xmpPreferences);
74+
List<BibEntry> l = XmpUtilReader.readXmp(args[0], xmpPreferences);
7775

7876
BibEntryWriter bibtexEntryWriter = new BibEntryWriter(
7977
new LatexFieldFormatter(Globals.prefs.getLatexFieldFormatterPreferences()), false);
@@ -92,63 +90,62 @@ public static void main(String[] args) throws IOException, TransformerException
9290

9391
if (entries.isEmpty()) {
9492
System.err.println("Could not find BibEntry in " + args[0]);
95-
} else {
96-
System.out.println(XMPUtil.toXMP(entries, result.getDatabase(), xmpPreferences));
9793
}
9894
}
9995
} else {
10096
usage();
10197
}
102-
break;
103-
case 2:
98+
} else if (argsLength == 2) {
10499
if ("-x".equals(args[0]) && args[1].endsWith(".pdf")) {
105100
// Read from pdf and write as BibTex
106-
Optional<XMPMetadata> meta = XMPUtil.readRawXMP(new File(args[1]));
101+
List<XMPMetadata> meta = XmpUtilReader.readRawXmp(Paths.get(args[1]));
107102

108-
if (meta.isPresent()) {
109-
XMLUtil.save(meta.get().getXMPDocument(), System.out, StandardCharsets.UTF_8.name());
103+
if (!meta.isEmpty()) {
104+
XmpSerializer serializer = new XmpSerializer();
105+
serializer.serialize(meta.get(0), System.out, true);
110106
} else {
111107
System.err.println("The given pdf does not contain any XMP-metadata.");
112108
}
113-
break;
109+
return;
114110
}
115111

116112
if (args[0].endsWith(".bib") && args[1].endsWith(".pdf")) {
117-
ParserResult result = new BibtexParser(importFormatPreferences, Globals.getFileUpdateMonitor()).parse(new FileReader(args[0]));
113+
try (FileReader reader = new FileReader(args[0])) {
114+
ParserResult result = new BibtexParser(importFormatPreferences, Globals.getFileUpdateMonitor()).parse(reader);
118115

119-
Collection<BibEntry> entries = result.getDatabase().getEntries();
116+
List<BibEntry> entries = result.getDatabase().getEntries();
120117

121-
if (entries.isEmpty()) {
122-
System.err.println("Could not find BibEntry in " + args[0]);
123-
} else {
124-
XMPUtil.writeXMP(new File(args[1]), entries, result.getDatabase(), false, xmpPreferences);
125-
System.out.println("XMP written.");
118+
if (entries.isEmpty()) {
119+
System.err.println("Could not find BibEntry in " + args[0]);
120+
} else {
121+
XmpUtilWriter.writeXmp(Paths.get(args[1]), entries, result.getDatabase(), xmpPreferences);
122+
System.out.println("XMP written.");
123+
}
126124
}
127-
break;
125+
return;
128126
}
129127

130128
usage();
131-
break;
132-
case 3:
129+
} else if (argsLength == 3) {
133130
if (!args[1].endsWith(".bib") && !args[2].endsWith(".pdf")) {
134131
usage();
135-
break;
132+
return;
136133
}
137134

138-
ParserResult result = new BibtexParser(importFormatPreferences, Globals.getFileUpdateMonitor()).parse(new FileReader(args[1]));
135+
try (FileReader reader = new FileReader(args[1])) {
136+
ParserResult result = new BibtexParser(importFormatPreferences, Globals.getFileUpdateMonitor()).parse(reader);
139137

140-
Optional<BibEntry> bibEntry = result.getDatabase().getEntryByKey(args[0]);
138+
Optional<BibEntry> bibEntry = result.getDatabase().getEntryByKey(args[0]);
141139

142-
if (bibEntry.isPresent()) {
143-
XMPUtil.writeXMP(new File(args[2]), bibEntry.get(), result.getDatabase(), xmpPreferences);
140+
if (bibEntry.isPresent()) {
141+
XmpUtilWriter.writeXmp(Paths.get(args[2]), bibEntry.get(), result.getDatabase(), xmpPreferences);
144142

145-
System.out.println("XMP written.");
146-
} else {
147-
System.err.println("Could not find BibEntry " + args[0] + " in " + args[0]);
143+
System.out.println("XMP written.");
144+
} else {
145+
System.err.println("Could not find BibEntry " + args[0] + " in " + args[0]);
146+
}
148147
}
149-
break;
150-
151-
default:
148+
} else {
152149
usage();
153150
}
154151
}
@@ -167,13 +164,13 @@ private static void usage() {
167164
System.out.println("Read from PDF and print raw XMP:");
168165
System.out.println(" xmpUtil -x <pdf>");
169166
System.out
170-
.println("Write the entry in <bib> given by <key> to the PDF:");
167+
.println("Write the entry in <bib> given by <key> to the PDF:");
171168
System.out.println(" xmpUtil <key> <bib> <pdf>");
172169
System.out.println("Write all entries in <bib> to the PDF:");
173170
System.out.println(" xmpUtil <bib> <pdf>");
174171
System.out.println("");
175172
System.out
176-
.println("To report bugs visit https://issues.jabref.org");
173+
.println("To report bugs visit https://issues.jabref.org");
177174
}
178175

179176
}

src/main/java/org/jabref/gui/documentviewer/PdfDocumentPageViewModel.java

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,11 @@
88
import javafx.embed.swing.SwingFXUtils;
99
import javafx.scene.image.Image;
1010

11+
import org.apache.pdfbox.pdmodel.PDDocument;
1112
import org.apache.pdfbox.pdmodel.PDPage;
1213
import org.apache.pdfbox.pdmodel.common.PDRectangle;
14+
import org.apache.pdfbox.rendering.ImageType;
15+
import org.apache.pdfbox.rendering.PDFRenderer;
1316

1417
/**
1518
* Represents the view model of a pdf page backed by a {@link PDPage}.
@@ -18,10 +21,12 @@ public class PdfDocumentPageViewModel extends DocumentPageViewModel {
1821

1922
private final PDPage page;
2023
private final int pageNumber;
24+
private final PDDocument document;
2125

22-
public PdfDocumentPageViewModel(PDPage page, int pageNumber) {
26+
public PdfDocumentPageViewModel(PDPage page, int pageNumber, PDDocument document) {
2327
this.page = Objects.requireNonNull(page);
2428
this.pageNumber = pageNumber;
29+
this.document = document;
2530
}
2631

2732
// Taken from http://stackoverflow.com/a/9417836/873661
@@ -37,10 +42,12 @@ private static BufferedImage resize(BufferedImage img, int newWidth, int newHeig
3742
}
3843

3944
@Override
45+
// Taken from https://stackoverflow.com/questions/23326562/apache-pdfbox-convert-pdf-to-images
4046
public Image render(int width, int height) {
47+
PDFRenderer renderer = new PDFRenderer(document);
4148
try {
4249
int resolution = 96;
43-
BufferedImage image = page.convertToImage(BufferedImage.TYPE_INT_RGB, 2 * resolution);
50+
BufferedImage image = renderer.renderImageWithDPI(pageNumber, 2 * resolution, ImageType.RGB);
4451
return SwingFXUtils.toFXImage(resize(image, width, height), null);
4552
} catch (IOException e) {
4653
// TODO: LOG

src/main/java/org/jabref/gui/documentviewer/PdfDocumentViewModel.java

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
import javafx.collections.ObservableList;
99

1010
import org.apache.pdfbox.pdmodel.PDDocument;
11-
import org.apache.pdfbox.pdmodel.PDPage;
11+
import org.apache.pdfbox.pdmodel.PDPageTree;
1212

1313
public class PdfDocumentViewModel extends DocumentViewModel {
1414

@@ -21,13 +21,12 @@ public PdfDocumentViewModel(PDDocument document) {
2121

2222
@Override
2323
public ObservableList<DocumentPageViewModel> getPages() {
24-
@SuppressWarnings("unchecked")
25-
List<PDPage> pages = document.getDocumentCatalog().getAllPages();
24+
PDPageTree pages = document.getDocumentCatalog().getPages();
2625

27-
// There is apparently no neat way to get the page number from a PDPage...thus this old-style for loop
2826
List<PdfDocumentPageViewModel> pdfPages = new ArrayList<>();
29-
for (int i = 0; i < pages.size(); i++) {
30-
pdfPages.add(new PdfDocumentPageViewModel(pages.get(i), i + 1));
27+
// There is apparently no neat way to get the page number from a PDPage...thus this old-style for loop
28+
for (int i = 0; i < pages.getCount(); i++) {
29+
pdfPages.add(new PdfDocumentPageViewModel(pages.get(i), i + 1, document));
3130
}
3231
return FXCollections.observableArrayList(pdfPages);
3332
}

src/main/java/org/jabref/gui/externalfiles/DroppedFileHandler.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
import org.jabref.gui.util.DefaultTaskExecutor;
3636
import org.jabref.logic.l10n.Localization;
3737
import org.jabref.logic.util.io.FileUtil;
38-
import org.jabref.logic.xmp.XMPUtil;
38+
import org.jabref.logic.xmp.XmpUtilReader;
3939
import org.jabref.model.database.BibDatabase;
4040
import org.jabref.model.entry.BibEntry;
4141
import org.jabref.model.entry.FieldName;
@@ -228,7 +228,7 @@ private boolean tryXmpImport(String fileName, ExternalFileType fileType, NamedCo
228228

229229
List<BibEntry> xmpEntriesInFile;
230230
try {
231-
xmpEntriesInFile = XMPUtil.readXMP(fileName, Globals.prefs.getXMPPreferences());
231+
xmpEntriesInFile = XmpUtilReader.readXmp(fileName, Globals.prefs.getXMPPreferences());
232232
} catch (IOException e) {
233233
LOGGER.warn("Problem reading XMP", e);
234234
return false;

src/main/java/org/jabref/gui/externalfiles/WriteXMPAction.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
import org.jabref.gui.keyboard.KeyBinding;
3232
import org.jabref.gui.worker.AbstractWorker;
3333
import org.jabref.logic.l10n.Localization;
34-
import org.jabref.logic.xmp.XMPUtil;
34+
import org.jabref.logic.xmp.XmpUtilWriter;
3535
import org.jabref.model.database.BibDatabase;
3636
import org.jabref.model.entry.BibEntry;
3737

@@ -133,7 +133,7 @@ public void run() {
133133
for (Path file : files) {
134134
if (Files.exists(file)) {
135135
try {
136-
XMPUtil.writeXMP(file.toFile(), entry, database, Globals.prefs.getXMPPreferences());
136+
XmpUtilWriter.writeXmp(file, entry, database, Globals.prefs.getXMPPreferences());
137137
SwingUtilities.invokeLater(
138138
() -> optDiag.getProgressArea().append(" " + Localization.lang("OK") + ".\n"));
139139
entriesChanged++;

src/main/java/org/jabref/gui/fieldeditors/LinkedFileViewModel.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
import org.jabref.logic.cleanup.RenamePdfCleanup;
3535
import org.jabref.logic.l10n.Localization;
3636
import org.jabref.logic.util.io.FileUtil;
37-
import org.jabref.logic.xmp.XMPUtil;
37+
import org.jabref.logic.xmp.XmpUtilWriter;
3838
import org.jabref.model.database.BibDatabaseContext;
3939
import org.jabref.model.entry.BibEntry;
4040
import org.jabref.model.entry.LinkedFile;
@@ -331,7 +331,7 @@ public void writeXMPMetadata() {
331331
// Localization.lang("PDF does not exist");
332332
} else {
333333
try {
334-
XMPUtil.writeXMP(file.get(), entry, databaseContext.getDatabase(), Globals.prefs.getXMPPreferences());
334+
XmpUtilWriter.writeXmp(file.get(), entry, databaseContext.getDatabase(), Globals.prefs.getXMPPreferences());
335335
} catch (IOException | TransformerException ex) {
336336
// TODO: Print error message
337337
// Localization.lang("Error while writing") + " '" + file.toString() + "': " + ex;

0 commit comments

Comments
 (0)