diff --git a/CHANGELOG.md b/CHANGELOG.md index 0ac47b90749..a3d9b688211 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,6 +26,7 @@ Note that this project **does not** adhere to [Semantic Versioning](https://semv - We added support for import of a Refer/BibIX file format. [#13069](https://github.com/JabRef/jabref/issues/13069) - We added a new `jabkit` command `pseudonymize` to pseudonymize the library. [#13109](https://github.com/JabRef/jabref/issues/13109) - We added functionality to focus running instance when trying to start a second instance. [#13129](https://github.com/JabRef/jabref/issues/13129) +- We added EPUB files import. [#13217](https://github.com/JabRef/jabref/pull/13217) ### Changed diff --git a/jabgui/src/main/java/org/jabref/gui/LibraryTab.java b/jabgui/src/main/java/org/jabref/gui/LibraryTab.java index 2afe8e908c1..275cb18776d 100644 --- a/jabgui/src/main/java/org/jabref/gui/LibraryTab.java +++ b/jabgui/src/main/java/org/jabref/gui/LibraryTab.java @@ -56,7 +56,6 @@ import org.jabref.gui.undo.UndoableFieldChange; import org.jabref.gui.undo.UndoableInsertEntries; import org.jabref.gui.undo.UndoableRemoveEntries; -import org.jabref.gui.util.OptionalObjectProperty; import org.jabref.gui.util.UiTaskExecutor; import org.jabref.logic.ai.AiService; import org.jabref.logic.citationstyle.CitationStyleCache; @@ -71,6 +70,7 @@ import org.jabref.logic.search.PostgreServer; import org.jabref.logic.shared.DatabaseLocation; import org.jabref.logic.util.BackgroundTask; +import org.jabref.logic.util.OptionalObjectProperty; import org.jabref.logic.util.TaskExecutor; import org.jabref.logic.util.io.FileUtil; import org.jabref.model.FieldChange; diff --git a/jabgui/src/main/java/org/jabref/gui/StateManager.java b/jabgui/src/main/java/org/jabref/gui/StateManager.java index fc98720dac5..21c20efae6e 100644 --- a/jabgui/src/main/java/org/jabref/gui/StateManager.java +++ b/jabgui/src/main/java/org/jabref/gui/StateManager.java @@ -30,9 +30,9 @@ import 
org.jabref.gui.sidepane.SidePaneType; import org.jabref.gui.util.CustomLocalDragboard; import org.jabref.gui.util.DialogWindowState; -import org.jabref.gui.util.OptionalObjectProperty; import org.jabref.logic.search.IndexManager; import org.jabref.logic.util.BackgroundTask; +import org.jabref.logic.util.OptionalObjectProperty; import org.jabref.model.database.BibDatabaseContext; import org.jabref.model.entry.BibEntry; import org.jabref.model.groups.GroupTreeNode; diff --git a/jabgui/src/main/java/org/jabref/gui/collab/DatabaseChange.java b/jabgui/src/main/java/org/jabref/gui/collab/DatabaseChange.java index 0ef63184c37..2cd4105df0f 100644 --- a/jabgui/src/main/java/org/jabref/gui/collab/DatabaseChange.java +++ b/jabgui/src/main/java/org/jabref/gui/collab/DatabaseChange.java @@ -18,7 +18,7 @@ import org.jabref.gui.collab.stringdelete.BibTexStringDelete; import org.jabref.gui.collab.stringrename.BibTexStringRename; import org.jabref.gui.undo.NamedCompound; -import org.jabref.gui.util.OptionalObjectProperty; +import org.jabref.logic.util.OptionalObjectProperty; import org.jabref.model.database.BibDatabaseContext; public sealed abstract class DatabaseChange permits EntryAdd, EntryChange, EntryDelete, GroupChange, MetadataChange, PreambleChange, BibTexStringAdd, BibTexStringChange, BibTexStringDelete, BibTexStringRename { diff --git a/jabgui/src/main/java/org/jabref/gui/externalfiles/ImportHandler.java b/jabgui/src/main/java/org/jabref/gui/externalfiles/ImportHandler.java index aa7ef91c847..3c82aba4298 100644 --- a/jabgui/src/main/java/org/jabref/gui/externalfiles/ImportHandler.java +++ b/jabgui/src/main/java/org/jabref/gui/externalfiles/ImportHandler.java @@ -182,6 +182,18 @@ public List call() { message = bibtexParserResult.getErrorMessage(); } addResultToList(file, success, message); + } else if (FileUtil.isEpubFile(file)) { + ParserResult result = contentImporter.importEpubContent(file); + List entries = result.getDatabase().getEntries(); + boolean success = 
!result.hasWarnings(); + String message; + if (success) { + message = Localization.lang("ePUB entry was successfully imported"); + } else { + message = result.getErrorMessage(); + } + entriesToAdd.addAll(entries); + addResultToList(file, success, message); } else { BibEntry emptyEntryWithLink = createEmptyEntryWithLink(file); entriesToAdd.add(emptyEntryWithLink); diff --git a/jabgui/src/main/java/org/jabref/gui/externalfiletype/StandardExternalFileType.java b/jabgui/src/main/java/org/jabref/gui/externalfiletype/StandardExternalFileType.java index fc12e9670e8..145c2b266b9 100644 --- a/jabgui/src/main/java/org/jabref/gui/externalfiletype/StandardExternalFileType.java +++ b/jabgui/src/main/java/org/jabref/gui/externalfiletype/StandardExternalFileType.java @@ -28,7 +28,7 @@ public enum StandardExternalFileType implements ExternalFileType { TIFF(Localization.lang("%0 image", "TIFF"), "tiff", "image/tiff", "gimp", "picture", IconTheme.JabRefIcons.PICTURE), URL("URL", "html", "text/html", "firefox", "www", IconTheme.JabRefIcons.WWW), MHT("MHT", "mht", "multipart/related", "firefox", "www", IconTheme.JabRefIcons.WWW), - ePUB("ePUB", "epub", "application/epub+zip", "firefox", "www", IconTheme.JabRefIcons.WWW), + ePUB("ePUB", "epub", "application/epub+zip", "firefox", "www", IconTheme.JabRefIcons.BOOK), MARKDOWN("Markdown", "md", "text/markdown", "emacs", "emacs", IconTheme.JabRefIcons.FILE_TEXT); private final String name; private final String extension; diff --git a/jabgui/src/main/java/org/jabref/gui/maintable/MainTableDataModel.java b/jabgui/src/main/java/org/jabref/gui/maintable/MainTableDataModel.java index f41687a7f48..5ee95528888 100644 --- a/jabgui/src/main/java/org/jabref/gui/maintable/MainTableDataModel.java +++ b/jabgui/src/main/java/org/jabref/gui/maintable/MainTableDataModel.java @@ -18,10 +18,10 @@ import org.jabref.gui.search.MatchCategory; import org.jabref.gui.util.BindingsHelper; import org.jabref.gui.util.FilteredListProxy; -import 
org.jabref.gui.util.OptionalObjectProperty; import org.jabref.logic.search.IndexManager; import org.jabref.logic.search.SearchPreferences; import org.jabref.logic.util.BackgroundTask; +import org.jabref.logic.util.OptionalObjectProperty; import org.jabref.logic.util.TaskExecutor; import org.jabref.model.database.BibDatabaseContext; import org.jabref.model.entry.BibEntry; diff --git a/jabgui/src/main/java/org/jabref/gui/preferences/keybindings/KeyBindingsTabViewModel.java b/jabgui/src/main/java/org/jabref/gui/preferences/keybindings/KeyBindingsTabViewModel.java index 5c58218851d..ad451dba57e 100644 --- a/jabgui/src/main/java/org/jabref/gui/preferences/keybindings/KeyBindingsTabViewModel.java +++ b/jabgui/src/main/java/org/jabref/gui/preferences/keybindings/KeyBindingsTabViewModel.java @@ -24,8 +24,8 @@ import org.jabref.gui.preferences.keybindings.presets.BashKeyBindingPreset; import org.jabref.gui.preferences.keybindings.presets.KeyBindingPreset; import org.jabref.gui.preferences.keybindings.presets.NewEntryBindingPreset; -import org.jabref.gui.util.OptionalObjectProperty; import org.jabref.logic.l10n.Localization; +import org.jabref.logic.util.OptionalObjectProperty; public class KeyBindingsTabViewModel implements PreferenceTabViewModel { diff --git a/jabgui/src/test/java/org/jabref/gui/entryeditor/SourceTabTest.java b/jabgui/src/test/java/org/jabref/gui/entryeditor/SourceTabTest.java index fd43e6f70c0..63cb4ae9f6f 100644 --- a/jabgui/src/test/java/org/jabref/gui/entryeditor/SourceTabTest.java +++ b/jabgui/src/test/java/org/jabref/gui/entryeditor/SourceTabTest.java @@ -15,9 +15,9 @@ import org.jabref.gui.keyboard.KeyBindingRepository; import org.jabref.gui.search.SearchType; import org.jabref.gui.undo.CountingUndoManager; -import org.jabref.gui.util.OptionalObjectProperty; import org.jabref.logic.bibtex.FieldPreferences; import org.jabref.logic.importer.ImportFormatPreferences; +import org.jabref.logic.util.OptionalObjectProperty; import 
org.jabref.model.entry.BibEntry; import org.jabref.model.entry.BibEntryTypesManager; import org.jabref.model.entry.field.UnknownField; diff --git a/jabgui/src/test/java/org/jabref/gui/importer/NewEntryActionTest.java b/jabgui/src/test/java/org/jabref/gui/importer/NewEntryActionTest.java index 6f46fc0477d..90e2bd69432 100644 --- a/jabgui/src/test/java/org/jabref/gui/importer/NewEntryActionTest.java +++ b/jabgui/src/test/java/org/jabref/gui/importer/NewEntryActionTest.java @@ -7,7 +7,7 @@ import org.jabref.gui.LibraryTabContainer; import org.jabref.gui.StateManager; import org.jabref.gui.preferences.GuiPreferences; -import org.jabref.gui.util.OptionalObjectProperty; +import org.jabref.logic.util.OptionalObjectProperty; import org.jabref.model.entry.BibEntry; import org.jabref.model.entry.types.EntryType; import org.jabref.model.entry.types.StandardEntryType; diff --git a/jabgui/src/test/java/org/jabref/gui/sidepane/SidePaneViewModelTest.java b/jabgui/src/test/java/org/jabref/gui/sidepane/SidePaneViewModelTest.java index e735e4a5ba6..2c7f6caab9d 100644 --- a/jabgui/src/test/java/org/jabref/gui/sidepane/SidePaneViewModelTest.java +++ b/jabgui/src/test/java/org/jabref/gui/sidepane/SidePaneViewModelTest.java @@ -17,9 +17,9 @@ import org.jabref.gui.frame.SidePanePreferences; import org.jabref.gui.preferences.GuiPreferences; import org.jabref.gui.util.CustomLocalDragboard; -import org.jabref.gui.util.OptionalObjectProperty; import org.jabref.logic.ai.AiService; import org.jabref.logic.journals.JournalAbbreviationRepository; +import org.jabref.logic.util.OptionalObjectProperty; import org.jabref.logic.util.TaskExecutor; import org.jabref.model.entry.BibEntryTypesManager; import org.jabref.model.util.FileUpdateMonitor; diff --git a/jabgui/src/test/java/org/jabref/gui/util/OpenConsoleActionTest.java b/jabgui/src/test/java/org/jabref/gui/util/OpenConsoleActionTest.java index 55889e14816..28d360a2cc3 100644 --- 
a/jabgui/src/test/java/org/jabref/gui/util/OpenConsoleActionTest.java +++ b/jabgui/src/test/java/org/jabref/gui/util/OpenConsoleActionTest.java @@ -5,6 +5,7 @@ import org.jabref.gui.StateManager; import org.jabref.gui.frame.OpenConsoleAction; import org.jabref.gui.preferences.GuiPreferences; +import org.jabref.logic.util.OptionalObjectProperty; import org.jabref.model.database.BibDatabaseContext; import org.junit.jupiter.api.BeforeEach; diff --git a/jablib/src/main/java/org/jabref/logic/externalfiles/ExternalFilesContentImporter.java b/jablib/src/main/java/org/jabref/logic/externalfiles/ExternalFilesContentImporter.java index 7a868577c42..ac93b987a48 100644 --- a/jablib/src/main/java/org/jabref/logic/externalfiles/ExternalFilesContentImporter.java +++ b/jablib/src/main/java/org/jabref/logic/externalfiles/ExternalFilesContentImporter.java @@ -3,10 +3,14 @@ import java.io.IOException; import java.nio.file.Path; +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.xpath.XPathExpressionException; + import org.jabref.logic.FilePreferences; import org.jabref.logic.importer.ImportFormatPreferences; import org.jabref.logic.importer.OpenDatabase; import org.jabref.logic.importer.ParserResult; +import org.jabref.logic.importer.fileformat.EpubImporter; import org.jabref.logic.importer.fileformat.PdfMergeMetadataImporter; import org.jabref.model.database.BibDatabaseContext; import org.jabref.model.util.FileUpdateMonitor; @@ -30,4 +34,12 @@ public ParserResult importPDFContent(Path file, BibDatabaseContext context, File public ParserResult importFromBibFile(Path bibFile, FileUpdateMonitor fileUpdateMonitor) throws IOException { return OpenDatabase.loadDatabase(bibFile, importFormatPreferences, fileUpdateMonitor); } + + public ParserResult importEpubContent(Path file) { + try { + return new EpubImporter(importFormatPreferences).importDatabase(file); + } catch (IOException | XPathExpressionException | ParserConfigurationException e) { + return 
ParserResult.fromError(e); + } + } } diff --git a/jablib/src/main/java/org/jabref/logic/importer/ImportFormatReader.java b/jablib/src/main/java/org/jabref/logic/importer/ImportFormatReader.java index 84c463f10ed..76605cefd09 100644 --- a/jablib/src/main/java/org/jabref/logic/importer/ImportFormatReader.java +++ b/jablib/src/main/java/org/jabref/logic/importer/ImportFormatReader.java @@ -9,6 +9,9 @@ import java.util.SortedSet; import java.util.TreeSet; +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.xpath.XPathExpressionException; + import org.jabref.logic.citationkeypattern.CitationKeyPatternPreferences; import org.jabref.logic.importer.fileformat.BiblioscapeImporter; import org.jabref.logic.importer.fileformat.BibtexImporter; @@ -17,6 +20,7 @@ import org.jabref.logic.importer.fileformat.CopacImporter; import org.jabref.logic.importer.fileformat.EndnoteImporter; import org.jabref.logic.importer.fileformat.EndnoteXmlImporter; +import org.jabref.logic.importer.fileformat.EpubImporter; import org.jabref.logic.importer.fileformat.InspecImporter; import org.jabref.logic.importer.fileformat.IsiImporter; import org.jabref.logic.importer.fileformat.MedlineImporter; @@ -38,10 +42,14 @@ import org.jabref.model.entry.BibEntry; import org.jabref.model.util.FileUpdateMonitor; -public class ImportFormatReader { +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +public class ImportFormatReader { public static final String BIBTEX_FORMAT = "BibTeX"; + private static final Logger LOGGER = LoggerFactory.getLogger(ImportFormatReader.class); + /** * All import formats. * Sorted accordingly to {@link Importer#compareTo}, which defaults to alphabetically by the name @@ -91,6 +99,15 @@ public void reset() { formats.add(new BibtexImporter(importFormatPreferences, fileUpdateMonitor)); formats.add(new CitaviXmlImporter()); + // {@link EpubImporter} constructs `XPath`s internally, and compilation may throw errors. 
+ // {@link EpubImporter} also constructs a {@link DocumentBuilder}, whose "constructor" can also throw an error. + // Hacky workaround. + try { + formats.add(new EpubImporter(importFormatPreferences)); + } catch (XPathExpressionException | ParserConfigurationException e) { + LOGGER.error("Unable to construct `EpubImporter`. `EpubImporter` will not be added to available importers", e); + } + // Get custom import formats formats.addAll(importerPreferences.getCustomImporters()); } diff --git a/jablib/src/main/java/org/jabref/logic/importer/ParserResult.java b/jablib/src/main/java/org/jabref/logic/importer/ParserResult.java index c0a75bc4903..f98ac646888 100644 --- a/jablib/src/main/java/org/jabref/logic/importer/ParserResult.java +++ b/jablib/src/main/java/org/jabref/logic/importer/ParserResult.java @@ -3,6 +3,7 @@ import java.nio.file.Path; import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Objects; @@ -50,6 +51,10 @@ public static ParserResult fromErrorMessage(String message) { return parserResult; } + public static ParserResult fromEntry(BibEntry entry) { + return new ParserResult(Collections.singleton(entry)); + } + private static String getErrorMessage(Exception exception) { String errorMessage = exception.getLocalizedMessage(); if (exception.getCause() != null) { diff --git a/jablib/src/main/java/org/jabref/logic/importer/fileformat/EpubImporter.java b/jablib/src/main/java/org/jabref/logic/importer/fileformat/EpubImporter.java new file mode 100644 index 00000000000..59134bf3c87 --- /dev/null +++ b/jablib/src/main/java/org/jabref/logic/importer/fileformat/EpubImporter.java @@ -0,0 +1,155 @@ +package org.jabref.logic.importer.fileformat; + +import java.io.BufferedReader; +import java.io.File; +import java.io.IOException; +import java.nio.file.FileSystem; +import java.nio.file.FileSystems; +import java.nio.file.FileVisitResult; +import java.nio.file.Files; +import 
java.nio.file.Path; +import java.nio.file.SimpleFileVisitor; +import java.nio.file.attribute.BasicFileAttributes; +import java.util.Arrays; +import java.util.List; +import java.util.Optional; + +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.xpath.XPath; +import javax.xml.xpath.XPathExpression; +import javax.xml.xpath.XPathExpressionException; +import javax.xml.xpath.XPathFactory; + +import org.jabref.logic.importer.ImportFormatPreferences; +import org.jabref.logic.importer.Importer; +import org.jabref.logic.importer.ParserResult; +import org.jabref.logic.l10n.Localization; +import org.jabref.logic.util.FileType; +import org.jabref.logic.util.OptionalObjectProperty; +import org.jabref.logic.util.StandardFileType; +import org.jabref.logic.util.io.FileUtil; +import org.jabref.logic.util.io.XMLUtil; +import org.jabref.model.entry.BibEntry; +import org.jabref.model.entry.LinkedFile; +import org.jabref.model.entry.field.StandardField; +import org.jabref.model.entry.types.StandardEntryType; + +import org.w3c.dom.Document; +import org.xml.sax.SAXException; + +public class EpubImporter extends Importer { + private static final char[] EPUB_HEADER_MAGIC_NUMBER = {0x50, 0x4b, 0x03, 0x04}; + + private final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); + private final DocumentBuilder builder = factory.newDocumentBuilder(); + + private final XPathFactory xPathFactory = XPathFactory.newInstance(); + private final XPath xpath = xPathFactory.newXPath(); + + private final XPathExpression titlePath = xpath.compile("/package/metadata/title"); + private final XPathExpression creatorPath = xpath.compile("/package/metadata/creator"); + private final XPathExpression identifierPath = xpath.compile("/package/metadata/identifier"); + private final XPathExpression languagePath = xpath.compile("/package/metadata/language"); + private final 
XPathExpression sourcePath = xpath.compile("/package/metadata/source"); + private final XPathExpression descriptionPath = xpath.compile("/package/metadata/description"); + private final XPathExpression subjectPath = xpath.compile("/package/metadata/subject"); + + private final ImportFormatPreferences importFormatPreferences; + + public EpubImporter(ImportFormatPreferences importFormatPreferences) throws XPathExpressionException, ParserConfigurationException { + this.importFormatPreferences = importFormatPreferences; + } + + // ePUB is a ZIP-based format, so this method will clash with other ZIP-based formats. + // Currently, `.ctv6bak` is the only clashing format that has been found. + @Override + public boolean isRecognizedFormat(BufferedReader input) throws IOException { + char[] header = new char[EPUB_HEADER_MAGIC_NUMBER.length]; + int nRead = input.read(header); + return nRead == EPUB_HEADER_MAGIC_NUMBER.length && Arrays.equals(header, EPUB_HEADER_MAGIC_NUMBER); + } + + @Override + public ParserResult importDatabase(Path filePath) throws IOException { + // TODO: JabRef has {@link DublinCoreExtractor}, which is exactly the schema used in OPF. However, that class + // is tied to {@link DublinCoreSchema}, which is tied to {@link XMPSchema}. It seems there is no way to pass + // ordinary XML nodes to {@link DublinCoreSchema}. + // + // The current implementation uses some hand-crafted {@link XPath}s, which work acceptably, but not as well as a + // full-featured {@link DublinCoreExtractor}. 
+ + try (FileSystem fileSystem = FileSystems.newFileSystem(filePath)) { + OptionalObjectProperty metadataFilePath = OptionalObjectProperty.empty(); + + Files.walkFileTree(fileSystem.getPath("/"), new SimpleFileVisitor<>() { + @Override + public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) { + if (file.toString().endsWith(".opf")) { + metadataFilePath.set(Optional.of(file)); + return FileVisitResult.TERMINATE; + } + return FileVisitResult.CONTINUE; + } + }); + + if (metadataFilePath.get().isEmpty()) { + return ParserResult.fromErrorMessage(Localization.lang("Could not find metadata file. Possibly corrupted ePUB file.")); + } + + File metadataFile = FileUtil.remapZipPath(metadataFilePath.get().get()).toFile(); + Document document = builder.parse(metadataFile); + + Optional title = XMLUtil.getNodeContentByXPath(document, titlePath); + Optional identifier = XMLUtil.getNodeContentByXPath(document, identifierPath); + Optional source = XMLUtil.getNodeContentByXPath(document, sourcePath); + Optional description = XMLUtil.getNodeContentByXPath(document, descriptionPath); + + List authors = XMLUtil.getNodesContentByXPath(document, creatorPath); + List subjects = XMLUtil.getNodesContentByXPath(document, subjectPath); + List languages = XMLUtil.getNodesContentByXPath(document, languagePath); + + LinkedFile linkedFile = new LinkedFile("", filePath.toAbsolutePath(), StandardFileType.EPUB.getName()); + + BibEntry entry = new BibEntry(StandardEntryType.Book) + .withField(StandardField.TITLE, title) + .withField(StandardField.ABSTRACT, description) + .withField(StandardField.URL, source.isPresent() ? source : identifier) + .withField(StandardField.AUTHOR, !authors.isEmpty() ? Optional.of(String.join(" and ", authors)) : Optional.empty()) + .withField(StandardField.LANGUAGE, !languages.isEmpty() ? 
Optional.of(String.join(" and ", languages)) : Optional.empty()) + .withKeywords(subjects, importFormatPreferences.bibEntryPreferences().getKeywordSeparator()) + .withFile(linkedFile); + + return ParserResult.fromEntry(entry); + } catch (SAXException | XPathExpressionException e) { + return ParserResult.fromError(e); + } + } + + @Override + public ParserResult importDatabase(BufferedReader input) throws IOException { + throw new UnsupportedOperationException("EpubImporter does not support importDatabase(BufferedReader reader). " + + "Instead use importDatabase(Path filePath)."); + } + + @Override + public String getId() { + return "epub"; + } + + @Override + public String getName() { + return "ePUB"; + } + + @Override + public String getDescription() { + return Localization.lang("Import the popular e-book file format ePUB"); + } + + @Override + public FileType getFileType() { + return StandardFileType.EPUB; + } +} diff --git a/jabgui/src/main/java/org/jabref/gui/util/OptionalObjectProperty.java b/jablib/src/main/java/org/jabref/logic/util/OptionalObjectProperty.java similarity index 97% rename from jabgui/src/main/java/org/jabref/gui/util/OptionalObjectProperty.java rename to jablib/src/main/java/org/jabref/logic/util/OptionalObjectProperty.java index 01a50742b94..7d447808e70 100644 --- a/jabgui/src/main/java/org/jabref/gui/util/OptionalObjectProperty.java +++ b/jablib/src/main/java/org/jabref/logic/util/OptionalObjectProperty.java @@ -1,4 +1,4 @@ -package org.jabref.gui.util; +package org.jabref.logic.util; import java.util.Optional; diff --git a/jablib/src/main/java/org/jabref/logic/util/StandardFileType.java b/jablib/src/main/java/org/jabref/logic/util/StandardFileType.java index 4d0bf1c0412..aa674823a01 100644 --- a/jablib/src/main/java/org/jabref/logic/util/StandardFileType.java +++ b/jablib/src/main/java/org/jabref/logic/util/StandardFileType.java @@ -46,7 +46,8 @@ public enum StandardFileType implements FileType { XML("XML", "xml"), XMP("XMP", "xmp"), 
YAML("YAML Markup", "yml", "yaml"), - ZIP("Zip Archive", "zip"); + ZIP("Zip Archive", "zip"), + EPUB("ePUB", "epub"); private final List extensions; private final String name; diff --git a/jablib/src/main/java/org/jabref/logic/util/io/FileUtil.java b/jablib/src/main/java/org/jabref/logic/util/io/FileUtil.java index 9093acfe570..0eb71543e8d 100644 --- a/jablib/src/main/java/org/jabref/logic/util/io/FileUtil.java +++ b/jablib/src/main/java/org/jabref/logic/util/io/FileUtil.java @@ -1,6 +1,7 @@ package org.jabref.logic.util.io; import java.io.File; +import java.io.FileOutputStream; import java.io.IOException; import java.io.UncheckedIOException; import java.nio.file.FileSystems; @@ -18,6 +19,7 @@ import java.util.Locale; import java.util.Objects; import java.util.Optional; +import java.util.UUID; import java.util.regex.Pattern; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -496,6 +498,17 @@ public static boolean isPDFFile(Path file) { return extension.isPresent() && StandardFileType.PDF.getExtensions().contains(extension.get()); } + /** + * Test if the file is an ePUB file by simply checking the extension to be ".epub" + * + * @param file The file to check + * @return True if file extension is ".epub", false otherwise + */ + public static boolean isEpubFile(Path file) { + Optional extension = FileUtil.getFileExtension(file); + return extension.isPresent() && StandardFileType.EPUB.getExtensions().contains(extension.get()); + } + /** * @return Path of current panel database directory or the standard working directory in case the database was not saved yet */ @@ -589,4 +602,21 @@ public static String shortenFileName(String fileName, Integer maxLength) { public static boolean isCharLegal(char c) { return Arrays.binarySearch(ILLEGAL_CHARS, c) < 0; } + + /** + * Create a temporary file and fill it with the contents of a file in a ZIP archive. + *

+ * When you need to read (extract) a file from a ZIP archive, you cannot convert `ZipPath` (`ZipPath` is a private class) to {@link File}. + * One way of solving this problem is to make a temporary file, and copy ZIP file contents to the temporary file. + *

+ * Adapted from . + */ + public static Path remapZipPath(Path zipPath) throws IOException { + File tempFile = Files.createTempFile(UUID.randomUUID().toString(), ".tmp").toFile(); + tempFile.deleteOnExit(); + try (FileOutputStream out = new FileOutputStream(tempFile)) { + Files.copy(zipPath, out); + } + return tempFile.toPath(); + } } diff --git a/jablib/src/main/java/org/jabref/logic/util/io/XMLUtil.java b/jablib/src/main/java/org/jabref/logic/util/io/XMLUtil.java index 7fb0e6e4f68..526849d84c2 100644 --- a/jablib/src/main/java/org/jabref/logic/util/io/XMLUtil.java +++ b/jablib/src/main/java/org/jabref/logic/util/io/XMLUtil.java @@ -5,6 +5,8 @@ import java.util.List; import java.util.Optional; import java.util.RandomAccess; +import java.util.function.Predicate; +import java.util.stream.IntStream; import javax.xml.transform.OutputKeys; import javax.xml.transform.Transformer; @@ -12,8 +14,12 @@ import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; +import javax.xml.xpath.XPathConstants; +import javax.xml.xpath.XPathExpression; +import javax.xml.xpath.XPathExpressionException; import org.jabref.architecture.AllowedToUseStandardStreams; +import org.jabref.model.strings.StringUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -133,4 +139,24 @@ public int size() { return list.getLength(); } } + + public static List getNodesContentByXPath(Node root, XPathExpression xPath) throws XPathExpressionException { + NodeList nodes = (NodeList) xPath.evaluate(root, XPathConstants.NODESET); + + if (nodes == null) { + return List.of(); + } + + // This needs to be (somewhat awkwardly) constructed from an {@link IntStream}, as {@link NodeList} is an interface, + // and it's very strict (and limited). + return IntStream + .range(0, nodes.getLength()) + .mapToObj(i -> nodes.item(i).getTextContent()) + .filter(Predicate.not(StringUtil::isNullOrEmpty)) // Just in case. 
+ .toList(); + } + + public static Optional getNodeContentByXPath(Node root, XPathExpression xPath) throws XPathExpressionException { + return StringUtil.optionalOfEmpty(xPath.evaluate(root)); + } } diff --git a/jablib/src/main/java/org/jabref/model/entry/BibEntry.java b/jablib/src/main/java/org/jabref/model/entry/BibEntry.java index 6f6fbc3012a..0f0ce4f81f2 100644 --- a/jablib/src/main/java/org/jabref/model/entry/BibEntry.java +++ b/jablib/src/main/java/org/jabref/model/entry/BibEntry.java @@ -845,11 +845,6 @@ public void addKeyword(Keyword keyword, Character delimiter) { this.putKeywords(keywords, delimiter); } - /** - * Add multiple keywords to entry - * - * @param keywords Keywords to add - */ public void addKeywords(Collection keywords, Character delimiter) { Objects.requireNonNull(keywords); keywords.forEach(keyword -> addKeyword(keyword, delimiter)); @@ -963,6 +958,14 @@ public BibEntry withField(Field field, String value) { return this; } + public BibEntry withField(Field field, Optional value) { + value.ifPresent(v -> { + setField(field, v); + this.setChanged(false); + }); + return this; + } + /** * A copy is made of the parameter */ @@ -997,6 +1000,12 @@ public BibEntry withUserComments(String commentsBeforeEntry) { return this; } + public BibEntry withKeywords(Collection keywords, Character delimiter) { + addKeywords(keywords, delimiter); + this.setChanged(false); + return this; + } + public List getEntryLinkList(Field field, BibDatabase database) { return getField(field).map(fieldValue -> EntryLinkList.parse(fieldValue, database)) .orElse(List.of()); @@ -1065,6 +1074,10 @@ public Optional setFiles(List files) { return this.setField(StandardField.FILE, newValue); } + public BibEntry withFile(LinkedFile file) { + return withFiles(List.of(file)); + } + public BibEntry withFiles(List files) { setFiles(files); this.setChanged(false); diff --git a/jablib/src/main/java/org/jabref/model/strings/StringUtil.java 
b/jablib/src/main/java/org/jabref/model/strings/StringUtil.java index 2624ed5b750..7f07d084bd9 100644 --- a/jablib/src/main/java/org/jabref/model/strings/StringUtil.java +++ b/jablib/src/main/java/org/jabref/model/strings/StringUtil.java @@ -743,6 +743,14 @@ public static String quoteStringIfSpaceIsContained(String string) { } } + /** + * @param string input string, may be {@code null} + * @return {@link Optional#empty()} if {@code string} is {@code null} or empty, otherwise the string wrapped in an {@link Optional}. + */ + public static Optional optionalOfEmpty(String string) { + return Optional.ofNullable(string).filter(s -> !s.isEmpty()); + } + /** * Checks if the given string contains any whitespace characters. The supported whitespace characters * are the set of characters matched by {@code \s} in regular expressions, which are {@code [ \t\n\x0B\f\r]}. diff --git a/jablib/src/main/resources/l10n/JabRef_en.properties b/jablib/src/main/resources/l10n/JabRef_en.properties index dc4b6bd248f..301905ee3d4 100644 --- a/jablib/src/main/resources/l10n/JabRef_en.properties +++ b/jablib/src/main/resources/l10n/JabRef_en.properties @@ -371,6 +371,10 @@ Scrapes\ the\ first\ page\ of\ a\ PDF\ for\ BibTeX\ information.=Scrapes the fir This\ importer\ enables\ "--importToOpen\ someEntry.bib"=This importer enables "--importToOpen someEntry.bib" This\ importer\ parses\ data\ of\ the\ first\ page\ of\ the\ PDF\ and\ creates\ a\ BibTeX\ entry.\ Currently,\ Springer\ and\ IEEE\ formats\ are\ supported.=This importer parses data of the first page of the PDF and creates a BibTeX entry. Currently, Springer and IEEE formats are supported. +Could\ not\ find\ metadata\ file.\ Possibly\ corrupted\ ePUB\ file.=Could not find metadata file. Possibly corrupted ePUB file. 
+Import\ the\ popular\ e-book\ file\ format\ ePUB=Import the popular e-book file format ePUB +ePUB\ entry\ was\ successfully\ imported=ePUB entry was successfully imported + Exporting\ %0=Exporting %0 Unknown\ export\ format\ %0=Unknown export format %0 diff --git a/jablib/src/test/java/org/jabref/logic/importer/fileformat/EpubImporterFilesTest.java b/jablib/src/test/java/org/jabref/logic/importer/fileformat/EpubImporterFilesTest.java new file mode 100644 index 00000000000..1200846af29 --- /dev/null +++ b/jablib/src/test/java/org/jabref/logic/importer/fileformat/EpubImporterFilesTest.java @@ -0,0 +1,110 @@ +package org.jabref.logic.importer.fileformat; + +import java.io.IOException; +import java.net.URISyntaxException; +import java.nio.file.Path; +import java.util.Collections; +import java.util.List; +import java.util.function.Predicate; +import java.util.stream.Stream; + +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.xpath.XPathExpressionException; + +import org.jabref.logic.bibtex.FieldPreferences; +import org.jabref.logic.citationkeypattern.CitationKeyPatternPreferences; +import org.jabref.logic.importer.ImportFormatPreferences; +import org.jabref.logic.importer.util.GrobidPreferences; +import org.jabref.logic.preferences.DOIPreferences; +import org.jabref.logic.util.StandardFileType; +import org.jabref.logic.xmp.XmpPreferences; +import org.jabref.model.entry.BibEntry; +import org.jabref.model.entry.BibEntryPreferences; +import org.jabref.model.entry.LinkedFile; +import org.jabref.model.entry.field.StandardField; +import org.jabref.model.entry.types.StandardEntryType; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.mockito.Mockito.mock; + +class EpubImporterFilesTest { + private static final String FILE_ENDING = 
".epub"; + + private EpubImporter importer; + + @BeforeEach + void setUp() throws XPathExpressionException, ParserConfigurationException { + BibEntryPreferences bibEntryPreferences = new BibEntryPreferences(','); + + ImportFormatPreferences importFormatPreferences = new ImportFormatPreferences( + bibEntryPreferences, + mock(CitationKeyPatternPreferences.class), + mock(FieldPreferences.class), + mock(XmpPreferences.class), + mock(DOIPreferences.class), + mock(GrobidPreferences.class) + ); + + this.importer = new EpubImporter(importFormatPreferences); + } + + private static Stream fileNames() throws IOException { + Predicate fileName = name -> name.startsWith("EpubImporterTest") && name.endsWith(FILE_ENDING); + return ImporterTestEngine.getTestFiles(fileName).stream(); + } + + private static Stream invalidFileNames() throws IOException { + // `.ctv6bak` is a ZIP-based file format, just as ePUB. We need to exclude them from tests, as + // {@link EpubImporter#isRecognizedFormat} works by checking only the ZIP file header. + Predicate fileName = name -> !name.startsWith("EpubImporterTest") && !name.endsWith(".ctv6bak"); + return ImporterTestEngine.getTestFiles(fileName).stream(); + } + + @ParameterizedTest + @MethodSource("fileNames") + void isRecognizedFormat(String fileName) throws IOException, XPathExpressionException, ParserConfigurationException { + ImporterTestEngine.testIsRecognizedFormat(importer, fileName); + } + + @ParameterizedTest + @MethodSource("invalidFileNames") + void isNotRecognizedFormat(String fileName) throws IOException { + ImporterTestEngine.testIsNotRecognizedFormat(importer, fileName); + } + + // Grimms were taken from Project Gutenberg. They were chosen as the book has 2 authors. 
+ + @Test + void grimmEpubOld() throws URISyntaxException, IOException { + Path file = Path.of(EpubImporterFilesTest.class.getResource("EpubImporterTest1Old.epub").toURI()); + + List result = importer.importDatabase(file).getDatabase().getEntries(); + + assertEquals(List.of(grimms(file)), result); + } + + @Test + void grimmEpub3() throws URISyntaxException, IOException { + Path file = Path.of(EpubImporterFilesTest.class.getResource("EpubImporterTest2Epub3.epub").toURI()); + + List result = importer.importDatabase(file).getDatabase().getEntries(); + + assertEquals(List.of(grimms(file)), result); + } + + // Both ePUB3 and older ePUB version should have the same {@link BibEntry} (expect `file` field). + BibEntry grimms(Path file) { + return new BibEntry(StandardEntryType.Book) + .withField(StandardField.AUTHOR, "Jacob Grimm and Wilhelm Grimm") + .withField(StandardField.TITLE, "Grimms' Fairy Tales") + .withField(StandardField.KEYWORDS, "Fairy tales -- Germany") + .withField(StandardField.LANGUAGE, "en") + .withField(StandardField.URL, "https://www.gutenberg.org/files/2591/2591-h/2591-h.htm") + .withFile(new LinkedFile("", file.toAbsolutePath(), StandardFileType.EPUB.getName())); + } +} diff --git a/jablib/src/test/java/org/jabref/logic/importer/fileformat/EpubImporterTest.java b/jablib/src/test/java/org/jabref/logic/importer/fileformat/EpubImporterTest.java new file mode 100644 index 00000000000..a72ca9cadc5 --- /dev/null +++ b/jablib/src/test/java/org/jabref/logic/importer/fileformat/EpubImporterTest.java @@ -0,0 +1,37 @@ +package org.jabref.logic.importer.fileformat; + +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.xpath.XPathExpressionException; + +import org.jabref.logic.importer.ImportFormatPreferences; +import org.jabref.logic.util.StandardFileType; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.mockito.Mockito.mock; + 
+class EpubImporterTest { + private EpubImporter importer; + + @BeforeEach + void setUp() throws XPathExpressionException, ParserConfigurationException { + this.importer = new EpubImporter(mock(ImportFormatPreferences.class)); + } + + @Test + void getFormatName() { + assertEquals("ePUB", importer.getName()); + } + + @Test + void getCLIId() { + assertEquals("epub", importer.getId()); + } + + @Test + void sGetExtensions() { + assertEquals(StandardFileType.EPUB, importer.getFileType()); + } +} diff --git a/jablib/src/test/resources/org/jabref/logic/importer/fileformat/EpubImporterTest1Old.epub b/jablib/src/test/resources/org/jabref/logic/importer/fileformat/EpubImporterTest1Old.epub new file mode 100644 index 00000000000..e6fd08e9ceb Binary files /dev/null and b/jablib/src/test/resources/org/jabref/logic/importer/fileformat/EpubImporterTest1Old.epub differ diff --git a/jablib/src/test/resources/org/jabref/logic/importer/fileformat/EpubImporterTest2Epub3.epub b/jablib/src/test/resources/org/jabref/logic/importer/fileformat/EpubImporterTest2Epub3.epub new file mode 100644 index 00000000000..3d6bbd4d22a Binary files /dev/null and b/jablib/src/test/resources/org/jabref/logic/importer/fileformat/EpubImporterTest2Epub3.epub differ