diff --git a/src/main/java/org/jabref/gui/externalfiles/ImportHandler.java b/src/main/java/org/jabref/gui/externalfiles/ImportHandler.java index 58a4f4eabe8..f47b2a963ae 100644 --- a/src/main/java/org/jabref/gui/externalfiles/ImportHandler.java +++ b/src/main/java/org/jabref/gui/externalfiles/ImportHandler.java @@ -176,6 +176,18 @@ public List call() { message = bibtexParserResult.getErrorMessage(); } addResultToList(file, success, message); + } else if (FileUtil.isEpubFile(file)) { + ParserResult result = contentImporter.importEpubContent(file); + List entries = result.getDatabase().getEntries(); + boolean success = !result.hasWarnings(); // any parser warning marks this import as failed + String message; + if (success) { + message = Localization.lang("ePUB entry was successfully imported"); + } else { + message = result.getErrorMessage(); + } + entriesToAdd.addAll(entries); + addResultToList(file, success, message); } else { BibEntry emptyEntryWithLink = createEmptyEntryWithLink(file); entriesToAdd.add(emptyEntryWithLink); diff --git a/src/main/java/org/jabref/gui/externalfiletype/StandardExternalFileType.java b/src/main/java/org/jabref/gui/externalfiletype/StandardExternalFileType.java index fc12e9670e8..145c2b266b9 100644 --- a/src/main/java/org/jabref/gui/externalfiletype/StandardExternalFileType.java +++ b/src/main/java/org/jabref/gui/externalfiletype/StandardExternalFileType.java @@ -28,7 +28,7 @@ public enum StandardExternalFileType implements ExternalFileType { TIFF(Localization.lang("%0 image", "TIFF"), "tiff", "image/tiff", "gimp", "picture", IconTheme.JabRefIcons.PICTURE), URL("URL", "html", "text/html", "firefox", "www", IconTheme.JabRefIcons.WWW), MHT("MHT", "mht", "multipart/related", "firefox", "www", IconTheme.JabRefIcons.WWW), - ePUB("ePUB", "epub", "application/epub+zip", "firefox", "www", IconTheme.JabRefIcons.WWW), + ePUB("ePUB", "epub", "application/epub+zip", "firefox", "www", IconTheme.JabRefIcons.BOOK), MARKDOWN("Markdown", "md", "text/markdown", "emacs", "emacs", 
IconTheme.JabRefIcons.FILE_TEXT); private final String name; private final String extension; diff --git a/src/main/java/org/jabref/logic/externalfiles/ExternalFilesContentImporter.java b/src/main/java/org/jabref/logic/externalfiles/ExternalFilesContentImporter.java index 7a868577c42..60e0ace1df7 100644 --- a/src/main/java/org/jabref/logic/externalfiles/ExternalFilesContentImporter.java +++ b/src/main/java/org/jabref/logic/externalfiles/ExternalFilesContentImporter.java @@ -3,10 +3,14 @@ import java.io.IOException; import java.nio.file.Path; +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.xpath.XPathExpressionException; + import org.jabref.logic.FilePreferences; import org.jabref.logic.importer.ImportFormatPreferences; import org.jabref.logic.importer.OpenDatabase; import org.jabref.logic.importer.ParserResult; +import org.jabref.logic.importer.fileformat.EpubImporter; import org.jabref.logic.importer.fileformat.PdfMergeMetadataImporter; import org.jabref.model.database.BibDatabaseContext; import org.jabref.model.util.FileUpdateMonitor; @@ -27,6 +31,14 @@ public ParserResult importPDFContent(Path file, BibDatabaseContext context, File } } + public ParserResult importEpubContent(Path file) { + try { + return new EpubImporter(importFormatPreferences).importDatabase(file); + } catch (IOException | XPathExpressionException | ParserConfigurationException e) { + return ParserResult.fromError(e); + } + } + public ParserResult importFromBibFile(Path bibFile, FileUpdateMonitor fileUpdateMonitor) throws IOException { return OpenDatabase.loadDatabase(bibFile, importFormatPreferences, fileUpdateMonitor); } diff --git a/src/main/java/org/jabref/logic/importer/ImportFormatReader.java b/src/main/java/org/jabref/logic/importer/ImportFormatReader.java index 2203b1156bf..287ff9d3588 100644 --- a/src/main/java/org/jabref/logic/importer/ImportFormatReader.java +++ b/src/main/java/org/jabref/logic/importer/ImportFormatReader.java @@ -9,6 +9,9 @@ import 
java.util.SortedSet; import java.util.TreeSet; +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.xpath.XPathExpressionException; + import org.jabref.logic.citationkeypattern.CitationKeyPatternPreferences; import org.jabref.logic.importer.fileformat.BiblioscapeImporter; import org.jabref.logic.importer.fileformat.BibtexImporter; @@ -17,6 +20,7 @@ import org.jabref.logic.importer.fileformat.CopacImporter; import org.jabref.logic.importer.fileformat.EndnoteImporter; import org.jabref.logic.importer.fileformat.EndnoteXmlImporter; +import org.jabref.logic.importer.fileformat.EpubImporter; import org.jabref.logic.importer.fileformat.InspecImporter; import org.jabref.logic.importer.fileformat.IsiImporter; import org.jabref.logic.importer.fileformat.MedlineImporter; @@ -37,10 +41,15 @@ import org.jabref.model.entry.BibEntry; import org.jabref.model.util.FileUpdateMonitor; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + public class ImportFormatReader { public static final String BIBTEX_FORMAT = "BibTeX"; + private static final Logger LOGGER = LoggerFactory.getLogger(ImportFormatReader.class); + /** * All import formats. * Sorted accordingly to {@link Importer#compareTo}, which defaults to alphabetically by the name @@ -89,6 +98,15 @@ public void reset() { formats.add(new BibtexImporter(importFormatPreferences, fileUpdateMonitor)); formats.add(new CitaviXmlImporter()); + // {@link EpubImporter} compiles its XPath expressions in field initializers, which may throw {@link XPathExpressionException}. + // It also creates a {@link DocumentBuilder} there, which may throw {@link ParserConfigurationException}. + // Workaround: log the failure and skip this importer instead of aborting the reset. + try { + formats.add(new EpubImporter(importFormatPreferences)); + } catch (XPathExpressionException | ParserConfigurationException e) { + LOGGER.error("Unable to construct `EpubImporter`. 
`EpubImporter` will not be added to available importers", e); + } + // Get custom import formats formats.addAll(importerPreferences.getCustomImporters()); } diff --git a/src/main/java/org/jabref/logic/importer/ParserResult.java b/src/main/java/org/jabref/logic/importer/ParserResult.java index 60dd5af278a..0ec0195e7ef 100644 --- a/src/main/java/org/jabref/logic/importer/ParserResult.java +++ b/src/main/java/org/jabref/logic/importer/ParserResult.java @@ -45,6 +45,10 @@ public ParserResult(BibDatabase database, MetaData metaData, Set e this.entryTypes = Objects.requireNonNull(entryTypes); } + public static ParserResult fromEntry(BibEntry entry) { + return new ParserResult(Collections.singleton(entry)); + } + public static ParserResult fromErrorMessage(String message) { ParserResult parserResult = new ParserResult(); parserResult.addWarning(message); diff --git a/src/main/java/org/jabref/logic/importer/fileformat/EpubImporter.java b/src/main/java/org/jabref/logic/importer/fileformat/EpubImporter.java new file mode 100644 index 00000000000..f8747d9d883 --- /dev/null +++ b/src/main/java/org/jabref/logic/importer/fileformat/EpubImporter.java @@ -0,0 +1,177 @@ +package org.jabref.logic.importer.fileformat; + +import java.io.BufferedReader; +import java.io.File; +import java.io.IOException; +import java.nio.file.FileSystem; +import java.nio.file.FileSystems; +import java.nio.file.FileVisitResult; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.SimpleFileVisitor; +import java.nio.file.attribute.BasicFileAttributes; +import java.util.Arrays; +import java.util.List; +import java.util.Optional; + +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.xpath.XPath; +import javax.xml.xpath.XPathExpression; +import javax.xml.xpath.XPathExpressionException; +import javax.xml.xpath.XPathFactory; + +import 
org.jabref.gui.util.OptionalObjectProperty; +import org.jabref.logic.importer.ImportFormatPreferences; +import org.jabref.logic.importer.Importer; +import org.jabref.logic.importer.ParserResult; +import org.jabref.logic.l10n.Localization; +import org.jabref.logic.util.FileType; +import org.jabref.logic.util.StandardFileType; +import org.jabref.logic.util.io.FileUtil; +import org.jabref.logic.util.io.XMLUtil; +import org.jabref.model.entry.BibEntry; +import org.jabref.model.entry.LinkedFile; +import org.jabref.model.entry.field.Field; +import org.jabref.model.entry.field.StandardField; +import org.jabref.model.entry.types.StandardEntryType; + +import org.w3c.dom.Document; +import org.xml.sax.SAXException; + +public class EpubImporter extends Importer { + private static final char[] EPUB_HEADER_MAGIC_NUMBER = {0x50, 0x4b, 0x03, 0x04}; + + private final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); + private final DocumentBuilder builder = factory.newDocumentBuilder(); + + private final XPathFactory xPathFactory = XPathFactory.newInstance(); + private final XPath xpath = xPathFactory.newXPath(); + + private final XPathExpression titlePath = xpath.compile("/package/metadata/title"); + private final XPathExpression creatorPath = xpath.compile("/package/metadata/creator"); + private final XPathExpression identifierPath = xpath.compile("/package/metadata/identifier"); + private final XPathExpression languagePath = xpath.compile("/package/metadata/language"); + private final XPathExpression sourcePath = xpath.compile("/package/metadata/source"); + private final XPathExpression descriptionPath = xpath.compile("/package/metadata/description"); + private final XPathExpression subjectPath = xpath.compile("/package/metadata/subject"); + + private BibEntry entry = new BibEntry(StandardEntryType.Book); + + private final ImportFormatPreferences importFormatPreferences; + + public EpubImporter(ImportFormatPreferences importFormatPreferences) throws 
XPathExpressionException, ParserConfigurationException { + this.importFormatPreferences = importFormatPreferences; + } + + // ePUB is a ZIP-based format and only the ZIP magic number is checked here, so this method also matches other ZIP-based formats. + // Currently, `.ctv6bak` is the only known clash. + @Override + public boolean isRecognizedFormat(BufferedReader input) throws IOException { + char[] header = new char[EPUB_HEADER_MAGIC_NUMBER.length]; + int nRead = input.read(header); + return nRead == EPUB_HEADER_MAGIC_NUMBER.length && Arrays.equals(header, EPUB_HEADER_MAGIC_NUMBER); + } + + @Override + public ParserResult importDatabase(Path filePath) throws IOException { + // Not in functional programming style, but keeping {@link entry} as a mutable field makes it easier + // to write {@link addField}. + // Because the field is shared between calls, concurrent imports on one instance may interfere with each other. + + // TODO: JabRef has {@link DublinCoreExtractor}, which is exactly the schema used in OPF. However, that class + // is tied to {@link DublinCoreSchema}, which is tied to {@link XMPSchema}. It seems there is no way to pass + // ordinary XML nodes to {@link DublinCoreSchema}. + // + // The current implementation uses some hand-crafted {@link XPath}s, which work reasonably well, but not as well as a + // full-featured {@link DublinCoreExtractor}. + + entry = new BibEntry(StandardEntryType.Book); + + try (FileSystem fileSystem = FileSystems.newFileSystem(filePath)) { + OptionalObjectProperty metadataFilePath = OptionalObjectProperty.empty(); + + Files.walkFileTree(fileSystem.getPath("/"), new SimpleFileVisitor<>() { + @Override + public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) { + if (file.toString().endsWith(".opf")) { + metadataFilePath.set(Optional.of(file)); + return FileVisitResult.TERMINATE; + } + return FileVisitResult.CONTINUE; + } + }); + + if (metadataFilePath.get().isEmpty()) { + return ParserResult.fromErrorMessage(Localization.lang("Could not find metadata file. 
Possibly corrupted ePUB file.")); + } + + File metadataFile = FileUtil.remapZipPath(metadataFilePath.get().get()).toFile(); + Document document = builder.parse(metadataFile); + + Optional title = XMLUtil.getNodeContentByXPath(document, titlePath); + Optional identifier = XMLUtil.getNodeContentByXPath(document, identifierPath); + Optional source = XMLUtil.getNodeContentByXPath(document, sourcePath); + Optional description = XMLUtil.getNodeContentByXPath(document, descriptionPath); + + List authors = XMLUtil.getNodesContentByXPath(document, creatorPath); + List subjects = XMLUtil.getNodesContentByXPath(document, subjectPath); + List languages = XMLUtil.getNodesContentByXPath(document, languagePath); + + addField(StandardField.TITLE, title); + addField(StandardField.ABSTRACT, description); + + if (source.isPresent()) { + addField(StandardField.URL, source); + } else { + addField(StandardField.URL, identifier); + } + + addField(StandardField.AUTHOR, Optional.of(String.join(" and ", authors))); + + // Might not be the right way. Leaving, as it still contains information. + addField(StandardField.LANGUAGE, Optional.of(String.join(" and ", languages))); + + entry.addKeywords(subjects, importFormatPreferences.bibEntryPreferences().getKeywordSeparator()); + + entry.addFile(new LinkedFile("", filePath.toAbsolutePath(), StandardFileType.EPUB.getName())); + + return ParserResult.fromEntry(entry); + } catch (SAXException | XPathExpressionException e) { + return ParserResult.fromError(e); + } + } + + // Tradeoff between conforming to controversial code standard and code simplicity. + // This refs: https://peps.python.org/pep-0008/#a-foolish-consistency-is-the-hobgoblin-of-little-minds. 
+ private void addField(Field field, Optional value) { + value.ifPresent(it -> entry.setField(field, it)); + } + + @Override + public ParserResult importDatabase(BufferedReader input) throws IOException { + throw new UnsupportedOperationException("EpubImporter does not support importDatabase(BufferedReader reader). " + + "Instead use importDatabase(Path filePath)."); + } + + @Override + public String getId() { + return "epub"; + } + + @Override + public String getName() { + return "ePUB"; + } + + @Override + public String getDescription() { + return Localization.lang("Import the popular e-book file format ePUB"); + } + + @Override + public FileType getFileType() { + return StandardFileType.EPUB; + } +} diff --git a/src/main/java/org/jabref/logic/util/StandardFileType.java b/src/main/java/org/jabref/logic/util/StandardFileType.java index e6ccc825486..251319335ec 100644 --- a/src/main/java/org/jabref/logic/util/StandardFileType.java +++ b/src/main/java/org/jabref/logic/util/StandardFileType.java @@ -46,7 +46,8 @@ public enum StandardFileType implements FileType { XML("XML", "xml"), XMP("XMP", "xmp"), YAML("YAML Markup", "yaml"), - ZIP("Zip Archive", "zip"); + ZIP("Zip Archive", "zip"), + EPUB("ePUB", "epub"); private final List extensions; private final String name; diff --git a/src/main/java/org/jabref/logic/util/io/FileUtil.java b/src/main/java/org/jabref/logic/util/io/FileUtil.java index 5ddb734d8ce..653fc4e88ad 100644 --- a/src/main/java/org/jabref/logic/util/io/FileUtil.java +++ b/src/main/java/org/jabref/logic/util/io/FileUtil.java @@ -1,6 +1,7 @@ package org.jabref.logic.util.io; import java.io.File; +import java.io.FileOutputStream; import java.io.IOException; import java.io.UncheckedIOException; import java.nio.file.FileSystems; @@ -18,6 +19,7 @@ import java.util.Locale; import java.util.Objects; import java.util.Optional; +import java.util.UUID; import java.util.regex.Pattern; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -490,6 
+492,17 @@ public static boolean isPDFFile(Path file) { return extension.isPresent() && StandardFileType.PDF.getExtensions().contains(extension.get()); } + /** + * Test if the file is an ePUB file by simply checking the extension to be ".epub" + * + * @param file The file to check + * @return True if file extension is ".epub", false otherwise + */ + public static boolean isEpubFile(Path file) { + Optional extension = FileUtil.getFileExtension(file); + return extension.isPresent() && StandardFileType.EPUB.getExtensions().contains(extension.get()); + } + /** * @return Path of current panel database directory or the standard working directory in case the database was not saved yet */ @@ -585,4 +598,21 @@ public static String shortenFileName(String fileName, Integer maxLength) { public static boolean isCharLegal(char c) { return Arrays.binarySearch(ILLEGAL_CHARS, c) < 0; } + + /** + * Create a temporary file and fill it with the contents of a file inside a ZIP archive. + *

+ * When you need to read (extract) a file from a ZIP archive, you cannot convert `ZipPath` (`ZipPath` is a private class) to {@link File}. + * One way of solving this problem is to make a temporary file, and copy ZIP file contents to the temporary file. + *

+ * Adapted from . + */ + public static Path remapZipPath(Path zipPath) throws IOException { + File tempFile = Files.createTempFile(UUID.randomUUID().toString(), ".tmp").toFile(); + tempFile.deleteOnExit(); + try (FileOutputStream out = new FileOutputStream(tempFile)) { + Files.copy(zipPath, out); + } + return tempFile.toPath(); + } } diff --git a/src/main/java/org/jabref/logic/util/io/XMLUtil.java b/src/main/java/org/jabref/logic/util/io/XMLUtil.java index b6126914de3..7e791a8dda3 100644 --- a/src/main/java/org/jabref/logic/util/io/XMLUtil.java +++ b/src/main/java/org/jabref/logic/util/io/XMLUtil.java @@ -6,6 +6,8 @@ import java.util.List; import java.util.Optional; import java.util.RandomAccess; +import java.util.function.Predicate; +import java.util.stream.IntStream; import javax.xml.transform.OutputKeys; import javax.xml.transform.Transformer; @@ -13,8 +15,12 @@ import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; +import javax.xml.xpath.XPathConstants; +import javax.xml.xpath.XPathExpression; +import javax.xml.xpath.XPathExpressionException; import org.jabref.architecture.AllowedToUseStandardStreams; +import org.jabref.model.strings.StringUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -134,4 +140,24 @@ public int size() { return list.getLength(); } } + + public static List getNodesContentByXPath(Node root, XPathExpression xPath) throws XPathExpressionException { + NodeList nodes = (NodeList) xPath.evaluate(root, XPathConstants.NODESET); + + if (nodes == null) { + return List.of(); + } + + // {@link NodeList} only offers index-based access (getLength/item), so the stream has to be + // built (somewhat awkwardly) from an {@link IntStream} over the indices. + return IntStream + .range(0, nodes.getLength()) + .mapToObj(i -> nodes.item(i).getTextContent()) + .filter(Predicate.not(StringUtil::isNullOrEmpty)) // Just in case. 
+ .toList(); + } + + public static Optional getNodeContentByXPath(Node root, XPathExpression xPath) throws XPathExpressionException { + return StringUtil.optionalOfEmpty(xPath.evaluate(root)); + } } diff --git a/src/main/java/org/jabref/model/strings/StringUtil.java b/src/main/java/org/jabref/model/strings/StringUtil.java index 2237526cd3e..749d940c5f8 100644 --- a/src/main/java/org/jabref/model/strings/StringUtil.java +++ b/src/main/java/org/jabref/model/strings/StringUtil.java @@ -755,4 +755,12 @@ public static String removeStringAtTheEnd(String string, String stringToBeRemove public static boolean endsWithIgnoreCase(String string, String suffix) { return StringUtils.endsWithIgnoreCase(string, suffix); } + + /** + * @param string input string, may be {@code null} + * @return {@link Optional#empty()} if {@code string} is {@code null} or empty, otherwise the string wrapped in an {@link Optional}. + */ + public static Optional optionalOfEmpty(String string) { + return Optional.ofNullable(string).filter(s -> !s.isEmpty()); + } } diff --git a/src/main/resources/l10n/JabRef_en.properties b/src/main/resources/l10n/JabRef_en.properties index 1e90419d3d8..697c2d484ed 100644 --- a/src/main/resources/l10n/JabRef_en.properties +++ b/src/main/resources/l10n/JabRef_en.properties @@ -372,6 +372,10 @@ Scrapes\ the\ first\ page\ of\ a\ PDF\ for\ BibTeX\ information.=Scrapes the fir This\ importer\ enables\ "--importToOpen\ someEntry.bib"=This importer enables "--importToOpen someEntry.bib" This\ importer\ parses\ data\ of\ the\ first\ page\ of\ the\ PDF\ and\ creates\ a\ BibTeX\ entry.\ Currently,\ Springer\ and\ IEEE\ formats\ are\ supported.=This importer parses data of the first page of the PDF and creates a BibTeX entry. Currently, Springer and IEEE formats are supported. +Could\ not\ find\ metadata\ file.\ Possibly\ corrupted\ ePUB\ file.=Could not find metadata file. Possibly corrupted ePUB file. 
+Import\ the\ popular\ e-book\ file\ format\ ePUB=Import the popular e-book file format ePUB +ePUB\ entry\ was\ successfully\ imported=ePUB entry was successfully imported + Exporting\ %0=Exporting %0 Could\ not\ export\ file\ '%0'\ (reason\:\ %1)=Could not export file '%0' (reason: %1) Unknown\ export\ format\ %0=Unknown export format %0 diff --git a/src/test/java/org/jabref/logic/importer/fileformat/EpubImporterFilesTest.java b/src/test/java/org/jabref/logic/importer/fileformat/EpubImporterFilesTest.java new file mode 100644 index 00000000000..72ab2c1b956 --- /dev/null +++ b/src/test/java/org/jabref/logic/importer/fileformat/EpubImporterFilesTest.java @@ -0,0 +1,113 @@ +package org.jabref.logic.importer.fileformat; + +import java.io.IOException; +import java.net.URISyntaxException; +import java.nio.file.Path; +import java.util.Collections; +import java.util.List; +import java.util.function.Predicate; +import java.util.stream.Stream; + +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.xpath.XPathExpressionException; + +import org.jabref.logic.bibtex.FieldPreferences; +import org.jabref.logic.citationkeypattern.CitationKeyPatternPreferences; +import org.jabref.logic.importer.ImportFormatPreferences; +import org.jabref.logic.importer.util.GrobidPreferences; +import org.jabref.logic.preferences.DOIPreferences; +import org.jabref.logic.util.StandardFileType; +import org.jabref.logic.xmp.XmpPreferences; +import org.jabref.model.entry.BibEntry; +import org.jabref.model.entry.BibEntryPreferences; +import org.jabref.model.entry.LinkedFile; +import org.jabref.model.entry.field.StandardField; +import org.jabref.model.entry.types.StandardEntryType; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.mockito.Mockito.mock; + +class 
EpubImporterFilesTest { + private static final String FILE_ENDING = ".epub"; + + private EpubImporter importer; + + @BeforeEach + void setUp() throws XPathExpressionException, ParserConfigurationException { + BibEntryPreferences bibEntryPreferences = new BibEntryPreferences(','); + + ImportFormatPreferences importFormatPreferences = new ImportFormatPreferences( + bibEntryPreferences, + mock(CitationKeyPatternPreferences.class), + mock(FieldPreferences.class), + mock(XmpPreferences.class), + mock(DOIPreferences.class), + mock(GrobidPreferences.class) + ); + + this.importer = new EpubImporter(importFormatPreferences); + } + + private static Stream fileNames() throws IOException { + Predicate fileName = name -> name.startsWith("EpubImporterTest") && name.endsWith(FILE_ENDING); + return ImporterTestEngine.getTestFiles(fileName).stream(); + } + + private static Stream invalidFileNames() throws IOException { + // `.ctv6bak` is a ZIP-based file format, just as ePUB. We need to exclude them from tests, as + // {@link EpubImporter#isRecognizedFormat} works by checking only the ZIP file header. + Predicate fileName = name -> !name.startsWith("EpubImporterTest") && !name.endsWith(".ctv6bak"); + return ImporterTestEngine.getTestFiles(fileName).stream(); + } + + @ParameterizedTest + @MethodSource("fileNames") + void isRecognizedFormat(String fileName) throws IOException, XPathExpressionException, ParserConfigurationException { + ImporterTestEngine.testIsRecognizedFormat(importer, fileName); + } + + @ParameterizedTest + @MethodSource("invalidFileNames") + void isNotRecognizedFormat(String fileName) throws IOException { + ImporterTestEngine.testIsNotRecognizedFormat(importer, fileName); + } + + // Grimms were taken from Project Gutenberg. They were chosen as the book has 2 authors. 
+ + @Test + void grimmEpubOld() throws URISyntaxException, IOException { + Path file = Path.of(EpubImporterFilesTest.class.getResource("EpubImporterTest1Old.epub").toURI()); + + List result = importer.importDatabase(file).getDatabase().getEntries(); + + assertEquals(Collections.singletonList(grimms(file)), result); + } + + @Test + void grimmEpub3() throws URISyntaxException, IOException { + Path file = Path.of(EpubImporterFilesTest.class.getResource("EpubImporterTest2Epub3.epub").toURI()); + + List result = importer.importDatabase(file).getDatabase().getEntries(); + + assertEquals(Collections.singletonList(grimms(file)), result); + } + + // Both ePUB3 and the older ePUB version should yield the same {@link BibEntry} (except for the `file` field, which links the imported file). + BibEntry grimms(Path file) { + BibEntry expected = new BibEntry(StandardEntryType.Book); + expected.setField(StandardField.AUTHOR, "Jacob Grimm and Wilhelm Grimm"); + expected.setField(StandardField.TITLE, "Grimms' Fairy Tales"); + expected.setField(StandardField.KEYWORDS, "Fairy tales -- Germany"); + expected.setField(StandardField.LANGUAGE, "en"); + expected.setField(StandardField.URL, "https://www.gutenberg.org/files/2591/2591-h/2591-h.htm"); + + expected.setFiles(List.of(new LinkedFile("", file.toAbsolutePath(), StandardFileType.EPUB.getName()))); + + return expected; + } +} diff --git a/src/test/java/org/jabref/logic/importer/fileformat/EpubImporterTest.java b/src/test/java/org/jabref/logic/importer/fileformat/EpubImporterTest.java new file mode 100644 index 00000000000..a72ca9cadc5 --- /dev/null +++ b/src/test/java/org/jabref/logic/importer/fileformat/EpubImporterTest.java @@ -0,0 +1,37 @@ +package org.jabref.logic.importer.fileformat; + +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.xpath.XPathExpressionException; + +import org.jabref.logic.importer.ImportFormatPreferences; +import org.jabref.logic.util.StandardFileType; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; 
+ +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.mockito.Mockito.mock; + +class EpubImporterTest { + private EpubImporter importer; + + @BeforeEach + void setUp() throws XPathExpressionException, ParserConfigurationException { + this.importer = new EpubImporter(mock(ImportFormatPreferences.class)); + } + + @Test + void getFormatName() { + assertEquals("ePUB", importer.getName()); + } + + @Test + void getCLIId() { + assertEquals("epub", importer.getId()); + } + + @Test + void sGetExtensions() { + assertEquals(StandardFileType.EPUB, importer.getFileType()); + } +} diff --git a/src/test/resources/org/jabref/logic/importer/fileformat/EpubImporterTest1Old.epub b/src/test/resources/org/jabref/logic/importer/fileformat/EpubImporterTest1Old.epub new file mode 100644 index 00000000000..e6fd08e9ceb Binary files /dev/null and b/src/test/resources/org/jabref/logic/importer/fileformat/EpubImporterTest1Old.epub differ diff --git a/src/test/resources/org/jabref/logic/importer/fileformat/EpubImporterTest2Epub3.epub b/src/test/resources/org/jabref/logic/importer/fileformat/EpubImporterTest2Epub3.epub new file mode 100644 index 00000000000..3d6bbd4d22a Binary files /dev/null and b/src/test/resources/org/jabref/logic/importer/fileformat/EpubImporterTest2Epub3.epub differ