Skip to content

Commit a51e3b0

Browse files
committed
Initial implementation using tess4j
1 parent 251b84a commit a51e3b0

File tree

10 files changed

+277
-3
lines changed

10 files changed

+277
-3
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -581,3 +581,5 @@ CHANGELOG.html
581581

582582
# some strange gradle/IntelliJ extension
583583
extension 'reporting' property 'baseDirectory'
584+
585+
tessdata/

jabgui/src/main/java/org/jabref/gui/fieldeditors/LinkedFilesEditor.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -325,7 +325,8 @@ private void handleItemMouseClick(LinkedFileViewModel linkedFile, MouseEvent eve
325325
bibEntry,
326326
viewModel,
327327
contextCommandFactory,
328-
multiContextCommandFactory
328+
multiContextCommandFactory,
329+
taskExecutor
329330
);
330331

331332
ContextMenu contextMenu = contextMenuFactory.createForSelection(listView.getSelectionModel().getSelectedItems());

jabgui/src/main/java/org/jabref/gui/fieldeditors/contextmenu/ContextMenuFactory.java

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import javafx.collections.ObservableList;
44
import javafx.scene.control.ContextMenu;
5+
import javafx.scene.control.MenuItem;
56
import javafx.scene.control.SeparatorMenuItem;
67

78
import org.jabref.gui.DialogService;
@@ -10,7 +11,10 @@
1011
import org.jabref.gui.copyfiles.CopySingleFileAction;
1112
import org.jabref.gui.fieldeditors.LinkedFileViewModel;
1213
import org.jabref.gui.fieldeditors.LinkedFilesEditorViewModel;
14+
import org.jabref.gui.linkedfile.OcrAction;
1315
import org.jabref.gui.preferences.GuiPreferences;
16+
import org.jabref.logic.l10n.Localization;
17+
import org.jabref.logic.util.TaskExecutor;
1418
import org.jabref.model.database.BibDatabaseContext;
1519
import org.jabref.model.entry.BibEntry;
1620

@@ -25,21 +29,24 @@ public class ContextMenuFactory {
2529
private final LinkedFilesEditorViewModel viewModel;
2630
private final SingleContextCommandFactory singleCommandFactory;
2731
private final MultiContextCommandFactory multiCommandFactory;
32+
private final TaskExecutor taskExecutor;
2833

2934
public ContextMenuFactory(DialogService dialogService,
3035
GuiPreferences preferences,
3136
BibDatabaseContext databaseContext,
3237
ObservableOptionalValue<BibEntry> bibEntry,
3338
LinkedFilesEditorViewModel viewModel,
3439
SingleContextCommandFactory singleCommandFactory,
35-
MultiContextCommandFactory multiCommandFactory) {
40+
MultiContextCommandFactory multiCommandFactory,
41+
TaskExecutor taskExecutor) {
3642
this.dialogService = dialogService;
3743
this.preferences = preferences;
3844
this.databaseContext = databaseContext;
3945
this.bibEntry = bibEntry;
4046
this.viewModel = viewModel;
4147
this.singleCommandFactory = singleCommandFactory;
4248
this.multiCommandFactory = multiCommandFactory;
49+
this.taskExecutor = taskExecutor;
4350
}
4451

4552
public ContextMenu createForSelection(ObservableList<LinkedFileViewModel> selectedFiles) {
@@ -86,9 +93,45 @@ private ContextMenu createContextMenuForFile(LinkedFileViewModel linkedFile) {
8693
factory.createMenuItem(StandardActions.DELETE_FILE, singleCommandFactory.build(StandardActions.DELETE_FILE, linkedFile))
8794
);
8895

96+
// Add OCR menu item for PDF files
97+
if (linkedFile.getFile().getFileType().equalsIgnoreCase("pdf")) {
98+
menu.getItems().add(new SeparatorMenuItem());
99+
100+
MenuItem ocrItem = createOcrMenuItem(linkedFile);
101+
menu.getItems().add(ocrItem);
102+
}
103+
89104
return menu;
90105
}
91106

107+
/**
108+
* Creates the OCR menu item for a PDF file.
109+
* The menu item is only enabled if the PDF file exists on disk.
110+
*
111+
* @param linkedFile The linked PDF file
112+
* @return MenuItem configured for OCR action
113+
*/
114+
private MenuItem createOcrMenuItem(LinkedFileViewModel linkedFile) {
115+
MenuItem ocrItem = new MenuItem(Localization.lang("Extract text (OCR)"));
116+
117+
// Create the OCR action
118+
OcrAction ocrAction = new OcrAction(
119+
linkedFile.getFile(),
120+
databaseContext,
121+
dialogService,
122+
preferences.getFilePreferences(),
123+
taskExecutor
124+
);
125+
126+
// Set the action to execute when clicked
127+
ocrItem.setOnAction(event -> ocrAction.execute());
128+
129+
// Disable if the action is not executable (file doesn't exist)
130+
ocrItem.disableProperty().bind(ocrAction.executableProperty().not());
131+
132+
return ocrItem;
133+
}
134+
92135
@FunctionalInterface
93136
public interface SingleContextCommandFactory {
94137
ContextAction build(StandardActions action, LinkedFileViewModel file);
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
package org.jabref.gui.linkedfile;
2+
3+
import org.jabref.gui.DialogService;
4+
import org.jabref.gui.StateManager;
5+
import org.jabref.gui.actions.Action;
6+
import org.jabref.gui.actions.ActionHelper;
7+
import org.jabref.gui.actions.SimpleCommand;
8+
import org.jabref.logic.util.BackgroundTask;
9+
import org.jabref.logic.util.TaskExecutor;
10+
import org.jabref.logic.l10n.Localization;
11+
import org.jabref.logic.ocr.OcrService;
12+
import org.jabref.logic.ocr.OcrException;
13+
import org.jabref.model.database.BibDatabaseContext;
14+
import org.jabref.model.entry.LinkedFile;
15+
import org.jabref.logic.FilePreferences;
16+
17+
import java.nio.file.Path;
18+
import java.util.Optional;
19+
20+
/**
21+
* Action for performing OCR (Optical Character Recognition) on linked PDF files.
22+
* <p>
23+
* This action extracts text content from PDF files that are attached to BibTeX entries.
24+
* It runs the OCR process in a background thread to keep the UI responsive and provides
25+
* user feedback through dialogs and notifications.
26+
* <p>
27+
* The action follows JabRef's command pattern and can be triggered from context menus.
28+
* It includes built-in validation to ensure it's only enabled for PDF files that exist on disk.
29+
*
30+
* @see OcrService
31+
* @see org.jabref.gui.actions.SimpleCommand
32+
*/
33+
34+
// Personal Note: Add more doc in between later
35+
36+
public class OcrAction extends SimpleCommand {
37+
38+
private final LinkedFile linkedFile;
39+
private final BibDatabaseContext databaseContext;
40+
private final DialogService dialogService;
41+
private final FilePreferences filePreferences;
42+
private final TaskExecutor taskExecutor;
43+
44+
public OcrAction(LinkedFile linkedFile,
45+
BibDatabaseContext databaseContext,
46+
DialogService dialogService,
47+
FilePreferences filePreferences,
48+
TaskExecutor taskExecutor) {
49+
this.linkedFile = linkedFile;
50+
this.databaseContext = databaseContext;
51+
this.dialogService = dialogService;
52+
this.filePreferences = filePreferences;
53+
this.taskExecutor = taskExecutor;
54+
55+
// Only executable for existing PDF files
56+
this.executable.set(
57+
linkedFile.getFileType().equalsIgnoreCase("pdf") &&
58+
linkedFile.findIn(databaseContext, filePreferences).isPresent()
59+
);
60+
}
61+
62+
@Override
63+
public void execute() {
64+
Optional<Path> filePath = linkedFile.findIn(databaseContext, filePreferences);
65+
66+
if (filePath.isEmpty()) {
67+
dialogService.showErrorDialogAndWait(
68+
Localization.lang("File not found"),
69+
Localization.lang("Could not locate the PDF file on disk.")
70+
);
71+
return;
72+
}
73+
74+
dialogService.notify(Localization.lang("Performing OCR..."));
75+
76+
BackgroundTask.wrap(() -> {
77+
OcrService ocrService = new OcrService();
78+
return ocrService.performOcr(filePath.get());
79+
})
80+
.onSuccess(extractedText -> {
81+
if (extractedText.isEmpty()) {
82+
dialogService.showInformationDialogAndWait(
83+
Localization.lang("OCR Complete"),
84+
Localization.lang("No text was found in the PDF.")
85+
);
86+
} else {
87+
// For now, just show preview
88+
String preview = extractedText.length() > 1000
89+
? extractedText.substring(0, 1000) + "..."
90+
: extractedText;
91+
92+
dialogService.showInformationDialogAndWait(
93+
Localization.lang("OCR Result"),
94+
preview
95+
);
96+
}
97+
})
98+
.onFailure(exception -> {
99+
dialogService.showErrorDialogAndWait(
100+
Localization.lang("OCR failed"),
101+
exception.getMessage()
102+
);
103+
})
104+
.executeWith(taskExecutor);
105+
}
106+
}

jabgui/src/test/java/org/jabref/gui/fieldeditors/contextmenu/ContextMenuFactoryTest.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import org.jabref.gui.fieldeditors.LinkedFileViewModel;
1212
import org.jabref.gui.fieldeditors.LinkedFilesEditorViewModel;
1313
import org.jabref.gui.preferences.GuiPreferences;
14+
import org.jabref.logic.util.TaskExecutor;
1415
import org.jabref.model.database.BibDatabaseContext;
1516
import org.jabref.model.entry.BibEntry;
1617
import org.jabref.model.entry.LinkedFile;
@@ -41,6 +42,7 @@ public class ContextMenuFactoryTest {
4142
private ContextMenuFactory factory;
4243
private ContextMenuFactory.SingleContextCommandFactory singleCommandFactory;
4344
private ContextMenuFactory.MultiContextCommandFactory multiCommandFactory;
45+
private TaskExecutor taskExecutor;
4446

4547
@BeforeAll
4648
public static void initToolkit() {
@@ -78,7 +80,8 @@ public void setUp() {
7880
bibEntry,
7981
viewModel,
8082
singleCommandFactory,
81-
multiCommandFactory
83+
multiCommandFactory,
84+
taskExecutor
8285
);
8386
}
8487

jablib/build.gradle.kts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,9 @@ dependencies {
243243
// Required for LocalizationConsistencyTest
244244
testImplementation("org.testfx:testfx-core:4.0.16-alpha")
245245
testImplementation("org.testfx:testfx-junit5:4.0.16-alpha")
246+
247+
// OCR support
248+
implementation("net.sourceforge.tess4j:tess4j:5.15.0")
246249
}
247250
/*
248251
jacoco {

jablib/src/main/java/module-info.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@
104104
exports org.jabref.logic.crawler;
105105
exports org.jabref.logic.git;
106106
exports org.jabref.logic.pseudonymization;
107+
exports org.jabref.logic.ocr;
107108

108109
requires java.base;
109110

@@ -250,5 +251,6 @@
250251
requires mslinks;
251252
requires org.antlr.antlr4.runtime;
252253
requires org.libreoffice.uno;
254+
requires tess4j;
253255
// endregion
254256
}
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
package org.jabref.logic.ocr;
2+
3+
/**
 * Signals that an OCR operation could not be completed.
 * <p>
 * Callers deal with this single checked exception type; an engine-specific
 * exception can be attached as the cause.
 */
public class OcrException extends Exception {

    /**
     * Creates an exception that carries only a description of the failure.
     *
     * @param message description of what went wrong
     */
    public OcrException(String message) {
        super(message);
    }

    /**
     * Creates an exception that carries a description and the original failure.
     *
     * @param message description of what went wrong
     * @param cause   the exception that triggered this failure
     */
    public OcrException(String message, Throwable cause) {
        super(message, cause);
    }
}
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
package org.jabref.logic.ocr;
2+
3+
import net.sourceforge.tess4j.Tesseract;
4+
import net.sourceforge.tess4j.TesseractException;
5+
import org.jabref.model.strings.StringUtil; // JabRef utility class
6+
import org.slf4j.Logger;
7+
import org.slf4j.LoggerFactory;
8+
9+
import java.io.File;
10+
import java.nio.file.Path;
11+
12+
/**
13+
* Service for performing Optical Character Recognition (OCR) on PDF files.
14+
* This class provides a high-level interface to OCR functionality,
15+
* abstracting away the specific OCR engine implementation details.
16+
*/
17+
public class OcrService {
18+
private static final Logger LOGGER = LoggerFactory.getLogger(OcrService.class);
19+
20+
// The OCR engine instance
21+
private final Tesseract tesseract;
22+
23+
/**
24+
* Constructs a new OcrService with default settings.
25+
* Currently uses Tesseract with English language support.
26+
*/
27+
public OcrService() {
28+
this.tesseract = new Tesseract();
29+
30+
// Configure Tesseract
31+
tesseract.setLanguage("eng");
32+
33+
// TODO: This path needs to be configurable and bundled properly
34+
// For now, we'll use a relative path that works during development
35+
tesseract.setDatapath("tessdata");
36+
37+
LOGGER.debug("Initialized OcrService with Tesseract");
38+
}
39+
40+
/**
41+
* Performs OCR on a PDF file and returns the extracted text.
42+
*
43+
* @param pdfPath Path to the PDF file to process
44+
* @return The extracted text, or empty string if no text found
45+
* @throws OcrException if OCR processing fails
46+
*/
47+
public String performOcr(Path pdfPath) throws OcrException {
48+
// Validate input
49+
if (pdfPath == null) {
50+
throw new OcrException("PDF path cannot be null");
51+
}
52+
53+
File pdfFile = pdfPath.toFile();
54+
if (!pdfFile.exists()) {
55+
throw new OcrException("PDF file does not exist: " + pdfPath);
56+
}
57+
58+
try {
59+
LOGGER.info("Starting OCR for file: {}", pdfFile.getName());
60+
61+
// Perform OCR
62+
String result = tesseract.doOCR(pdfFile);
63+
64+
// Clean up the result (remove extra whitespace, etc.)
65+
result = StringUtil.isBlank(result) ? "" : result.trim();
66+
67+
LOGGER.info("OCR completed successfully. Extracted {} characters", result.length());
68+
return result;
69+
70+
} catch (TesseractException e) {
71+
LOGGER.error("OCR failed for file: {}", pdfFile.getName(), e);
72+
throw new OcrException(
73+
"Failed to perform OCR on file: " + pdfFile.getName() +
74+
". Error: " + e.getMessage(), e
75+
);
76+
}
77+
}
78+
}

jablib/src/main/resources/l10n/JabRef_en.properties

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -316,6 +316,14 @@ Extract\ references\ from\ file\ (online)=Extract references from file (online)
316316
Extract\ References\ (offline)=Extract References (offline)
317317
Extract\ References\ (online)=Extract References (online)
318318

319+
Extract\ text\ (OCR)=Extract text (OCR)
320+
Performing\ OCR...=Performing OCR...
321+
OCR\ Complete=OCR Complete
322+
OCR\ Result=OCR Result
323+
OCR\ failed=OCR failed
324+
No\ text\ was\ found\ in\ the\ PDF.=No text was found in the PDF.
325+
Could\ not\ locate\ the\ PDF\ file\ on\ disk.=Could not locate the PDF file on disk.
326+
319327
Processing...=Processing...
320328
Processing\ "%0"...=Processing "%0"...
321329
Processing\ PDF(s)=Processing PDF(s)

0 commit comments

Comments
 (0)