Skip to content

Commit f80cec8

Browse files
committed
addressed mentor code feedback after 19.06.25
1 parent 5a256ae commit f80cec8

File tree

3 files changed

+120
-82
lines changed

3 files changed

+120
-82
lines changed

jabgui/src/main/java/org/jabref/gui/linkedfile/OcrAction.java

Lines changed: 54 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import org.jabref.logic.util.TaskExecutor;
1010
import org.jabref.logic.l10n.Localization;
1111
import org.jabref.logic.ocr.OcrService;
12+
import org.jabref.logic.ocr.OcrResult;
1213
import org.jabref.logic.ocr.OcrException;
1314
import org.jabref.model.database.BibDatabaseContext;
1415
import org.jabref.model.entry.LinkedFile;
@@ -17,23 +18,14 @@
1718
import java.nio.file.Path;
1819
import java.util.Optional;
1920

21+
import org.slf4j.Logger;
22+
import org.slf4j.LoggerFactory;
23+
2024
/**
2125
* Action for performing OCR (Optical Character Recognition) on linked PDF files.
22-
* <p>
23-
* This action extracts text content from PDF files that are attached to BibTeX entries.
24-
* It runs the OCR process in a background thread to keep the UI responsive and provides
25-
* user feedback through dialogs and notifications.
26-
* <p>
27-
* The action follows JabRef's command pattern and can be triggered from context menus.
28-
* It includes built-in validation to ensure it's only enabled for PDF files that exist on disk.
29-
*
30-
* @see OcrService
31-
* @see org.jabref.gui.actions.SimpleCommand
3226
*/
33-
34-
// Personal Note: Add more doc in between later
35-
3627
public class OcrAction extends SimpleCommand {
28+
private static final Logger LOGGER = LoggerFactory.getLogger(OcrAction.class);
3729

3830
private final LinkedFile linkedFile;
3931
private final BibDatabaseContext databaseContext;
@@ -73,34 +65,55 @@ public void execute() {
7365

7466
dialogService.notify(Localization.lang("Performing OCR..."));
7567

76-
BackgroundTask.wrap(() -> {
77-
OcrService ocrService = new OcrService();
78-
return ocrService.performOcr(filePath.get());
79-
})
80-
.onSuccess(extractedText -> {
81-
if (extractedText.isEmpty()) {
82-
dialogService.showInformationDialogAndWait(
83-
Localization.lang("OCR Complete"),
84-
Localization.lang("No text was found in the PDF.")
85-
);
86-
} else {
87-
// For now, just show preview
88-
String preview = extractedText.length() > 1000
89-
? extractedText.substring(0, 1000) + "..."
90-
: extractedText;
68+
BackgroundTask<OcrResult> task = BackgroundTask.wrap(() -> {
69+
try {
70+
OcrService ocrService = new OcrService();
71+
return ocrService.performOcr(filePath.get());
72+
} catch (OcrException e) {
73+
// This is a bug in JabRef
74+
LOGGER.error("Failed to initialize OCR service", e);
75+
return OcrResult.failure("OCR service initialization failed: " + e.getMessage());
76+
}
77+
})
78+
.showToUser(true) // Show in task list
79+
.withInitialMessage(Localization.lang("Performing OCR on %0", linkedFile.getLink()));
80+
81+
task.onSuccess(result -> {
82+
// Use pattern matching with the sealed class
83+
switch (result) {
84+
case OcrResult.Success success -> {
85+
String extractedText = success.text();
86+
if (extractedText.isEmpty()) {
87+
dialogService.showInformationDialogAndWait(
88+
Localization.lang("OCR Complete"),
89+
Localization.lang("No text was found in the PDF.")
90+
);
91+
} else {
92+
// Show preview
93+
String preview = extractedText.length() > 1000
94+
? extractedText.substring(0, 1000) + "..."
95+
: extractedText;
9196

92-
dialogService.showInformationDialogAndWait(
93-
Localization.lang("OCR Result"),
94-
preview
95-
);
96-
}
97-
})
98-
.onFailure(exception -> {
99-
dialogService.showErrorDialogAndWait(
100-
Localization.lang("OCR failed"),
101-
exception.getMessage()
102-
);
103-
})
104-
.executeWith(taskExecutor);
97+
dialogService.showInformationDialogAndWait(
98+
Localization.lang("OCR Result"),
99+
preview
100+
);
101+
}
102+
}
103+
case OcrResult.Failure failure -> {
104+
dialogService.showErrorDialogAndWait(
105+
Localization.lang("OCR failed"),
106+
failure.errorMessage()
107+
);
108+
}
109+
}
110+
})
111+
.onFailure(exception -> {
112+
dialogService.showErrorDialogAndWait(
113+
Localization.lang("OCR failed"),
114+
exception.getMessage()
115+
);
116+
})
117+
.executeWith(taskExecutor);
105118
}
106119
}
Lines changed: 46 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,60 @@
11
package org.jabref.logic.ocr;
22

3-
import java.util.Optional;
4-
5-
public class OcrResult {
6-
private final boolean success;
7-
private final String text;
8-
private final String errorMessage;
9-
10-
private OcrResult(boolean success, String text, String errorMessage) {
11-
this.success = success;
12-
this.text = text;
13-
this.errorMessage = errorMessage;
3+
/**
4+
* Represents the result of an OCR operation.
5+
* Uses sealed classes to ensure type safety and avoid null parameters.
6+
*/
7+
public sealed interface OcrResult {
8+
9+
/**
10+
* Represents a successful OCR operation with extracted text.
11+
*/
12+
record Success(String text) implements OcrResult {
13+
public Success {
14+
// Convert null to empty string instead of throwing exception
15+
if (text == null) {
16+
text = "";
17+
}
18+
}
1419
}
1520

16-
public static OcrResult success(String text) {
17-
return new OcrResult(true, text, null);
21+
/**
22+
* Represents a failed OCR operation with an error message.
23+
*/
24+
record Failure(String errorMessage) implements OcrResult {
25+
public Failure {
26+
// Provide default message instead of throwing exception
27+
if (errorMessage == null || errorMessage.isBlank()) {
28+
errorMessage = "Unknown error occurred";
29+
}
30+
}
1831
}
1932

20-
public static OcrResult failure(String errorMessage) {
21-
return new OcrResult(false, null, errorMessage);
33+
/**
34+
* Checks if this result represents a success.
35+
*/
36+
default boolean isSuccess() {
37+
return this instanceof Success;
2238
}
2339

24-
public boolean isSuccess() {
25-
return success;
40+
/**
41+
* Checks if this result represents a failure.
42+
*/
43+
default boolean isFailure() {
44+
return this instanceof Failure;
2645
}
2746

28-
public Optional<String> getText() {
29-
return Optional.ofNullable(text);
47+
/**
48+
* Factory method for creating a success result.
49+
*/
50+
static OcrResult success(String text) {
51+
return new Success(text);
3052
}
3153

32-
public Optional<String> getErrorMessage() {
33-
return Optional.ofNullable(errorMessage);
54+
/**
55+
* Factory method for creating a failure result.
56+
*/
57+
static OcrResult failure(String errorMessage) {
58+
return new Failure(errorMessage);
3459
}
3560
}

jablib/src/main/java/org/jabref/logic/ocr/OcrService.java

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
package org.jabref.logic.ocr;
22

3-
import java.io.File;
3+
import java.nio.file.Files;
44
import java.nio.file.Path;
5+
import java.nio.file.Paths;
56

67
import org.jabref.model.strings.StringUtil;
78

@@ -46,10 +47,10 @@ private void configureLibraryPath() {
4647
String originalPath = System.getProperty(JNA_LIBRARY_PATH, "");
4748
if (Platform.isARM()) {
4849
System.setProperty(JNA_LIBRARY_PATH,
49-
originalPath + File.pathSeparator + "/opt/homebrew/lib/");
50+
originalPath + java.io.File.pathSeparator + "/opt/homebrew/lib/");
5051
} else {
5152
System.setProperty(JNA_LIBRARY_PATH,
52-
originalPath + File.pathSeparator + "/usr/local/cellar/");
53+
originalPath + java.io.File.pathSeparator + "/usr/local/cellar/");
5354
}
5455
}
5556
}
@@ -59,11 +60,11 @@ private void configureTessdata() throws OcrException {
5960
String tessdataPath = System.getenv(TESSDATA_PREFIX);
6061

6162
if (tessdataPath != null && !tessdataPath.isEmpty()) {
62-
File tessdataDir = new File(tessdataPath);
63-
if (tessdataDir.exists() && tessdataDir.isDirectory()) {
63+
Path tessdataDir = Paths.get(tessdataPath);
64+
if (Files.exists(tessdataDir) && Files.isDirectory(tessdataDir)) {
6465
// Tesseract expects the parent directory of tessdata
65-
if (tessdataDir.getName().equals("tessdata")) {
66-
tesseract.setDatapath(tessdataDir.getParent());
66+
if (tessdataDir.getFileName().toString().equals("tessdata")) {
67+
tesseract.setDatapath(tessdataDir.getParent().toString());
6768
} else {
6869
tesseract.setDatapath(tessdataPath);
6970
}
@@ -91,11 +92,13 @@ private String findSystemTessdata() {
9192
"/usr/share" // System
9293
};
9394

94-
for (String path : possiblePaths) {
95-
File tessdata = new File(path, "tessdata");
96-
File engData = new File(tessdata, "eng.traineddata");
97-
if (tessdata.exists() && engData.exists()) {
98-
return path; // Return parent of tessdata
95+
for (String pathStr : possiblePaths) {
96+
Path path = Paths.get(pathStr);
97+
Path tessdata = path.resolve("tessdata");
98+
Path engData = tessdata.resolve("eng.traineddata");
99+
100+
if (Files.exists(tessdata) && Files.exists(engData)) {
101+
return pathStr; // Return parent of tessdata
99102
}
100103
}
101104

@@ -106,8 +109,7 @@ private String findSystemTessdata() {
106109
* Performs OCR on a PDF file and returns the extracted text.
107110
*
108111
* @param pdfPath Path to the PDF file to process
109-
* @return The extracted text, or empty string if no text found
110-
* @throws OcrException if OCR processing fails
112+
* @return The extracted text result
111113
*/
112114
public OcrResult performOcr(Path pdfPath) {
113115
// User error - not an exception
@@ -116,18 +118,16 @@ public OcrResult performOcr(Path pdfPath) {
116118
return OcrResult.failure("No file path provided");
117119
}
118120

119-
File pdfFile = pdfPath.toFile();
120-
121121
// User error - not an exception
122-
if (!pdfFile.exists()) {
122+
if (!Files.exists(pdfPath)) {
123123
LOGGER.warn("PDF file does not exist: {}", pdfPath);
124124
return OcrResult.failure("File does not exist: " + pdfPath.getFileName());
125125
}
126126

127127
try {
128-
LOGGER.info("Starting OCR for file: {}", pdfFile.getName());
128+
LOGGER.info("Starting OCR for file: {}", pdfPath.getFileName());
129129

130-
String result = tesseract.doOCR(pdfFile);
130+
String result = tesseract.doOCR(pdfPath.toFile());
131131
result = StringUtil.isBlank(result) ? "" : result.trim();
132132

133133
LOGGER.info("OCR completed successfully. Extracted {} characters", result.length());
@@ -136,7 +136,7 @@ public OcrResult performOcr(Path pdfPath) {
136136
} catch (TesseractException e) {
137137
// This could be either a user error (corrupt PDF) or our bug
138138
// Log it as error but return as failure, not exception
139-
LOGGER.error("OCR processing failed for file: {}", pdfFile.getName(), e);
139+
LOGGER.error("OCR processing failed for file: {}", pdfPath.getFileName(), e);
140140
return OcrResult.failure("Failed to extract text from PDF: " + e.getMessage());
141141
}
142142
}

0 commit comments

Comments
 (0)