1
1
package org .jabref .logic .ocr ;
2
2
3
+ import java .io .File ;
4
+ import java .nio .file .Path ;
5
+
6
+ import org .jabref .model .strings .StringUtil ;
7
+
8
+ import com .sun .jna .Platform ;
3
9
import net .sourceforge .tess4j .Tesseract ;
4
10
import net .sourceforge .tess4j .TesseractException ;
5
- import org .jabref .model .strings .StringUtil ; // JabRef utility class
6
11
import org .slf4j .Logger ;
7
12
import org .slf4j .LoggerFactory ;
8
13
9
- import java .io .File ;
10
- import java .nio .file .Path ;
11
-
12
14
/**
13
15
* Service for performing Optical Character Recognition (OCR) on PDF files.
14
16
* This class provides a high-level interface to OCR functionality,
15
17
* abstracting away the specific OCR engine implementation details.
16
18
*/
17
19
public class OcrService {
18
20
private static final Logger LOGGER = LoggerFactory .getLogger (OcrService .class );
19
-
21
+ private static final String JNA_LIBRARY_PATH = "jna.library.path" ;
20
22
// The OCR engine instance
21
23
private final Tesseract tesseract ;
22
24
@@ -25,6 +27,13 @@ public class OcrService {
25
27
* Currently uses Tesseract with English language support.
26
28
*/
27
29
public OcrService () {
30
+ if (Platform .isMac ()) {
31
+ if (Platform .isARM ()) {
32
+ System .setProperty (JNA_LIBRARY_PATH , JNA_LIBRARY_PATH + File .pathSeparator + "/opt/homebrew/lib/" );
33
+ } else {
34
+ System .setProperty (JNA_LIBRARY_PATH , JNA_LIBRARY_PATH + File .pathSeparator + "/usr/local/cellar/" );
35
+ }
36
+ }
28
37
this .tesseract = new Tesseract ();
29
38
30
39
// Configure Tesseract
@@ -66,8 +75,8 @@ public String performOcr(Path pdfPath) throws OcrException {
66
75
67
76
LOGGER .info ("OCR completed successfully. Extracted {} characters" , result .length ());
68
77
return result ;
69
-
70
- } catch ( TesseractException e ) {
78
+ } catch (
79
+ TesseractException e ) {
71
80
LOGGER .error ("OCR failed for file: {}" , pdfFile .getName (), e );
72
81
throw new OcrException (
73
82
"Failed to perform OCR on file: " + pdfFile .getName () +
0 commit comments