@@ -8,6 +8,18 @@ import {
8
8
workerTimeout ,
9
9
} from '../globals'
10
10
import type { OcrOptions } from '../types'
11
+ import type { ocrLangs } from './ocr-langs'
12
+
13
/**
 * Concatenates an array of langs into the single string passed to Tesseract,
 * e.g. ['fra', 'eng'] => 'eng+fra'.
 * The langs are sorted alphabetically because the result is also used as a
 * cache key, so the same set of langs must always produce the same string.
 * The input array is copied before sorting and is never mutated.
 * @param langs Language codes to combine, in any order
 * @returns The language codes sorted and joined with '+' ('' for an empty array)
 */
function concatLangs(langs: ReadonlyArray<typeof ocrLangs[number]>): string {
  // Array.prototype.sort() sorts in place; copy first so the caller's
  // array (e.g. options.langs) keeps its original order.
  return [...langs].sort().join('+')
}
11
23
12
24
class OCRWorker {
13
25
static #pool: OCRWorker [ ] = [ ]
@@ -51,12 +63,12 @@ class OCRWorker {
51
63
} ) : Promise < { text : string ; langs : string } > {
52
64
return new Promise ( async ( resolve , reject ) => {
53
65
this . #running = true
54
- const langs = msg . options . langs . join ( '+' )
66
+ const langs = concatLangs ( msg . options . langs )
55
67
56
68
if ( ! this . #ready) {
57
69
await this . worker . load ( )
58
70
await this . worker . loadLanguage ( langs )
59
- await this . worker . initialize ( msg . options . langs [ 0 ] )
71
+ await this . worker . initialize ( langs )
60
72
this . #ready = true
61
73
}
62
74
@@ -101,9 +113,9 @@ class OCRManager {
101
113
}
102
114
103
115
async #getImageText( file : TFile , options : OcrOptions ) : Promise < string > {
104
- const optLangs = options . langs . sort ( ) . join ( '+' )
116
+ const langs = concatLangs ( options . langs )
105
117
// Get the text from the cache if it exists
106
- const cache = await readCache ( file , optLangs )
118
+ const cache = await readCache ( file , langs )
107
119
if ( cache ) {
108
120
return cache . text ?? FAILED_TO_EXTRACT
109
121
}
@@ -132,12 +144,12 @@ class OCRManager {
132
144
. trim ( )
133
145
134
146
// Add it to the cache
135
- await writeCache ( cachePath . folder , cachePath . filename , text , optLangs )
147
+ await writeCache ( cachePath . folder , cachePath . filename , text , langs )
136
148
resolve ( text )
137
149
} catch ( e ) {
138
150
// In case of error (unreadable PDF or timeout) just add
139
151
// an empty string to the cache
140
- await writeCache ( cachePath . folder , cachePath . filename , '' , optLangs )
152
+ await writeCache ( cachePath . folder , cachePath . filename , '' , langs )
141
153
resolve ( '' )
142
154
}
143
155
} )
0 commit comments