Skip to content

Commit 15feb4a

Browse files
committed
Do not take OCR languages into account when reading the cache
This may fix #37
1 parent 7baa897 commit 15feb4a

File tree

3 files changed

+27
-18
lines changed

3 files changed

+27
-18
lines changed

lib/src/cache.ts

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -50,20 +50,13 @@ export function getCachePath(file: TFile): {
5050
* @param optLangs
5151
* @returns
5252
*/
53-
export async function readCache(
54-
file: TFile,
55-
optLangs = ''
56-
): Promise<ExtractedText | null> {
53+
export async function readCache(file: TFile): Promise<ExtractedText | null> {
5754
const cachePath = getCachePath(file)
5855

5956
// Get the text from the cache if it exists
6057
if (await app.vault.adapter.exists(cachePath.fullpath)) {
6158
const raw = await app.vault.adapter.read(cachePath.fullpath)
62-
const cache = JSON.parse(raw) as ExtractedText
63-
// Check that the languages list has not changed since the cache was created
64-
if (cache.langs === optLangs) {
65-
return cache
66-
}
59+
return JSON.parse(raw) as ExtractedText
6760
}
6861
return null
6962
}

lib/src/ocr/ocr-manager.ts

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,10 @@ import type { ocrLangs } from './ocr-langs'
1414
* Concatenates an array of langs to a single string to be passed to Tesseract
1515
* e.g. ['fra', 'eng'] => 'eng+fra'
1616
* The langs are sorted alphabetically because it's also used as a cache key
17-
* @param langs
18-
* @returns
17+
* @param langs
18+
* @returns
1919
*/
20-
function concatLangs (langs: Array<typeof ocrLangs[number]>): string {
20+
function concatLangs(langs: Array<(typeof ocrLangs)[number]>): string {
2121
return langs.sort().join('+')
2222
}
2323

@@ -102,7 +102,11 @@ class OCRManager {
102102
*/
103103
public async getImageText(file: TFile, options: OcrOptions): Promise<string> {
104104
try {
105-
return await imagesProcessQueue.add(() => this.#getImageText(file, options)) ?? ''
105+
return (
106+
(await imagesProcessQueue.add(() =>
107+
this.#getImageText(file, options)
108+
)) ?? ''
109+
)
106110
} catch (e) {
107111
console.warn(
108112
`Text Extractor - Error while extracting text from ${file.basename}`
@@ -113,9 +117,8 @@ class OCRManager {
113117
}
114118

115119
async #getImageText(file: TFile, options: OcrOptions): Promise<string> {
116-
const langs = concatLangs(options.langs)
117120
// Get the text from the cache if it exists
118-
const cache = await readCache(file, langs)
121+
const cache = await readCache(file)
119122
if (cache) {
120123
return cache.text ?? FAILED_TO_EXTRACT
121124
}
@@ -128,6 +131,7 @@ class OCRManager {
128131
const cachePath = getCachePath(file)
129132
const data = new Uint8ClampedArray(await app.vault.readBinary(file))
130133
const worker = OCRWorker.getWorker()
134+
const langs = concatLangs(options.langs)
131135

132136
return new Promise(async (resolve, reject) => {
133137
try {
@@ -144,12 +148,24 @@ class OCRManager {
144148
.trim()
145149

146150
// Add it to the cache
147-
await writeCache(cachePath.folder, cachePath.filename, text, file.path, langs)
151+
await writeCache(
152+
cachePath.folder,
153+
cachePath.filename,
154+
text,
155+
file.path,
156+
langs
157+
)
148158
resolve(text)
149159
} catch (e) {
150160
// In case of error (unreadable PDF or timeout) just add
151161
// an empty string to the cache
152-
await writeCache(cachePath.folder, cachePath.filename, '', file.path, langs)
162+
await writeCache(
163+
cachePath.folder,
164+
cachePath.filename,
165+
'',
166+
file.path,
167+
langs
168+
)
153169
resolve('')
154170
}
155171
})

plugin/src/settings.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ export class TextExtractorSettingsTab extends PluginSettingTab {
4343
info.createDiv({
4444
cls: 'setting-item-description',
4545
text: `A list of languages to use for OCR. e.g. if your vault contains documents in English and French, you'd want to add 'eng' and 'fra' here.
46-
This setting only applies to images, not PDFs.`,
46+
This setting only applies to images, not PDFs. You may have to clear the cache after changing this setting.`,
4747
})
4848

4949
new LangSelector({

0 commit comments

Comments
 (0)