Skip to content

Commit a805639

Browse files
author
cracker0dks
committed
Set valid chars for OCR for better recognition
1 parent 8f30b7c commit a805639

File tree

2 files changed

+33
-26
lines changed

2 files changed

+33
-26
lines changed

JDownloader 2.0/tools/Windows/offlineCaptchaSolver/ocr.js

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -138,7 +138,11 @@ function getPrzeklejText(file, callback) {
138138

139139
// });
140140
newImage.getBuffer(Jimp.MIME_JPEG, function (err, data) {
141-
Tesseract.recognize(data).then(function (result) {
141+
Tesseract.recognize(data, {
142+
tessedit_pageseg_mode: 'PSM_SINGLE_CHAR',
143+
tessedit_char_blacklist: '!?',
144+
tessedit_char_whitelist: 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'
145+
}).then(function (result) {
142146
var text = result["text"].replace(/\W/g, '');
143147
var confidence = result["confidence"];
144148
endConfidents += " Letter" + index + ": " + text + " => " + confidence + "%";
@@ -226,7 +230,10 @@ function getKeep2shareSText(file, callback) {
226230
thinOut(image, 2, function (image) {
227231
changeAllPresentPixelsToBlack(image, function (image) {
228232
image.getBuffer(Jimp.MIME_JPEG, function (err, data) {
229-
Tesseract.recognize(data).then(function (result) {
233+
Tesseract.recognize(data, {
234+
tessedit_char_blacklist: '0123456789!?',
235+
tessedit_char_whitelist: 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
236+
}).then(function (result) {
230237
var text = result["text"].replace(/\W/g, '');
231238
var confidence = result["confidence"];
232239
callback({ host: what2Scan, text: text, confidence: confidence });

JDownloader 2.0/tools/Windows/offlineCaptchaSolver/package-lock.json

Lines changed: 24 additions & 24 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments (0)