You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Copy file name to clipboardExpand all lines: Searchable_Image_PDF_Creat-O-Mat.bat
+16-16Lines changed: 16 additions & 16 deletions
Original file line number
Diff line number
Diff line change
@@ -2,7 +2,7 @@ ECHO OFF
2
2
SETLOCAL
3
3
REM ~ ###################################################################################################################
4
4
REM ~ Searchable Image PDF Creat-O-Mat
5
-
SETVERSION=1.2
5
+
SETVERSION=1.3
6
6
REM ~ This script creates a searchable PDF out of a PDF with one or more scanned pages. It is possible to drag and drop one or multiple PDF files onto this batch file to start the process.
7
7
REM ~ But you can use the command line (<script name> [pdf filename #1] [pdf filename #2] ... [pdf filename #n]) too.
8
8
REM ~
@@ -12,23 +12,23 @@ REM ~ Prerequisites:
12
12
REM ~ ImageMagick (7.0.8-27 and newer) https://imagemagick.org/ | License: https://imagemagick.org/script/license.php
13
13
REM ~ Ghostscript (9.x) https://www.ghostscript.com/
14
14
REM ~ Tesseract (4.0 and newer) https://github.com/tesseract-ocr/tesseract/wiki | http://www.apache.org/licenses/LICENSE-2.0
15
-
REM ~ OS: Microsoft Windows 7 (with PowerShell); 8; 8.1
15
+
REM ~ OS: Microsoft Windows 7 (with PowerShell); 8; 8.1; 10
16
16
REM ~
17
17
REM ~ Preferences:
18
18
REM ~ (leave no whitespace between the foldername and the '=' / do not use "):
REM ~ SRCLANG shall contain the abbreviations of the installed Tesseract languages which shall be searched for in the scanned files [default: eng]. Multiple languages e.g.: deu+eng - see https://github.com/tesseract-ocr/tesseract/wiki/Data-Files
23
-
SETSRCLANG=deu
23
+
SETSRCLANG=deu+eng
24
24
REM ~ The scanned page can be deskewed before it is processed with Tesseract or not [default: true / alternative: false]. It is recommended to deskew the scanned page because it increases the success rate of the OCR software. But it will take more time.
25
25
SETDESKEW=true
26
26
REM ~ RESULTFOLDER is the folder where the searchable PDF will be stored (%CD% is the directory which contains this script) [default: %CD%\results]
27
27
SETRESULTFOLDER=%CD%\searchable_PDF
28
28
REM ~ TMPFOLDER is the folder where the extracted image files will be stored temporarily (the folder will be created and removed automatically during each run) [default: %CD%\temp]
29
29
SETTMPFOLDER=%CD%\temp
30
30
REM ~ After ImageMagick and Tesseract have created the new PDF file, it usually has a bigger file size. But it can be re-packed with Ghostscript, which compresses the image file to a certain resolution, e.g. screen (72dpi), ebook (150dpi), printer (300dpi), prepress (300dpi + color preserving)
31
-
SETREPACKPROFILE=printer
31
+
SETREPACKPROFILE=ebook
32
32
REM ~ ###################################################################################################################
33
33
34
34
REM ~ clear the screen (/ the command line window)
@@ -59,7 +59,7 @@ IF NOT EXIST "%TESSERACT%" (
59
59
ECHO The Tesseract location seems to be wrong. Please check the preferences.
60
60
GOTO :SCRIPTEND
61
61
)
62
-
REM ~ Is the Tesseract language package abbrevation of the correct pattern?
62
+
REM ~ Is the Tesseract language package abbreviation of the correct pattern?
ECHO The language settings seem to be wrong. Please check the preferences.
@@ -96,11 +96,11 @@ IF "%~1" == "" (
96
96
)
97
97
:LOOP
98
98
ECHO ### File %AMOUNT_OF_FILES% / %ARGCOUNT% ###
99
-
ECHO%~1
99
+
ECHO"%~1"
100
100
101
101
REM ~ Resolution which Imagemagick and Tesseract shall use to handle the images (in DPI / default:300)
102
102
SETRESDPI=300
103
-
103
+
104
104
REM ~ IF the file does not exist THEN skip it or ELSE do the whole process
105
105
IFNOTEXIST"%~1" (
106
106
ECHO The file "%~1" does not exist.
@@ -150,7 +150,7 @@ SET /a "AMOUNT_OF_FILES=%AMOUNT_OF_FILES% + 1"
150
150
REM ~ `SHIFT` fills '%1' with the content of the second argument (`%2`), %2 with the content of third argument (`%3`) and so on
151
151
SHIFT
152
152
153
-
REM ~ IF the AMOUNT_OF_FILES dragged onto this .bat is smaller or equal to the total amount of files/arguments AND the next argument is not empty string THEN repeat the last step again. (Otherwise continue to the end of the script.)
153
+
REM ~ IF the AMOUNT_OF_FILES dragged onto this .bat is smaller than or equal to the total amount of files/arguments AND the next argument is not an empty string THEN repeat the last step again. (Otherwise continue to the end of the script.)
154
154
IF%AMOUNT_OF_FILES%LEQ%ARGCOUNT%IFNOT"%~1"=="" (
155
155
GOTO :LOOP
156
156
)
@@ -159,18 +159,18 @@ IF %AMOUNT_OF_FILES% LEQ %ARGCOUNT% IF NOT "%~1" == "" (
159
159
REM ~ remove the temp folder
160
160
RMDIR"%TMPFOLDER%"
161
161
162
-
REM ~ setting the colors back to default
163
-
COLOR
164
-
165
162
REM ~ determining the duration (with the help of https://stackoverflow.com/questions/42603119/arithmetic-operations-with-hhmmss-times-in-batch-file/42603985#42603985)
166
163
SETEndPosition=%time:~0,8%
167
-
SET /A "ss=(((1%EndPosition::=-100)*60+1%-100)-(((1%StartPosition::=-100)*60+1%-100)"
168
-
SET /A "hh=ss/3600+100,ss%%=3600,mm=ss/60+100,ss=ss%%60+100"
164
+
set /A "ss=(((1%EndPosition::=-100)*60+1%-100)-(((1%StartPosition::=-100)*60+1%-100)"
165
+
set /A "hh=ss/3600+100,ss%%=3600,mm=ss/60+100,ss=ss%%60+100"
0 commit comments