Skip to content

Commit c5f3c52

Browse files
authored
Update Searchable_Image_PDF_Creat-O-Mat.bat
fixing a minor problem displaying filenames with special characters
1 parent 1a4a27e commit c5f3c52

File tree

1 file changed

+16
-16
lines changed

1 file changed

+16
-16
lines changed

Searchable_Image_PDF_Creat-O-Mat.bat

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ ECHO OFF
22
SETLOCAL
33
REM ~ ###################################################################################################################
44
REM ~ Searchable Image PDF Creat-O-Mat
5-
SET VERSION=1.2
5+
SET VERSION=1.3
66
REM ~ This script creates a searchable PDF out of a PDF with one or more scanned pages. It is possible to drag and drop one or multiple PDF files onto this batch file to start the process.
77
REM ~ But you can use the command line (<script name> [pdf filename #1] [pdf filename #2] ... [pdf filename #n]) too.
88
REM ~
@@ -12,23 +12,23 @@ REM ~ Prerequisites:
1212
REM ~ ImageMagick (7.0.8-27 and newer) https://imagemagick.org/ | License: https://imagemagick.org/script/license.php
1313
REM ~ Ghostscript (9.x) https://www.ghostscript.com/
1414
REM ~ Tesseract (4.0 and newer) https://github.com/tesseract-ocr/tesseract/wiki | http://www.apache.org/licenses/LICENSE-2.0
15-
REM ~ OS: Microsoft Windows 7 (with PowerShell); 8; 8.1
15+
REM ~ OS: Microsoft Windows 7 (with PowerShell); 8; 8.1; 10
1616
REM ~
1717
REM ~ Preferences:
1818
REM ~ (leave no whitespace between the foldername and the '=' / do not use "):
19-
SET IMAGEMAGIC=C:\Program Files\ImageMagick\magick.exe
20-
SET GHOSTSCRIPT=C:\Program Files\gs\gs9.23\bin\gswin64c.exe
21-
SET TESSERACT=C:\Program Files (x86)\Tesseract-OCR\tesseract.exe
19+
SET IMAGEMAGIC=C:\Program Files\ImageMagick-7.0.9-Q16\magick.exe
20+
SET GHOSTSCRIPT=C:\Program Files\gs\gs9.50\bin\gswin64c.exe
21+
SET TESSERACT=C:\Program Files\Tesseract-OCR\tesseract.exe
2222
REM ~ SRCLANG shall contain the abbreviations of the installed Tesseract languages which shall be searched for in the scanned files [default: eng]. Multiple languages e.g.: deu+eng - see https://github.com/tesseract-ocr/tesseract/wiki/Data-Files
23-
SET SRCLANG=deu
23+
SET SRCLANG=deu+eng
2424
REM ~ The scanned page can optionally be deskewed before it is processed with Tesseract [default: true / alternative: false]. It is recommended to deskew the scanned page because it increases the success rate of the OCR software. But it will take more time.
2525
SET DESKEW=true
2626
REM ~ RESULTFOLDER is the folder where the searchable PDF will be stored (%CD% is the directory which contains this script) [default: %CD%\results]
2727
SET RESULTFOLDER=%CD%\searchable_PDF
2828
REM ~ TMPFOLDER is the folder where the extracted image files will be stored temporarily (the folder will be created and removed automatically during each run) [default: %CD%\temp]
2929
SET TMPFOLDER=%CD%\temp
3030
REM ~ After Imagemagick and Tesseract have created the new PDF file it usually has a bigger file size. But it can be re-packed with Ghostscript which compresses the image file to a certain resolution e.g. screen (72dpi), ebook (150dpi), printer (300dpi), prepress (300dpi + color preserving)
31-
SET REPACKPROFILE=printer
31+
SET REPACKPROFILE=ebook
3232
REM ~ ###################################################################################################################
3333

3434
REM ~ clear the screen (/ the command line window)
@@ -59,7 +59,7 @@ IF NOT EXIST "%TESSERACT%" (
5959
ECHO The Tesseract location seems to be wrong. Please check the preferences.
6060
GOTO :SCRIPTEND
6161
)
62-
REM ~ Is the Tesseract language package abbrevation of the correct pattern?
62+
REM ~ Does the Tesseract language package abbreviation match the expected pattern?
6363
FOR /F "usebackq tokens=*" %%i IN (`PowerShell -noninteractive -NoProfile "&{ '%SRCLANG%' | Select-String -Pattern '^([a-z]{3}_?([a-z]{3})?)(\+([a-z]{3}_?([a-z]{3})?))*$' -Quiet}"`) DO SET RST=%%i
6464
IF /I NOT "%RST%" == "true" (
6565
ECHO The language settings seem to be wrong. Please check the preferences.
@@ -96,11 +96,11 @@ IF "%~1" == "" (
9696
)
9797
:LOOP
9898
ECHO ### File %AMOUNT_OF_FILES% / %ARGCOUNT% ###
99-
ECHO %~1
99+
ECHO "%~1"
100100

101101
REM ~ Resolution which Imagemagick and Tesseract shall use to handle the images (in DPI / default:300)
102102
SET RESDPI=300
103-
103+
104104
REM ~ IF the file does not exist THEN skip it or ELSE do the whole process
105105
IF NOT EXIST "%~1" (
106106
ECHO The file "%~1" does not exist.
@@ -150,7 +150,7 @@ SET /a "AMOUNT_OF_FILES=%AMOUNT_OF_FILES% + 1"
150150
REM ~ `SHIFT` fills '%1' with the content of the second argument (`%2`), %2 with the content of third argument (`%3`) and so on
151151
SHIFT
152152

153-
REM ~ IF the AMOUNT_OF_FILES dragged onto this .bat is smaller or equal to the total amount of files/arguments AND the next argument is not empty string THEN repeat the last step again. (Otherwise continue to the end of the script.)
153+
REM ~ IF AMOUNT_OF_FILES is less than or equal to the total number of files/arguments (ARGCOUNT) AND the next argument is not an empty string THEN repeat the last step again. (Otherwise continue to the end of the script.)
154154
IF %AMOUNT_OF_FILES% LEQ %ARGCOUNT% IF NOT "%~1" == "" (
155155
GOTO :LOOP
156156
)
@@ -159,18 +159,18 @@ IF %AMOUNT_OF_FILES% LEQ %ARGCOUNT% IF NOT "%~1" == "" (
159159
REM ~ remove the temp folder
160160
RMDIR "%TMPFOLDER%"
161161

162-
REM ~ setting the colors back to default
163-
COLOR
164-
165162
REM ~ determining the duration (with the help of https://stackoverflow.com/questions/42603119/arithmetic-operations-with-hhmmss-times-in-batch-file/42603985#42603985)
166163
SET EndPosition=%time:~0,8%
167-
SET /A "ss=(((1%EndPosition::=-100)*60+1%-100)-(((1%StartPosition::=-100)*60+1%-100)"
168-
SET /A "hh=ss/3600+100,ss%%=3600,mm=ss/60+100,ss=ss%%60+100"
164+
set /A "ss=(((1%EndPosition::=-100)*60+1%-100)-(((1%StartPosition::=-100)*60+1%-100)"
165+
set /A "hh=ss/3600+100,ss%%=3600,mm=ss/60+100,ss=ss%%60+100"
169166
ECHO Duration: %hh:~1%:%mm:~1%:%ss:~1%
170167
ECHO ### END ###
171168

172169
:SCRIPTEND
173170
ENDLOCAL
174171

172+
REM ~ setting the colors back to default
173+
COLOR
174+
175175
REM ~ keep the command line window open
176176
CMD /k

0 commit comments

Comments
 (0)