File tree Expand file tree Collapse file tree 2 files changed +10
-4
lines changed Expand file tree Collapse file tree 2 files changed +10
-4
lines changed Original file line number Diff line number Diff line change @@ -124,7 +124,7 @@ def flip_image(img_name, img_dir):
124
124
return
125
125
126
126
127
- def check_text (file_path , lim = 3 ):
127
+ def check_text (file_path , lim = 3 , convert_to_grayscale = False ):
128
128
"""Checks an image for burned-in text.
129
129
130
130
Parameters
@@ -140,6 +140,8 @@ def check_text(file_path, lim=3):
140
140
True if the image has more than lim words, else False.
141
141
"""
142
142
img = cv2 .imread (file_path )
143
+ if convert_to_grayscale :
144
+ img = cv2 .cvtColor (img , cv2 .COLOR_BGR2GRAY )
143
145
tess = pytesseract .image_to_data (img , output_type = Output .DICT )
144
146
words = tess ['text' ]
145
147
if np .any ([len (w ) > lim for w in words ]):
Original file line number Diff line number Diff line change 28
28
type = int ,
29
29
default = 3 ,
30
30
help = 'maximum allowable number of words per image' )
31
+ parser .add_argument ('--convert_to_grayscale' ,
32
+ action = 'store_true' )
31
33
parser .add_argument ('--no_multiprocessing' ,
32
34
action = 'store_true' )
33
- parser .set_defaults (no_multiprocessing = False )
35
+ parser .set_defaults (no_multiprocessing = False ,
36
+ convert_to_grayscale = False )
34
37
args = parser .parse_args ()
35
38
36
39
# Setting globals
37
40
IMG_DIR = args .img_dir
38
41
TEXT_DIR = args .text_dir
39
42
NUM_WORDS = args .num_words
40
43
USE_MULTIPROCESSING = not args .no_multiprocessing
44
+ GRAY = args .convert_to_grayscale
41
45
42
46
# Importing the data
43
47
files = os .listdir (IMG_DIR )
44
48
45
49
# Checking the files
46
50
if USE_MULTIPROCESSING :
47
51
with Pool () as p :
48
- input = [(IMG_DIR + f , NUM_WORDS ) for f in files ]
52
+ input = [(IMG_DIR + f , NUM_WORDS , GRAY ) for f in files ]
49
53
res1 = p .starmap (check_text , input )
50
54
p .close ()
51
55
p .join ()
59
63
p .close ()
60
64
p .join ()
61
65
else :
62
- res1 = [check_text (IMG_DIR + f , NUM_WORDS ) for f in files ]
66
+ res1 = [check_text (IMG_DIR + f , NUM_WORDS , GRAY ) for f in files ]
63
67
with_text = np .where (res1 )[0 ]
64
68
to_move = [files [i ] for i in with_text ]
65
69
res2 = [os .rename (IMG_DIR + f , TEXT_DIR + f ) for f in to_move ]
You can’t perform that action at this time.
0 commit comments