
Commit 5a5895d

initial commit
1 parent 1eccf5c commit 5a5895d

11 files changed: +573 -7 lines

MultipleImageProcessing.py

Lines changed: 239 additions & 0 deletions
@@ -0,0 +1,239 @@
"""
This script processes images by performing OCR (Optical Character Recognition) to extract text,
translates the extracted text to a target language, and replaces the original text in the images
with the translated text. The processed images are saved in a specified output folder.

Usage:
1. Ensure the 'ExportedImages' folder contains the images to be processed.
2. Ensure the 'TranslatedImages' folder is empty or contains no conflicting filenames.
3. Run the script and follow the prompts.

Dependencies:
- Pillow (PIL)
- deep_translator
- easyocr
"""
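# A minimal install sketch (assuming pip; PyPI names differ from the import names):
#   pip install pillow deep-translator easyocr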
print("[INFO] Starting the image processing...")

from PIL import Image, ImageDraw, ImageFont
from deep_translator import GoogleTranslator
import os
import easyocr
import warnings
from concurrent.futures import ThreadPoolExecutor
import time

# Silence RuntimeWarnings raised inside easyocr.utils during OCR
warnings.filterwarnings("ignore", category=RuntimeWarning, module="easyocr.utils")

print("[INFO] Please make sure the ExportedImages folder contains the images to process!")
print("[WARNING] Please make sure the TranslatedImages folder is empty!")

# Pause so the user can check the folders before processing starts
input("Press Enter to continue...")
def process_image(filename, input_folder, output_folder, reader, translator):
    print(f"[INFO] Processing {filename}...")

    image_path = os.path.join(input_folder, filename)
    extracted_text_boxes = perform_ocr(image_path, reader)

    translated_texts = []
    for box in extracted_text_boxes:
        text = box[1]
        try:
            translated_texts.append(translator.translate(text))
        except Exception as e:
            print(f"[WARNING] Translation failed for '{text}': {e}")
            translated_texts.append(None)

    image = overlay_translated_text(image_path, translated_texts, extracted_text_boxes)

    output_path = os.path.join(output_folder, filename)
    image.save(output_path)
    print(f"[INFO] Saved {filename} to {output_folder}.")

print("[INFO] Loading the OCR and translation models...")
57+
source_lang = "en"
58+
target_lang = "fr"
59+
reader = easyocr.Reader([source_lang, target_lang], model_storage_directory='model')
60+
translator = GoogleTranslator(source="en", target="fr")
61+
62+
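# Note: on the first run, EasyOCR downloads its detection and recognition models
# into the 'model' directory configured above; later runs reuse them. The reader
# and translator are shared by all worker threads below; neither library documents
# thread-safety guarantees, so treat that sharing as an assumption.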
def main():
    input_folder = "ExportedImages"
    output_folder = "TranslatedImages"
    choice = input("Do you want to process images one by one or several at the same time? (Enter 1 for one by one, 2 for multiple files at once): ").strip().lower()
    if choice not in ["1", "2"]:
        print("[ERROR] Invalid choice. Please enter 1 for one by one or 2 for multiple files at once.")
        return

    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    if choice == "1":
        for filename in os.listdir(input_folder):
            if filename.lower().endswith((".jpg", ".jpeg", ".png")):
                process_image(filename, input_folder, output_folder, reader, translator)
        return

    if choice == "2":
        num_workers = os.cpu_count() or 1
        with ThreadPoolExecutor(max_workers=num_workers) as executor:
            # Map each future back to its filename so failures can be reported
            futures = {executor.submit(process_image, filename, input_folder, output_folder, reader, translator): filename
                       for filename in os.listdir(input_folder)
                       if filename.lower().endswith((".jpg", ".jpeg", ".png"))}

            total_files = len(futures)
            for i, future in enumerate(futures):
                try:
                    future.result()
                except Exception as e:
                    print(f"[ERROR] Failed to process {futures[future]}: {e}")
                # Uncomment the following lines to show progress
                # progress = (i + 1) / total_files * 100
                # print(f"[INFO] Progress: {progress:.2f}%")

def perform_ocr(image_path, reader):
    # Perform OCR on the image
    result = reader.readtext(image_path, width_ths=0.8, decoder='wordbeamsearch')

    # Keep text and bounding boxes only for reasonably confident detections;
    # each result entry is (bounding_box, text, confidence)
    extracted_text_boxes = [(entry[0], entry[1]) for entry in result if entry[2] > 0.4]

    return extracted_text_boxes

def get_font(image, text, width, height):

    # Default values at start
    font_size = None  # Font size
    font = None  # ImageFont.truetype object with the correct font size
    box = None  # Text bounding box (Pillow >= 8.0.0 textbbox API)
    w = 0
    h = 0
    x = 0
    y = 0

    draw = ImageDraw.Draw(image)  # Create a draw object

    # Test increasing font sizes until the text no longer fits
    for size in range(1, 500):

        # Create new font
        new_font = ImageFont.truetype("DejaVuSans-Bold.ttf", size=size)

        # Calculate bounding box (Pillow >= 8.0.0)
        new_box = draw.textbbox((0, 0), text, font=new_font)

        # Calculate width and height
        new_w = new_box[2] - new_box[0]  # Right - Left
        new_h = new_box[3] - new_box[1]  # Bottom - Top

        # If too big, exit with the previous values
        if new_w > width or new_h > height:
            break

        # Keep the current values
        font_size = size
        font = new_font
        box = new_box
        w = new_w
        h = new_h

    # Calculate position (minus margins in box)
    if box is not None:
        x = (width - w) // 2 - box[0]  # Minus left margin
        y = (height - h) // 2 - box[1]  # Minus top margin

    return font, x, y

def adjust_color_brightness(color, strength):
    r, g, b = color
    r = max(0, min(255, r + strength))
    g = max(0, min(255, g + strength))
    b = max(0, min(255, b + strength))
    return (r, g, b)
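# Example: adjust_color_brightness((200, 200, 200), 40) -> (240, 240, 240);
# channel values are clamped to the 0-255 range.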

def extract_background_color(image, x_min, y_min, x_max, y_max):
    margin = 10
    region = image.crop((
        max(x_min - margin, 0),
        max(y_min - margin, 0),
        min(x_max + margin, image.width),
        min(y_max + margin, image.height),
    ))
    # Take the most common color in the region as the background color
    edge_colors = region.getcolors(region.size[0] * region.size[1])
    background_color = max(edge_colors, key=lambda x: x[0])[1]
    return adjust_color_brightness(background_color, 40)

def determine_text_color(background_color):
    # Calculate the luminance of the background color (ITU-R BT.601 weights)
    luminance = (
        0.299 * background_color[0]
        + 0.587 * background_color[1]
        + 0.114 * background_color[2]
    ) / 255

    # Determine the text color based on the background luminance
    if luminance > 0.5:
        return "black"  # Use black text for light backgrounds
    else:
        return "white"  # Use white text for dark backgrounds

def overlay_translated_text(image_path, translated_texts, text_boxes):
    # Open the image; convert to RGB so the color handling below is consistent
    image = Image.open(image_path).convert("RGB")
    draw = ImageDraw.Draw(image)

    # Replace each text box with the translated text
    for text_box, translated in zip(text_boxes, translated_texts):

        if translated is None:
            continue

        # Initialize the bounds to the first corner point
        x_min, y_min = text_box[0][0][0], text_box[0][0][1]
        x_max, y_max = text_box[0][0][0], text_box[0][0][1]

        for coordinate in text_box[0]:

            x, y = coordinate

            if x < x_min:
                x_min = x
            elif x > x_max:
                x_max = x
            if y < y_min:
                y_min = y
            elif y > y_max:
                y_max = y

        # Find the most common color in the text region
        background_color = extract_background_color(image, x_min, y_min, x_max, y_max)

        # Draw a rectangle to cover the text region with the original background color
        draw.rectangle(((x_min, y_min), (x_max, y_max)), fill=background_color)

        # Pick the largest font size that fits the box; DejaVuSans-Bold supports
        # French accent marks
        font, x, y = get_font(image, translated, x_max - x_min, y_max - y_min)

        # Draw the translated text within the box
        draw.text(
            (x_min + x, y_min + y),
            translated,
            fill=determine_text_color(background_color),
            font=font,
        )

    return image

if __name__ == "__main__":
    start_time = time.time()
    main()
    end_time = time.time()
    elapsed_time = end_time - start_time
    elapsed_minutes = elapsed_time / 60
    print(f"[INFO] Image processing completed in {elapsed_minutes:.2f} minutes.")
    print("[INFO] Please check the 'TranslatedImages' folder for the processed images.")
    print("[INFO] Thank you for using the image processing script!")

README.md

Lines changed: 15 additions & 0 deletions
@@ -14,6 +14,21 @@ This project utilizes optical character recognition (OCR) and translation to tra
1. Clone this repository to your local machine.
2. Install the required Python dependencies using `pip install pipenv && pipenv install`.

or

```
py -3.10 -m venv venv
venv\Scripts\activate
pip install -r requirements.txt
```
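On Linux/macOS, the activation step is `source venv/bin/activate` instead of `venv\Scripts\activate`.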

If you would like to use torch with CUDA:

```
pip uninstall torch torchvision
pip install torch==2.5.1+cu118 torchaudio==2.5.1+cu118 torchvision==0.20.1+cu118 --index-url https://download.pytorch.org/whl/cu118
```
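
To confirm the CUDA build is active, a quick check (assuming the install above succeeded):

```
python -c "import torch; print(torch.cuda.is_available())"
```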

## Usage

1. Place your input images in the `input` folder.
