Skip to content

Commit c32b3e0

Browse files
committed
⚡ INIT
0 parents  commit c32b3e0

File tree

7 files changed

+1194
-0
lines changed

7 files changed

+1194
-0
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
model

Pipfile

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
[[source]]
2+
url = "https://pypi.org/simple"
3+
verify_ssl = true
4+
name = "pypi"
5+
6+
[packages]
7+
googletrans = "*"
8+
pillow = "*"
9+
easyocr = "*"
10+
deep-translator = "*"
11+
black = "*"
12+
13+
[dev-packages]
14+
15+
[requires]
16+
python_version = "3.9"

Pipfile.lock

Lines changed: 943 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

README.md

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# Image Translator
2+
3+
This project utilizes optical character recognition (OCR) and translation to translate text within images from one language to another. It performs the following steps:
4+
5+
1. **OCR Processing:** The project extracts text and its bounding boxes from input images using the EasyOCR library.
6+
2. **Translation:** It translates the extracted text using the Google Translator API.
7+
3. **Text Replacement:** The translated text is then overlaid onto the image, replacing the original text while maintaining its position and style.
8+
4. **Output:** Finally, the modified image with translated text is saved to an output folder.
9+
10+
## Setup
11+
12+
### Installation
13+
14+
1. Clone this repository to your local machine.
15+
2. Install the required Python dependencies using `pip install pipenv && pipenv install`.
16+
17+
## Usage
18+
19+
1. Place your input images in the `input` folder.
20+
2. Run the script `image_translator.py`.
21+
3. Translated images will be saved in the `output` folder.
22+
23+
## Notes
24+
25+
- Supported languages for OCR can be seen [here](https://www.jaided.ai/easyocr/)
26+
- Supported languages for Google Translate can be obtained using the following code:
27+
```python
28+
from deep_translator.constants import GOOGLE_LANGUAGES_TO_CODES
29+
print(GOOGLE_LANGUAGES_TO_CODES)
30+
```
31+
- Adjustments to text languages, recognition thresholds, translation services, or image processing parameters can be made within the script.
32+
33+
## Acknowledgments
34+
35+
- [EasyOCR](https://github.com/JaidedAI/EasyOCR) - For OCR processing.
36+
- [Google Translator](https://pypi.org/project/deep-translator/) - For text translation.
37+
- [Pillow (PIL Fork)](https://python-pillow.org/) - For image manipulation.

input/image-1.jpg

113 KB
Loading

main.py

Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
from PIL import Image, ImageDraw, ImageFont
2+
from deep_translator import GoogleTranslator
3+
import os, easyocr
4+
5+
6+
def perform_ocr(image_path, reader):
    """Run OCR on an image and keep only confident detections.

    Returns a list of (bounding_box, text) pairs for every detection
    whose confidence score is above 0.4.
    """
    detections = reader.readtext(image_path, width_ths = 0.8, decoder = 'wordbeamsearch')

    # Each detection entry is (bounding_box, text, confidence);
    # drop low-confidence hits and keep only box + text.
    confident = []
    for entry in detections:
        if entry[2] > 0.4:
            confident.append((entry[0], entry[1]))

    return confident
14+
15+
16+
def get_font(image, text, width, height):
    """Find the largest default font that fits *text* in a width x height box.

    Returns (font, x, y) where (x, y) is the offset that centers the text
    inside the box. *font* is None when even the smallest size does not fit
    (draw.text then falls back to Pillow's built-in default font).
    """
    font_size = None  # Largest size known to fit
    font = None       # ImageFont object for that size
    box = None        # textbbox result for that size (Pillow >= 8.0.0)
    w = 0             # BUG FIX: initialize so the centering math below
    h = 0             # cannot hit a NameError when size 1 already overflows
    x = 0
    y = 0

    draw = ImageDraw.Draw(image)  # Draw object used only for measuring

    # Grow the font size until the rendered text no longer fits.
    for size in range(1, 500):

        # BUG FIX: was load_default(size=font_size), which lagged one
        # iteration behind (and used the stock default size on the first
        # pass, since font_size was still None).
        new_font = ImageFont.load_default(size=size)

        # textbbox returns (left, top, right, bottom).
        new_box = draw.textbbox((0, 0), text, font=new_font)

        # Calculate width and height
        new_w = new_box[2] - new_box[0]  # Right - Left
        new_h = new_box[3] - new_box[1]  # Bottom - Top

        # If too big then exit with previous values
        if new_w > width or new_h > height:
            break

        # Remember the latest size that still fits
        font_size = size
        font = new_font
        box = new_box
        w = new_w
        h = new_h

        # Center the text, compensating for the bbox's own margins
        x = (width - w) // 2 - box[0]  # Minus left margin
        y = (height - h) // 2 - box[1]  # Minus top margin

    return font, x, y
56+
57+
58+
def add_discoloration(color, strength):
    """Shift an (r, g, b) color by *strength*, clamping each channel to [0, 255].

    Pure white is nudged down to (245, 245, 245) so a painted patch never
    blends invisibly into a white background.
    """
    shifted = tuple(max(0, min(255, channel + strength)) for channel in color)

    if shifted == (255, 255, 255):
        shifted = (245, 245, 245)

    return shifted
69+
70+
71+
def get_background_color(image, x_min, y_min, x_max, y_max):
    """Estimate the background color around a text bounding box.

    Samples a region extending 10 px beyond the box (clamped to the image
    bounds), takes its most frequent pixel color, and returns it slightly
    discolored via add_discoloration.
    """
    margin = 10

    # Clamp the expanded sample region to the image bounds.
    left = max(x_min - margin, 0)
    top = max(y_min - margin, 0)
    right = min(x_max + margin, image.width)
    bottom = min(y_max + margin, image.height)
    sample = image.crop((left, top, right, bottom))

    # With maxcolors set to the pixel count, getcolors always returns
    # every (count, color) pair; pick the dominant color.
    color_counts = sample.getcolors(sample.size[0] * sample.size[1])
    dominant = max(color_counts, key=lambda pair: pair[0])[1]

    # Discolor slightly so the painted patch remains distinguishable.
    return add_discoloration(dominant, 40)
93+
94+
95+
def get_text_fill_color(background_color):
    """Choose black or white text for best contrast with the background."""
    r, g, b = background_color[0], background_color[1], background_color[2]

    # Rec. 601 luma, normalized to [0, 1].
    luminance = (0.299 * r + 0.587 * g + 0.114 * b) / 255

    # Dark text on light backgrounds, light text on dark ones.
    return "black" if luminance > 0.5 else "white"
108+
109+
110+
def replace_text_with_translation(image_path, translated_texts, text_boxes):
    """Paint translated text over the original text regions of an image.

    Parameters:
        image_path: path of the source image to open.
        translated_texts: translations aligned index-for-index with
            text_boxes; None entries are skipped.
        text_boxes: (bounding_box, text) pairs as produced by perform_ocr.

    Returns the modified PIL Image (the caller is responsible for saving).
    """
    # Open the image
    image = Image.open(image_path)
    draw = ImageDraw.Draw(image)

    # NOTE: the original pre-loaded a default font here, but it was always
    # reassigned by get_font() before use — the dead local is removed.

    # Replace each text box with translated text
    for text_box, translated in zip(text_boxes, translated_texts):

        # Skip detections the translator could not handle
        if translated is None:
            continue

        # Axis-aligned bounding box of the (possibly rotated) OCR polygon
        xs = [coordinate[0] for coordinate in text_box[0]]
        ys = [coordinate[1] for coordinate in text_box[0]]
        x_min, x_max = min(xs), max(xs)
        y_min, y_max = min(ys), max(ys)

        # Find the most common color in the text region
        background_color = get_background_color(image, x_min, y_min, x_max, y_max)

        # Draw a rectangle to cover the text region with the original background color
        draw.rectangle(((x_min, y_min), (x_max, y_max)), fill=background_color)

        # Calculate font size, box
        font, x, y = get_font(image, translated, x_max - x_min, y_max - y_min)

        # Draw the translated text within the box
        draw.text(
            (x_min + x, y_min + y),
            translated,
            fill=get_text_fill_color(background_color),
            font=font,
        )

    return image
159+
160+
161+
# Initialize the OCR reader (models are cached under ./model)
reader = easyocr.Reader(["ch_sim", "en"], model_storage_directory = 'model')

# Initialize the Translator (Simplified Chinese -> English)
translator = GoogleTranslator(source="zh-CN", target="en")

# Define input and output location
input_folder = "input"
output_folder = "output"

# BUG FIX: the output folder was never created, so image.save() crashed
# on a fresh clone; create it up front.
os.makedirs(output_folder, exist_ok=True)

# Process each image file from input
files = os.listdir(input_folder)
image_files = [file for file in files if file.endswith((".jpg", ".jpeg", ".png"))]
for filename in image_files:

    # BUG FIX: the f-string printed a literal "(unknown)" instead of
    # interpolating the file being processed.
    print(f'[INFO] Processing {filename}...')

    image_path = os.path.join(input_folder, filename)

    # Extract text and location
    extracted_text_boxes = perform_ocr(image_path, reader)

    # Translate texts (bounding boxes are not needed here)
    translated_texts = []
    for _text_box, text in extracted_text_boxes:
        translated_texts.append(translator.translate(text))

    # Replace text with translated text
    image = replace_text_with_translation(image_path, translated_texts, extracted_text_boxes)

    # Save modified image with a "-translated" suffix
    base_filename, extension = os.path.splitext(filename)
    output_filename = f"{base_filename}-translated{extension}"
    output_path = os.path.join(output_folder, output_filename)
    image.save(output_path)

    print(f'[INFO] Saved as {output_filename}...')

output/image-1-translated.jpg

85.4 KB
Loading

0 commit comments

Comments
 (0)