|
| 1 | +""" |
| 2 | +This script processes images by performing OCR (Optical Character Recognition) to extract text, |
| 3 | +translates the extracted text to a target language, and replaces the original text in the images |
| 4 | +with the translated text. The processed images are saved in a specified output folder. |
| 5 | +
|
| 6 | +Usage: |
| 7 | +1. Ensure the 'ExportedImages' folder contains the images to be processed. |
| 8 | +2. Ensure the 'TranslatedImages' folder is empty or contains no conflicting filenames. |
| 9 | +3. Run the script and follow the prompts. |
| 10 | +
|
| 11 | +Dependencies: |
| 12 | +- deep_translator |
| 13 | +- easyocr |
| 14 | +- concurrent.futures |
| 15 | +""" |
| 16 | + |
# Printed before the library imports below so the user gets immediate feedback.
print("[INFO] Starting the image processing...")
| 18 | + |
| 19 | +from PIL import Image, ImageDraw, ImageFont |
| 20 | +from deep_translator import GoogleTranslator |
| 21 | +import os |
| 22 | +import easyocr |
| 23 | +import warnings |
| 24 | +from concurrent.futures import ThreadPoolExecutor |
| 25 | +import time |
# easyocr emits RuntimeWarnings from its utils module during inference; they
# are noise for end users, so suppress just that category/module.
warnings.filterwarnings("ignore", category=RuntimeWarning, module="easyocr.utils")

# Startup guidance. Per the module docstring, ExportedImages must CONTAIN the
# images to process (the old message wrongly said "empty"), and the duplicated
# TranslatedImages warning is collapsed into a single line.
print("[INFO] Please make sure ExportedImages folder contains the images to process!")
print("[WARNING] Please make sure TranslatedImages folder is empty!")

# Pause so the user can verify both folders before processing begins.
input("Press Enter to continue...")
def process_image(filename, input_folder, output_folder, reader, translator):
    """OCR one image, translate the detected text, and save the result.

    Parameters
    ----------
    filename : str
        Image file name inside ``input_folder``.
    input_folder : str
        Folder containing the source image.
    output_folder : str
        Folder the translated image is written to (created if missing).
    reader : easyocr.Reader
        Shared OCR reader instance.
    translator : deep_translator.GoogleTranslator
        Shared translator instance.
    """
    # The original messages printed a literal "(unknown)" because the
    # f-strings had no placeholder; report the actual filename instead.
    print(f"[INFO] Processing {filename}...")

    image_path = os.path.join(input_folder, filename)
    extracted_text_boxes = perform_ocr(image_path, reader)

    translated_texts = []
    for box in extracted_text_boxes:
        text = box[1]
        try:
            translated_texts.append(translator.translate(text))
        except Exception as e:
            # Best effort: a None entry makes overlay_translated_text skip
            # this box, leaving the original text visible.
            print(f"[WARNING] Translation error for '{text}': {e}")
            print(f"[WARNING] No translation found for: {text}")
            translated_texts.append(None)

    image = overlay_translated_text(image_path, translated_texts, extracted_text_boxes)

    # Guarantee the destination exists even when main() did not create it
    # (the sequential path previously skipped folder creation).
    os.makedirs(output_folder, exist_ok=True)
    output_path = os.path.join(output_folder, filename)
    image.save(output_path)
    print(f"[INFO] Saved {filename} to {output_folder}.")
| 55 | + |
print("[INFO] Loading the OCR and translation models...")
# Single source of truth for the language pair; the translator below now
# reuses these constants instead of repeating hard-coded "en"/"fr" strings.
source_lang = "en"
target_lang = "fr"
# The OCR reader is built with both languages so it can recognize text in
# either; models are cached under ./model.
reader = easyocr.Reader([source_lang, target_lang], model_storage_directory='model')
translator = GoogleTranslator(source=source_lang, target=target_lang)
| 61 | + |
def main():
    """Interactive entry point: process images sequentially or in parallel.

    Prompts the user for a mode, then runs ``process_image`` over every
    .jpg/.jpeg/.png file in ``ExportedImages``, writing results to
    ``TranslatedImages``.
    """
    input_folder = "ExportedImages"
    output_folder = "TranslatedImages"
    choice = input("Do you want to process images one by one or process multiple images? (Enter 1 for one by one or 2 for multiple files same time): ").strip().lower()
    if choice not in ["1", "2"]:
        print("[ERROR] Invalid choice. Please enter 1 for one by one or 2 for multiple files same time.")
        return

    # Create the output folder before EITHER branch runs. Previously it was
    # created only on the parallel path, so mode 1 crashed on image.save().
    os.makedirs(output_folder, exist_ok=True)

    image_names = [f for f in os.listdir(input_folder)
                   if f.lower().endswith((".jpg", ".jpeg", ".png"))]

    if choice == "1":
        for filename in image_names:
            process_image(filename, input_folder, output_folder, reader, translator)
        return

    # choice == "2": fan the images out over a thread pool. The heavy OCR /
    # translation work happens in native code and network I/O, so threads
    # overlap usefully despite the GIL.
    num_workers = os.cpu_count() or 1
    with ThreadPoolExecutor(max_workers=num_workers) as executor:
        # Map each future back to its filename: Future objects have no
        # .filename attribute, so the old error handler itself raised.
        future_to_name = {
            executor.submit(process_image, filename, input_folder,
                            output_folder, reader, translator): filename
            for filename in image_names
        }
        for future, filename in future_to_name.items():
            try:
                future.result()
            except Exception as e:
                print(f"[ERROR] Failed to process {filename}: {e}")
| 95 | + |
def perform_ocr(image_path, reader):
    """Run OCR on the image at *image_path* and return (bbox, text) pairs.

    Detections are read via ``reader.readtext``; only entries whose
    confidence score is strictly above 0.4 are kept.
    """
    detections = reader.readtext(image_path, width_ths=0.8, decoder='wordbeamsearch')

    confident_boxes = []
    for detection in detections:
        # detection layout: (bounding_box, text, confidence)
        if detection[2] > 0.4:
            confident_boxes.append((detection[0], detection[1]))

    return confident_boxes
| 104 | + |
def get_font(image, text, width, height):
    """Find the largest DejaVuSans-Bold size at which *text* fits in a
    width x height box.

    Returns ``(font, x, y)`` where (x, y) positions the text centered in
    the box, compensating for the bbox's left/top margins. If even size 1
    does not fit, ``font`` is None and (x, y) is (0, 0) — Pillow's
    ``draw.text`` then falls back to its default font.
    """
    drawer = ImageDraw.Draw(image)

    best_font = None
    x = 0
    y = 0

    for candidate_size in range(1, 500):
        candidate_font = ImageFont.truetype("DejaVuSans-Bold.ttf", size=candidate_size)

        # textbbox returns (left, top, right, bottom) for Pillow >= 8.0.0.
        bbox = drawer.textbbox((0, 0), text, font=candidate_font)
        text_width = bbox[2] - bbox[0]   # right - left
        text_height = bbox[3] - bbox[1]  # bottom - top

        # Stop as soon as the text no longer fits; keep the previous fit.
        if text_width > width or text_height > height:
            break

        best_font = candidate_font
        # Center within the box, subtracting the bbox's own offsets.
        x = (width - text_width) // 2 - bbox[0]
        y = (height - text_height) // 2 - bbox[1]

    return best_font, x, y
| 145 | + |
def adjust_color_brightness(color, strength):
    """Shift every RGB channel of *color* by *strength*, clamped to [0, 255].

    A positive strength lightens the color, a negative one darkens it.
    """
    red, green, blue = color

    def _clamp(value):
        return max(0, min(255, value))

    return (_clamp(red + strength), _clamp(green + strength), _clamp(blue + strength))
| 152 | + |
def extract_background_color(image, x_min, y_min, x_max, y_max):
    """Estimate the background color around a text box.

    Crops the box plus a 10px margin (clamped to the image bounds), takes
    the most frequent color in that region, and brightens it by 40 so the
    painted patch blends with the surroundings.
    """
    margin = 10
    left = max(x_min - margin, 0)
    top = max(y_min - margin, 0)
    right = min(x_max + margin, image.width)
    bottom = min(y_max + margin, image.height)
    region = image.crop((left, top, right, bottom))

    # getcolors needs a maxcolors bound; using the pixel count guarantees it
    # never returns None.
    pixel_count = region.size[0] * region.size[1]
    color_counts = region.getcolors(pixel_count)
    dominant_color = max(color_counts, key=lambda entry: entry[0])[1]
    return adjust_color_brightness(dominant_color, 40)
| 164 | + |
def determine_text_color(background_color):
    """Choose a readable text color ("black" or "white") for an RGB background.

    Uses the Rec. 601 luma weights; light backgrounds (luminance > 0.5) get
    black text, dark ones get white.
    """
    r, g, b = background_color
    luminance = (0.299 * r + 0.587 * g + 0.114 * b) / 255
    return "black" if luminance > 0.5 else "white"
| 178 | + |
def overlay_translated_text(image_path, translated_texts, text_boxes):
    """Open the image at *image_path* and paint each translated string over
    its original text region.

    Parameters
    ----------
    image_path : str
        Path of the source image.
    translated_texts : list[str | None]
        One entry per text box; None means translation failed and the box
        is left untouched.
    text_boxes : list
        OCR results as (polygon, text) pairs, polygon being a list of
        (x, y) points.

    Returns
    -------
    PIL.Image.Image
        The modified image (caller saves it).
    """
    image = Image.open(image_path)
    draw = ImageDraw.Draw(image)

    # NOTE: the previous module-level truetype load here was dead code —
    # get_font() supplies the font actually drawn with, so it is removed.

    for text_box, translated in zip(text_boxes, translated_texts):

        if translated is None:
            continue  # translation failed; keep the original text visible

        # Axis-aligned bounding box of the OCR polygon.
        xs = [point[0] for point in text_box[0]]
        ys = [point[1] for point in text_box[0]]
        x_min, x_max = min(xs), max(xs)
        y_min, y_max = min(ys), max(ys)

        # Most common color around the text region, used to erase the text.
        background_color = extract_background_color(image, x_min, y_min, x_max, y_max)

        # Cover the original text with the background color.
        draw.rectangle(((x_min, y_min), (x_max, y_max)), fill=background_color)

        # Largest font that fits, plus centering offsets within the box.
        font, x, y = get_font(image, translated, x_max - x_min, y_max - y_min)

        # Draw the translated text with a contrast-appropriate color.
        draw.text(
            (x_min + x, y_min + y),
            translated,
            fill=determine_text_color(background_color),
            font=font,
        )

    return image
| 228 | + |
# Wall-clock start; taken at import time, after the model loading above, so
# the report below measures image processing (plus the user prompts) only.
start_time = time.time()

if __name__ == "__main__":
    main()
    # Report total elapsed time in minutes.
    end_time = time.time()
    elapsed_time = end_time - start_time
    elapsed_minutes = elapsed_time / 60
    print(f"[INFO] Image processing completed in {elapsed_minutes:.2f} minutes.")
    print("[INFO] Please check the 'TranslatedImages' folder for the processed images.")
    print("[INFO] Thank you for using the image processing script!")
| 239 | + |
0 commit comments