|
| 1 | +from PIL import Image, ImageDraw, ImageFont |
| 2 | +from deep_translator import GoogleTranslator |
| 3 | +import os, easyocr |
| 4 | + |
| 5 | + |
def perform_ocr(image_path, reader):
    """Run EasyOCR on an image and keep only confident detections.

    Args:
        image_path: Path of the image file to read.
        reader: An easyocr.Reader-like object exposing ``readtext``.

    Returns:
        A list of ``(bounding_box, text)`` pairs for every detection whose
        confidence score is above 0.4.
    """
    detections = reader.readtext(image_path, width_ths=0.8, decoder='wordbeamsearch')

    # Keep (box, text) for detections that clear the confidence threshold.
    confident = []
    for bounding_box, text, confidence in detections:
        if confidence > 0.4:
            confident.append((bounding_box, text))

    return confident
| 14 | + |
| 15 | + |
def get_font(image, text, width, height):
    """Find the largest default font that renders *text* within width x height.

    Args:
        image: PIL Image used only to build a Draw object for measuring.
        text: The string to be fitted.
        width: Available box width in pixels.
        height: Available box height in pixels.

    Returns:
        ``(font, x, y)`` where ``(x, y)`` is the offset that centers the text
        inside the box (bbox margins subtracted). ``font`` is ``None`` and the
        offsets are 0 when even the smallest size does not fit.
    """
    font = None   # Largest fitting ImageFont found so far
    box = None    # textbbox of that font (Pillow >= 8.0 textbbox API)
    w = h = 0     # Rendered text dimensions for the fitting font
    x = y = 0     # Centered draw offset; stays (0, 0) if nothing fits

    draw = ImageDraw.Draw(image)  # Draw object used purely for measurement

    # Grow the font size until the rendered text no longer fits the box.
    for size in range(1, 500):

        # BUG FIX: the original passed size=font_size (stale, and None on the
        # first iteration) instead of the loop variable, so the candidate font
        # never actually changed size.
        new_font = ImageFont.load_default(size=size)

        # bbox is (left, top, right, bottom) around the rendered text.
        new_box = draw.textbbox((0, 0), text, font=new_font)
        new_w = new_box[2] - new_box[0]  # right - left
        new_h = new_box[3] - new_box[1]  # bottom - top

        # Too big: keep the previous (largest fitting) values.
        if new_w > width or new_h > height:
            break

        font = new_font
        box = new_box
        w, h = new_w, new_h

    # BUG FIX: guard against nothing fitting — the original dereferenced
    # unbound w/h/box (NameError) when size 1 was already too large.
    if box is not None:
        # Center the text, compensating for the bbox's own left/top margins.
        x = (width - w) // 2 - box[0]
        y = (height - h) // 2 - box[1]

    return font, x, y
| 56 | + |
| 57 | + |
def add_discoloration(color, strength):
    """Shift every channel of an (r, g, b) color by *strength*, clamped to [0, 255].

    Pure white is nudged down to (245, 245, 245) so the result never ends up
    as an exact full-white fill.
    """
    # Clamp each shifted channel into the valid 8-bit range.
    shifted = tuple(min(255, max(0, channel + strength)) for channel in color)

    # Never return pure white.
    if shifted == (255, 255, 255):
        shifted = (245, 245, 245)

    return shifted
| 69 | + |
| 70 | + |
def get_background_color(image, x_min, y_min, x_max, y_max):
    """Estimate the dominant color just around a text bounding box.

    Samples a 10-pixel margin around the box (clamped to the image bounds),
    takes the most frequent color there, and slightly discolors it so the
    repainted patch is distinguishable from an exact background match.
    """
    margin = 10  # pixels sampled beyond the box on every side

    # Clamp the expanded box to the image bounds and crop that region.
    left = max(x_min - margin, 0)
    top = max(y_min - margin, 0)
    right = min(x_max + margin, image.width)
    bottom = min(y_max + margin, image.height)
    region = image.crop((left, top, right, bottom))

    # Pick the most frequent color in the sampled region.
    histogram = region.getcolors(region.size[0] * region.size[1])
    dominant = max(histogram, key=lambda entry: entry[0])[1]

    # Add a touch of discoloration before returning.
    return add_discoloration(dominant, 40)
| 93 | + |
| 94 | + |
def get_text_fill_color(background_color):
    """Pick black or white text for best contrast against *background_color*."""
    r, g, b = background_color

    # Rec. 601 luma weights, normalized to [0, 1].
    luminance = (0.299 * r + 0.587 * g + 0.114 * b) / 255

    # Dark text on light backgrounds, light text on dark ones.
    return "black" if luminance > 0.5 else "white"
| 108 | + |
| 109 | + |
def replace_text_with_translation(image_path, translated_texts, text_boxes):
    """Paint translated strings over the original text regions of an image.

    ``text_boxes`` pairs positionally with ``translated_texts``; entries whose
    translation is ``None`` are left untouched.

    Args:
        image_path: Path of the image to edit.
        translated_texts: Translated string (or None) per detected box.
        text_boxes: ``(bounding_box, text)`` pairs as produced by the OCR step.

    Returns:
        The edited PIL Image (not saved to disk).
    """
    image = Image.open(image_path)
    draw = ImageDraw.Draw(image)

    # Fallback font; replaced per-box with a size-fitted one below.
    font = ImageFont.load_default()

    for (bounding_box, _text), translation in zip(text_boxes, translated_texts):

        # Skip boxes that could not be translated.
        if translation is None:
            continue

        # Axis-aligned bounding rectangle of the (possibly skewed) OCR box.
        xs = [point[0] for point in bounding_box]
        ys = [point[1] for point in bounding_box]
        x_min, x_max = min(xs), max(xs)
        y_min, y_max = min(ys), max(ys)

        # Estimate the local background color and repaint the text region.
        background_color = get_background_color(image, x_min, y_min, x_max, y_max)
        draw.rectangle(((x_min, y_min), (x_max, y_max)), fill=background_color)

        # Fit the translated text to the cleared rectangle.
        font, offset_x, offset_y = get_font(
            image, translation, x_max - x_min, y_max - y_min
        )

        # Draw the translation centered inside the box with contrasting color.
        draw.text(
            (x_min + offset_x, y_min + offset_y),
            translation,
            fill=get_text_fill_color(background_color),
            font=font,
        )

    return image
| 159 | + |
| 160 | + |
# Initialize the OCR reader (simplified Chinese + English models).
reader = easyocr.Reader(["ch_sim", "en"], model_storage_directory='model')

# Initialize the translator (Chinese -> English).
translator = GoogleTranslator(source="zh-CN", target="en")

# Define input and output locations.
input_folder = "input"
output_folder = "output"

# BUG FIX: ensure the output folder exists — image.save() below raised
# FileNotFoundError when "output/" was missing.
os.makedirs(output_folder, exist_ok=True)

# Process each image file from input.
files = os.listdir(input_folder)
# BUG FIX: case-insensitive extension match so e.g. "photo.JPG" is not skipped.
image_files = [file for file in files if file.lower().endswith((".jpg", ".jpeg", ".png"))]
for filename in image_files:

    # BUG FIX: the original printed the literal "(unknown)" instead of the name.
    print(f'[INFO] Processing {filename}...')

    image_path = os.path.join(input_folder, filename)

    # Extract text and its location.
    extracted_text_boxes = perform_ocr(image_path, reader)

    # Translate each detected text snippet.
    translated_texts = []
    for text_box, text in extracted_text_boxes:
        translated_texts.append(translator.translate(text))

    # Replace the original text with its translation.
    image = replace_text_with_translation(image_path, translated_texts, extracted_text_boxes)

    # Save the modified image with a "-translated" suffix.
    base_filename, extension = os.path.splitext(filename)
    output_filename = f"{base_filename}-translated{extension}"
    output_path = os.path.join(output_folder, output_filename)
    image.save(output_path)

    print(f'[INFO] Saved as {output_filename}...')
0 commit comments