Skip to content

Surya Recognizer #4

@titipata

Description

@titipata

ตัวอย่างการใช้ผลจาก YOLO detector เพื่อดึงส่วนของภาพออกมา แล้วใช้ surya-ocr ในการอ่านข้อความ

from pathlib import Path

from PIL import Image
from ultralytics import YOLO

# Locate the trained detector weights and turn the path into an absolute one.
weights_file = Path("best_detection_yolo.pt")
checkpoint = weights_file.resolve()

# Build the YOLO detector from those weights.
model = YOLO(checkpoint)

# Run detection. NOTE: `images` is not defined in this snippet and must be
# supplied beforehand. The options are forwarded to ultralytics unchanged
# (presumably: 640 px inference size, 0.25 confidence threshold, FP16
# inference -- confirm against the ultralytics predict documentation).
results = model.predict(
    images,
    imgsz=640,
    conf=0.25,
    half=True,
)
from surya.recognition import batch_recognition
from surya.model.recognition.model import load_model as load_recognizer
from surya.model.recognition.processor import load_processor as load_recognizer_processor

# Instantiate the surya recognition model, then its matching processor; both
# are handed to batch_recognition further down.
recognizer, recognizer_processor = (
    load_recognizer(),
    load_recognizer_processor(),
)

# Crop every detected box out of its source image and queue the crops for OCR.
padding = 0  # extra pixels kept around each detected box, on every side
images_for_ocr = []
# One (image index, (x1, y1, x2, y2), detection confidence) tuple per queued
# crop, in the same order as images_for_ocr, so OCR output can be traced back
# to the detection it came from even when some boxes are skipped.
crop_sources = []
for idx, result in enumerate(results):
    if result.boxes is None:
        continue
    orig_img = result.orig_img
    img_height, img_width = orig_img.shape[:2]
    boxes = result.boxes.xyxy.cpu().numpy()
    confidences = result.boxes.conf.cpu().numpy()
    for box, conf in zip(boxes, confidences):
        # Expand the box by `padding` and clamp it to the image bounds.
        x1, y1, x2, y2 = map(int, box)
        x1 = max(0, x1 - padding)
        y1 = max(0, y1 - padding)
        x2 = min(img_width, x2 + padding)
        y2 = min(img_height, y2 + padding)
        # Integer truncation/clamping can collapse a box to zero width or
        # height; an empty crop cannot be turned into a usable image.
        if x2 <= x1 or y2 <= y1:
            continue
        region = orig_img[y1:y2, x1:x2]
        # Ultralytics keeps orig_img in BGR channel order (OpenCV convention)
        # while PIL interprets arrays as RGB, so reverse the channel axis.
        pil_region = Image.fromarray(region[:, :, ::-1])
        images_for_ocr.append(pil_region)
        crop_sources.append((idx, (x1, y1, x2, y2), float(conf)))
# A fresh language list per crop, so the entries do not alias one shared list.
languages_for_ocr = [["th", "en"] for _ in images_for_ocr]

# Run surya recognition over all queued crops in one call, then split the
# returned pair into the recognised texts and their confidence scores.
recognition_output = batch_recognition(
    images=images_for_ocr,
    languages=languages_for_ocr,
    model=recognizer,
    processor=recognizer_processor,
)
text_results, confidence_scores = recognition_output

Metadata

Metadata

Assignees

No one assigned

    Labels

    documentation: Improvements or additions to documentation

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions