-
Notifications
You must be signed in to change notification settings - Fork 3
Open
Labels
documentation: Improvements or additions to documentation
Description
ตัวอย่างการใช้ผลจาก YOLO detector เพื่อดึงส่วนของภาพออกมา แล้วใช้ surya-ocr ในการอ่านข้อความ
# Example: crop every region found by the YOLO detector out of its source
# image and read the text in each crop with surya-ocr.
from pathlib import Path

from PIL import Image
from surya.model.recognition.model import load_model as load_recognizer
from surya.model.recognition.processor import (
    load_processor as load_recognizer_processor,
)
from surya.recognition import batch_recognition
from ultralytics import YOLO

checkpoint = Path("best_detection_yolo.pt").resolve()

# Load the detection model from a checkpoint.
model = YOLO(checkpoint)

# NOTE: `images` is not defined in this snippet; it must already hold the
# inputs to run detection on before this line executes.
results = model.predict(images, imgsz=640, conf=0.25, half=True)

recognizer = load_recognizer()
recognizer_processor = load_recognizer_processor()

# Extra pixels kept around each detected box (0 crops exactly at the box).
padding = 0
images_for_ocr = []

# Process each YOLO result: one result per input image.
for result in results:
    if result.boxes is None:
        continue

    orig_img = result.orig_img
    img_height, img_width = orig_img.shape[:2]

    for box in result.boxes.xyxy.cpu().numpy():
        # Expand the box by `padding` and clamp it to the image bounds.
        x1, y1, x2, y2 = map(int, box)
        x1 = max(0, x1 - padding)
        y1 = max(0, y1 - padding)
        x2 = min(img_width, x2 + padding)
        y2 = min(img_height, y2 + padding)

        # Truncating to int can collapse a tiny box to zero area, and PIL
        # cannot build an image from an empty array.
        if x2 <= x1 or y2 <= y1:
            continue

        region = orig_img[y1:y2, x1:x2]
        # Ultralytics keeps `orig_img` in OpenCV's BGR channel order while
        # PIL expects RGB, so reverse the channel axis of colour crops.
        if region.ndim == 3 and region.shape[2] == 3:
            region = region[:, :, ::-1].copy()
        images_for_ocr.append(Image.fromarray(region))

# One language list per crop: Thai and English.
languages_for_ocr = [["th", "en"]] * len(images_for_ocr)

if images_for_ocr:
    text_results, confidence_scores = batch_recognition(
        images=images_for_ocr,
        languages=languages_for_ocr,
        model=recognizer,
        processor=recognizer_processor,
    )
else:
    # Nothing was detected, so there is nothing to recognise.
    text_results, confidence_scores = [], []
Metadata
Assignees
Labels
documentation: Improvements or additions to documentation