From 3151aaffd56b7c0d604ad00068ccf7b3b287d109 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ianar=C3=A9=20S=C3=A9vi?= Date: Thu, 27 Mar 2025 15:55:52 +0100 Subject: [PATCH] :bug: fix for null objects in extras --- mindee/parsing/common/document.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/mindee/parsing/common/document.py b/mindee/parsing/common/document.py index 3b4ba7c3..cb0af6a3 100644 --- a/mindee/parsing/common/document.py +++ b/mindee/parsing/common/document.py @@ -62,17 +62,18 @@ def __str__(self) -> str: def _inject_full_text_ocr(self, raw_prediction: StringDict) -> None: pages = raw_prediction.get("inference", {}).get("pages", []) + # check for: empty, missing, or null if ( not pages - or "extras" not in pages[0] - or "full_text_ocr" not in pages[0]["extras"] + or not pages[0].get("extras", None) + or not pages[0]["extras"].get("full_text_ocr", None) ): return full_text_content = "\n".join( page["extras"]["full_text_ocr"]["content"] for page in pages - if "extras" in page and "full_text_ocr" in page["extras"] + if page.get("extras", None) and page["extras"].get("full_text_ocr", None) ) artificial_text_obj = {"content": full_text_content}