Skip to content

Commit a37ae5d

Browse files
authored
🐛 fix for null objects in extras (#313)
1 parent b006fd5 commit a37ae5d

File tree

1 file changed

+4
-3
lines changed

1 file changed

+4
-3
lines changed

mindee/parsing/common/document.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -62,17 +62,18 @@ def __str__(self) -> str:
6262
def _inject_full_text_ocr(self, raw_prediction: StringDict) -> None:
6363
pages = raw_prediction.get("inference", {}).get("pages", [])
6464

65+
# check for: empty, missing, or null
6566
if (
6667
not pages
67-
or "extras" not in pages[0]
68-
or "full_text_ocr" not in pages[0]["extras"]
68+
or not pages[0].get("extras", None)
69+
or not pages[0]["extras"].get("full_text_ocr", None)
6970
):
7071
return
7172

7273
full_text_content = "\n".join(
7374
page["extras"]["full_text_ocr"]["content"]
7475
for page in pages
75-
if "extras" in page and "full_text_ocr" in page["extras"]
76+
if page.get("extras", None) and page["extras"].get("full_text_ocr", None)
7677
)
7778

7879
artificial_text_obj = {"content": full_text_content}

0 commit comments

Comments
 (0)