Closed
Description
Our entire receipt parsing system is currently down, so excuse the terse error report, but I will quickly post this here while I try to work around the issue on our side. I will come back and clean up this issue as soon as I have extinguished some fires on our end.
Here is a (really simplified) example of how we use the Mindee SDK:
from __future__ import annotations
from typing import BinaryIO
from mindee import Client
from mindee.product import InvoiceV4
from some_place import MINDEE_API_KEY
def parse(file: BinaryIO):
    """Submit the contents of *file* to Mindee for InvoiceV4 parsing.

    The stream is read in full to build the input source and is then
    rewound to offset 0 before the parse request is made.

    :param file: Binary stream holding the document; its ``name`` is
        forwarded as the filename.
    :return: Whatever ``Client.parse`` returns for ``InvoiceV4``.
    """
    mindee_client = Client(api_key=MINDEE_API_KEY)

    payload = file.read()
    source_name = file.name
    source = mindee_client.source_from_bytes(
        input_bytes=payload,
        filename=source_name,
    )

    # Rewind so the stream can be read again after this function returns.
    file.seek(0)

    # <-- This is the call that fails.
    return mindee_client.parse(product_class=InvoiceV4, input_source=source)
Here is a screenshot that shows Mindee's JSON API response:
The Mindee Python SDK client does not handle this response gracefully:
File ~/crdbrd/hub/src/hub/receipts/mindee.py:108, in parse(file, media_type)
103 raise exceptions.ReceiptParsingError from exc
105 file.seek(0)
106 parser_result = cast(
107 PredictResponse[InvoiceV4],
--> 108 client.parse( # pyright: ignore[reportUnknownMemberType]
109 product_class=InvoiceV4,
110 input_source=input_source,
111 ),
112 )
113 document = cast(
114 Document[InvoiceV4Document, Page[InvoiceV4Document]] | None,
115 parser_result.document,
116 )
117 if document is None:
File ~/crdbrd/hub/.venv/lib/python3.13/site-packages/mindee/client.py:126, in Client.parse(self, product_class, input_source, include_words, close_file, page_options, cropper, endpoint, full_text)
120 if page_options and input_source.is_pdf():
121 input_source.process_pdf(
122 page_options.operation,
123 page_options.on_min_pages,
124 page_options.page_indexes,
125 )
--> 126 return self._make_request(
127 product_class,
128 input_source,
129 endpoint,
130 include_words,
131 close_file,
132 cropper,
133 full_text,
134 )
File ~/crdbrd/hub/.venv/lib/python3.13/site-packages/mindee/client.py:427, in Client._make_request(self, product_class, input_source, endpoint, include_words, close_file, cropper, full_text)
421 clean_response = clean_request_json(response)
422 raise handle_error(
423 str(product_class.endpoint_name),
424 clean_response,
425 )
--> 427 return PredictResponse(product_class, dict_response)
File ~/crdbrd/hub/.venv/lib/python3.13/site-packages/mindee/parsing/common/predict_response.py:28, in PredictResponse.__init__(self, inference_type, raw_response)
21 """
22 Container for the raw API response and the parsed document.
23
24 :param inference_type: Type of the inference.
25 :param raw_response: json response from HTTP call.
26 """
27 super().__init__(raw_response)
---> 28 self.document = Document(inference_type, raw_response["document"])
File ~/crdbrd/hub/.venv/lib/python3.13/site-packages/mindee/parsing/common/document.py:50, in Document.__init__(self, inference_type, raw_response)
48 if "extras" in raw_response and raw_response["inference"]["extras"]:
49 self.extras = Extras(raw_response["extras"])
---> 50 self._inject_full_text_ocr(raw_response)
51 self.inference = inference_type(raw_response["inference"])
52 self.n_pages = raw_response["n_pages"]
File ~/crdbrd/hub/.venv/lib/python3.13/site-packages/mindee/parsing/common/document.py:72, in Document._inject_full_text_ocr(self, raw_prediction)
65 if (
66 not pages
67 or "extras" not in pages[0]
68 or "full_text_ocr" not in pages[0]["extras"]
69 ):
70 return
---> 72 full_text_content = "\n".join(
73 page["extras"]["full_text_ocr"]["content"]
74 for page in pages
75 if "extras" in page and "full_text_ocr" in page["extras"]
76 )
78 artificial_text_obj = {"content": full_text_content}
80 if not hasattr(self, "extras") or not self.extras:
File ~/crdbrd/hub/.venv/lib/python3.13/site-packages/mindee/parsing/common/document.py:73, in <genexpr>(.0)
65 if (
66 not pages
67 or "extras" not in pages[0]
68 or "full_text_ocr" not in pages[0]["extras"]
69 ):
70 return
72 full_text_content = "\n".join(
---> 73 page["extras"]["full_text_ocr"]["content"]
74 for page in pages
75 if "extras" in page and "full_text_ocr" in page["extras"]
76 )
78 artificial_text_obj = {"content": full_text_content}
80 if not hasattr(self, "extras") or not self.extras:
TypeError: 'NoneType' object is not subscriptable
Either these keys should not have been included in the response, or the client should check
for `None` before processing the data.
Metadata
Metadata
Assignees
Labels
No labels