Skip to content

Commit 00e8f47

Browse files
♻️ update error handling (#222)
1 parent ed26a86 commit 00e8f47

File tree

10 files changed

+187
-39
lines changed

10 files changed

+187
-39
lines changed

mindee/client.py

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,11 @@
1414
UrlInputSource,
1515
)
1616
from mindee.logger import logger
17+
from mindee.mindee_http import (
18+
clean_request_json,
19+
is_valid_async_response,
20+
is_valid_sync_response,
21+
)
1722
from mindee.mindee_http.endpoint import CustomEndpoint, Endpoint
1823
from mindee.mindee_http.mindee_api import MindeeApi
1924
from mindee.parsing.common.async_predict_response import AsyncPredictResponse
@@ -312,11 +317,11 @@ def send_feedback(
312317
endpoint = self._initialize_ots_endpoint(product_class)
313318

314319
feedback_response = endpoint.document_feedback_req_put(document_id, feedback)
315-
if not feedback_response.ok:
320+
if not is_valid_sync_response(feedback_response):
321+
feedback_clean_response = clean_request_json(feedback_response)
316322
raise handle_error(
317323
str(product_class.endpoint_name),
318-
feedback_response.json(),
319-
feedback_response.status_code,
324+
feedback_clean_response,
320325
)
321326

322327
return FeedbackResponse(feedback_response.json())
@@ -336,11 +341,11 @@ def _make_request(
336341

337342
dict_response = response.json()
338343

339-
if not response.ok:
344+
if not is_valid_sync_response(response):
345+
clean_response = clean_request_json(response)
340346
raise handle_error(
341347
str(product_class.endpoint_name),
342-
dict_response,
343-
response.status_code,
348+
clean_response,
344349
)
345350

346351
return PredictResponse(product_class, dict_response)
@@ -365,11 +370,11 @@ def _predict_async(
365370

366371
dict_response = response.json()
367372

368-
if not response.ok:
373+
if not is_valid_async_response(response):
374+
clean_response = clean_request_json(response)
369375
raise handle_error(
370376
str(product_class.endpoint_name),
371-
dict_response,
372-
response.status_code,
377+
clean_response,
373378
)
374379

375380
return AsyncPredictResponse(product_class, dict_response)
@@ -387,16 +392,11 @@ def _get_queued_document(
387392
"""
388393
queue_response = endpoint.document_queue_req_get(queue_id=queue_id)
389394

390-
if (
391-
not queue_response.status_code
392-
or queue_response.status_code < 200
393-
or queue_response.status_code > 302
394-
):
395-
dict_response = queue_response.json()
395+
if not is_valid_async_response(queue_response):
396+
clean_queue_response = clean_request_json(queue_response)
396397
raise handle_error(
397398
str(product_class.endpoint_name),
398-
dict_response,
399-
queue_response.status_code,
399+
clean_queue_response,
400400
)
401401

402402
return AsyncPredictResponse(product_class, queue_response.json())

mindee/error/mindee_http_error.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,14 +88,24 @@ class MindeeHTTPServerError(MindeeHTTPError):
8888
"""API Server HTTP exception."""
8989

9090

91-
def handle_error(url: str, response: StringDict, code: int) -> MindeeHTTPError:
91+
def handle_error(url: str, response: StringDict) -> MindeeHTTPError:
9292
"""
9393
Creates an appropriate HTTP error exception, based on retrieved HTTP error code.
9494
9595
:param url: url of the product
9696
:param response: StringDict
9797
"""
9898
error_obj = create_error_obj(response)
99+
if not isinstance(response, str) and ( # type: ignore
100+
"status_code" in response
101+
and (
102+
isinstance(response["status_code"], int)
103+
or response["status_code"].isdigit()
104+
)
105+
):
106+
code = int(response["status_code"])
107+
else:
108+
code = 500
99109
if 400 <= code <= 499:
100110
return MindeeHTTPClientError(error_obj, url, code)
101111
if 500 <= code <= 599:

mindee/mindee_http/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
11
from mindee.mindee_http.base_endpoint import BaseEndpoint
22
from mindee.mindee_http.endpoint import CustomEndpoint, Endpoint
33
from mindee.mindee_http.mindee_api import MindeeApi
4+
from mindee.mindee_http.response_validation import (
5+
clean_request_json,
6+
is_valid_async_response,
7+
is_valid_sync_response,
8+
)
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
import json
2+
3+
import requests
4+
5+
from mindee.parsing.common import StringDict
6+
7+
8+
def is_valid_sync_response(response: requests.Response) -> bool:
    """
    Checks if the synchronous response is valid. Returns True if the response is valid.

    A response is valid when it is present, flagged as ``ok``, and its body
    decodes to a JSON object (a ``dict``).

    :param response: a requests response object.
    :return: bool
    """
    if not response or not response.ok:
        return False
    try:
        response_json = json.loads(response.content)
    except (TypeError, ValueError):
        # VERY rare edge case where raw html (or an empty body) is sent instead
        # of json: json.loads raises instead of returning, so the failure has
        # to be caught here. json.JSONDecodeError is a ValueError subclass.
        return False
    # A body that decodes to something other than an object is not usable either.
    return isinstance(response_json, dict)
22+
23+
24+
def is_valid_async_response(response: requests.Response) -> bool:
    """
    Checks if the asynchronous response is valid. Also checks if it is a valid synchronous response.

    Returns True if the response is valid.

    :param response: a requests response object.
    :return: bool
    """
    if not is_valid_sync_response(response):
        return False
    # Checks invalid status codes within the bounds of ok responses.
    if response.status_code and not 200 <= response.status_code <= 302:
        return False
    # The sync check above already established that the body is a JSON object.
    job = json.loads(response.content).get("job")
    # Async errors: the job entry must be present and usable (a null or
    # non-object job would otherwise raise on the membership test below)...
    if not isinstance(job, dict):
        return False
    # ...and it must not carry a non-empty error.
    return not job.get("error")
51+
52+
53+
def clean_request_json(response: requests.Response) -> StringDict:
    """
    Checks and corrects the response error format depending on the two possible kinds of returns.

    :param response: Raw request response.
    :return: Returns the job error if the error is due to parsing, returns the http error otherwise.
    """
    response_json = response.json()
    if response.status_code < 200 or response.status_code > 302:
        # Plain HTTP failure: expose the transport status code alongside the body.
        response_json["status_code"] = response.status_code
        return response_json

    # Same object as the parsed body: the payload is amended in place.
    corrected_json = response_json

    api_request = response_json.get("api_request")
    api_status_code = (
        api_request.get("status_code") if isinstance(api_request, dict) else None
    )
    # The status code may be an int or a numeric string; calling .isdigit()
    # on an int would raise, so both shapes are checked explicitly.
    is_numeric = isinstance(api_status_code, int) or (
        isinstance(api_status_code, str) and api_status_code.isdigit()
    )
    if is_numeric and int(api_status_code) >= 400:
        corrected_json["status_code"] = int(api_status_code)

    job = response_json.get("job")
    if isinstance(job, dict) and job.get("error"):
        # A failed job is surfaced as a server-side error.
        corrected_json["error"] = job["error"]
        corrected_json["status_code"] = 500
    return corrected_json

mindee/parsing/common/async_predict_response.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@ def __init__(
2626
Inherits and instantiates a normal PredictResponse if the parsing of
2727
the current queue is both requested and done.
2828
29-
:param input_source: Input object
30-
:param raw_response: json response from HTTP call
29+
:param inference_type: Type of the inference.
30+
:param raw_response: json response from HTTP call.
3131
"""
3232
super().__init__(raw_response)
3333
self.job = Job(raw_response["job"])

mindee/parsing/common/job.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
from datetime import datetime
33
from typing import Optional
44

5+
from mindee.parsing.common.string_dict import StringDict
6+
57

68
class Job:
79
"""
@@ -12,6 +14,8 @@ class Job:
1214

1315
id: str
1416
"""ID of the job sent by the API in response to an enqueue request."""
17+
error: Optional[StringDict]
18+
"""Information about an error that occurred during the job processing."""
1519
issued_at: datetime
1620
"""Timestamp of the request reception by the API."""
1721
available_at: Optional[datetime]
@@ -33,6 +37,10 @@ def __init__(self, json_response: dict) -> None:
3337
else:
3438
self.available_at = None
3539
self.id = json_response["id"]
40+
if json_response.get("status_code"):
41+
self.status_code = json_response["status_code"]
42+
if json_response.get("error"):
43+
self.error = json_response.get("error")
3644
self.status = json_response["status"]
3745
if self.available_at:
3846
self.millisecs_taken = int(

mindee/parsing/common/predict_response.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@ def __init__(
2121
"""
2222
Container for the raw API response and the parsed document.
2323
24-
:param input_source: Input object
25-
:param http_response: json response from HTTP call
24+
:param inference_type: Type of the inference.
25+
:param raw_response: json response from HTTP call.
2626
"""
2727
super().__init__(raw_response)
2828
self.document = Document(inference_type, raw_response["document"])

tests/api/test_async_response.py

Lines changed: 53 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,41 @@
11
import json
22

33
import pytest
4+
import requests
45

56
from mindee.client import Client
67
from mindee.input.sources import PathInput
8+
from mindee.mindee_http.response_validation import is_valid_async_response
9+
from mindee.parsing.common.api_request import RequestStatus
710
from mindee.parsing.common.async_predict_response import AsyncPredictResponse
811
from mindee.product.invoice_splitter import InvoiceSplitterV1
912

1013
ASYNC_DIR = "./tests/data/async"
1114

12-
FILE_PATH_POST_SUCCESS = f"{ ASYNC_DIR }/post_success.json"
13-
FILE_PATH_POST_FAIL = f"{ ASYNC_DIR }/post_fail_forbidden.json"
14-
FILE_PATH_GET_PROCESSING = f"{ ASYNC_DIR }/get_processing.json"
15-
FILE_PATH_GET_COMPLETED = f"{ ASYNC_DIR }/get_completed.json"
15+
FILE_PATH_POST_SUCCESS = f"{ASYNC_DIR}/post_success.json"
16+
FILE_PATH_POST_FAIL = f"{ASYNC_DIR}/post_fail_forbidden.json"
17+
FILE_PATH_GET_PROCESSING = f"{ASYNC_DIR}/get_processing.json"
18+
FILE_PATH_GET_COMPLETED = f"{ASYNC_DIR}/get_completed.json"
19+
FILE_PATH_GET_FAILED_JOB = f"{ASYNC_DIR}/get_failed_job_error.json"
20+
21+
22+
class FakeResponse(requests.Response):
    """Test double for ``requests.Response`` backed by an in-memory JSON payload."""

    def __init__(self, json_data, _status_code=200):
        """
        :param json_data: JSON-serializable payload served as the response body.
        :param _status_code: HTTP status code reported by the fake response.
        """
        super().__init__()
        self._json_data = json_data
        self.status_code = _status_code
        self._ok = True

    def set_ok_status(self, ok_status):
        """Forces the value reported by the ``ok`` property."""
        self._ok = ok_status

    @property
    def ok(self):
        """Returns the forced ``ok`` flag instead of deriving it from the status code."""
        return self._ok

    @property
    def content(self) -> bytes:
        """Returns the payload serialized as UTF-8 encoded JSON."""
        # requests exposes the body as bytes; returning str would break
        # consumers that rely on that, such as Response.json().
        return json.dumps(self._json_data).encode("utf-8")
1639

1740

1841
@pytest.fixture
@@ -29,6 +52,9 @@ def dummy_client() -> Client:
2952
def test_async_response_post_success():
3053
response = json.load(open(FILE_PATH_POST_SUCCESS))
3154
parsed_response = AsyncPredictResponse(InvoiceSplitterV1, response)
55+
fake_response = FakeResponse(response)
56+
fake_response.set_ok_status(True)
57+
assert is_valid_async_response(fake_response) is True
3258
assert parsed_response.job is not None
3359
assert (
3460
parsed_response.job.issued_at.isoformat() == "2023-02-16T12:33:49.602947+00:00"
@@ -41,18 +67,17 @@ def test_async_response_post_success():
4167

4268
def test_async_response_post_fail():
4369
response = json.load(open(FILE_PATH_POST_FAIL))
44-
parsed_response = AsyncPredictResponse(InvoiceSplitterV1, response)
45-
assert parsed_response.job is not None
46-
assert parsed_response.job.issued_at.isoformat() == "2023-01-01T00:00:00+00:00"
47-
assert parsed_response.job.available_at is None
48-
assert parsed_response.job.status is None
49-
assert parsed_response.job.id is None
50-
assert parsed_response.api_request.error["code"] == "Forbidden"
70+
fake_response = FakeResponse(response)
71+
fake_response.set_ok_status(False)
72+
assert is_valid_async_response(fake_response) is False
5173

5274

5375
def test_async_get_processing():
5476
response = json.load(open(FILE_PATH_GET_PROCESSING))
5577
parsed_response = AsyncPredictResponse(InvoiceSplitterV1, response)
78+
fake_response = FakeResponse(response)
79+
fake_response.set_ok_status(True)
80+
assert is_valid_async_response(fake_response) is True
5681
assert parsed_response.job is not None
5782
assert parsed_response.job.issued_at.isoformat() == "2023-03-16T12:33:49.602947"
5883
assert parsed_response.job.available_at is None
@@ -64,8 +89,25 @@ def test_async_get_processing():
6489
def test_async_response_get_completed():
    """A completed job is a valid async response and parses with both timestamps."""
    # Context manager so the fixture file handle is closed deterministically.
    with open(FILE_PATH_GET_COMPLETED, encoding="utf-8") as json_file:
        response = json.load(json_file)
    parsed_response = AsyncPredictResponse(InvoiceSplitterV1, response)
    fake_response = FakeResponse(response)
    fake_response.set_ok_status(True)
    assert is_valid_async_response(fake_response) is True
    assert parsed_response.job is not None
    assert parsed_response.job.issued_at.isoformat() == "2023-03-21T13:52:56.326107"
    assert parsed_response.job.available_at.isoformat() == "2023-03-21T13:53:00.990339"
    assert parsed_response.job.status == "completed"
    assert parsed_response.api_request.error == {}
100+
101+
102+
def test_async_get_failed_job():
    """A job reporting an error is rejected even though the HTTP exchange succeeded."""
    # Context manager so the fixture file handle is closed deterministically.
    with open(FILE_PATH_GET_FAILED_JOB, encoding="utf-8") as json_file:
        response = json.load(json_file)
    parsed_response = AsyncPredictResponse(InvoiceSplitterV1, response)
    fake_response = FakeResponse(response)
    # The request itself succeeded (status 200, asserted below); keeping ``ok``
    # True makes the validity check fail on the job error rather than on the
    # first ``ok`` guard, which would pass this test trivially.
    fake_response.set_ok_status(True)
    assert is_valid_async_response(fake_response) is False
    assert parsed_response.api_request.status == RequestStatus.SUCCESS
    assert parsed_response.api_request.status_code == 200
    assert parsed_response.job.issued_at.isoformat() == "2024-02-20T10:31:06.878599"
    assert parsed_response.job.available_at.isoformat() == "2024-02-20T10:31:06.878599"
    assert parsed_response.job.status == "failed"
    assert parsed_response.job.error["code"] == "ServerError"

0 commit comments

Comments
 (0)