From ce2c77513697ebf4efbfcba10288c725648edfcb Mon Sep 17 00:00:00 2001 From: sebastianMindee Date: Wed, 22 May 2024 11:03:17 +0200 Subject: [PATCH 1/3] :sparkles: add support for local loading --- mindee/client.py | 18 +++++++++++++ mindee/input/__init__.py | 1 + mindee/input/local_response.py | 29 +++++++++++++++++++++ tests/test_client.py | 46 +++++++++++++++++++++++++++++++--- 4 files changed, 91 insertions(+), 3 deletions(-) create mode 100644 mindee/input/local_response.py diff --git a/mindee/client.py b/mindee/client.py index b438b1ce..27d1680b 100644 --- a/mindee/client.py +++ b/mindee/client.py @@ -4,6 +4,7 @@ from mindee.error.mindee_error import MindeeClientError, MindeeError from mindee.error.mindee_http_error import handle_error +from mindee.input import LocalResponse from mindee.input.page_options import PageOptions from mindee.input.sources import ( Base64Input, @@ -178,6 +179,23 @@ def enqueue( cropper, ) + def load_prediction( + self, product_class: Type[Inference], local_response: LocalResponse + ) -> Union[AsyncPredictResponse, PredictResponse]: + """ + Load a prediction. + + :param product_class: Class of the product to use. + :param local_response: Local response to load. + :return: + """ + try: + if local_response.json.get("job"): + return AsyncPredictResponse(product_class, local_response.json) + return PredictResponse(product_class, local_response.json) + except KeyError as exc: + raise MindeeError("No prediction found in local response.") from exc + def parse_queued( self, product_class: Type[Inference], diff --git a/mindee/input/__init__.py b/mindee/input/__init__.py index fde6821d..81744be8 100644 --- a/mindee/input/__init__.py +++ b/mindee/input/__init__.py @@ -1,3 +1,4 @@ +from mindee.input.local_response import LocalResponse from mindee.input.page_options import PageOptions from mindee.input.sources import ( Base64Input, diff --git a/mindee/input/local_response.py b/mindee/input/local_response.py new file mode 100644 index 00000000..612078ea --- /dev/null +++ b/mindee/input/local_response.py @@ -0,0 +1,29 @@ +import json +from pathlib import Path +from typing import Union, BinaryIO, Dict, Any +import io + +from mindee.error import MindeeError + + +class LocalResponse: + json: Dict[str, Any] + + def __init__(self, input_file: Union[BinaryIO, str, Path, bytes]): + input_binary: BinaryIO + if isinstance(input_file, BinaryIO): + input_binary = input_file + input_binary.seek(0) + elif isinstance(input_file, str) or isinstance(input_file, Path): + with open(input_file, 'rb') as f: + input_binary = io.BytesIO(f.read()) + elif isinstance(input_file, bytes): + input_binary = io.BytesIO(input_file) + else: + raise TypeError('Incompatible type for input.') + try: + input_binary.seek(0) + self.json = json.load(input_binary) + input_binary.close() + except json.decoder.JSONDecodeError as exc: + raise MindeeError('File is not a valid dictionary.') from exc diff --git a/tests/test_client.py b/tests/test_client.py index 873f2a27..a3ef0d0d 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -2,13 +2,16 @@ import pytest -from mindee import Client, PageOptions, product -from mindee.error.mindee_error import MindeeClientError +from mindee import AsyncPredictResponse, Client, PageOptions, PredictResponse, product +from mindee.error.mindee_error import MindeeClientError, MindeeError from mindee.error.mindee_http_error import MindeeHTTPError +from mindee.input import LocalResponse from mindee.input.sources import LocalInputSource +from mindee.product import InternationalIdV2, InvoiceV4 from mindee.product.invoice_splitter.invoice_splitter_v1 import InvoiceSplitterV1 from mindee.product.receipt.receipt_v4 import ReceiptV4 -from tests.test_inputs import FILE_TYPES_DIR +from tests.mindee_http.test_error import ERROR_DATA_DIR +from tests.test_inputs import FILE_TYPES_DIR, PRODUCT_DATA_DIR from tests.utils import clear_envvars, dummy_envvars @@ -113,3 +116,40 @@ def test_async_wrong_polling_delay(dummy_client: Client): input_doc = dummy_client.source_from_path(FILE_TYPES_DIR / "pdf" / "blank.pdf") with pytest.raises(MindeeClientError): dummy_client.enqueue_and_parse(InvoiceSplitterV1, input_doc, delay_sec=0) + + +def test_local_response_from_sync_json(dummy_client: Client): + input_file = LocalResponse( + PRODUCT_DATA_DIR / "invoices" / "response_v4" / "complete.json" + ) + with open(PRODUCT_DATA_DIR / "invoices" / "response_v4" / "summary_full.rst") as f: + reference_doc = f.read() + result = dummy_client.load_prediction(InvoiceV4, input_file) + assert isinstance(result, PredictResponse) + assert str(result.document) == reference_doc + + +def test_local_response_from_async_json(dummy_client: Client): + input_file = LocalResponse( + PRODUCT_DATA_DIR / "international_id" / "response_v2" / "complete.json" + ) + with open( + PRODUCT_DATA_DIR / "international_id" / "response_v2" / "summary_full.rst" + ) as f: + reference_doc = f.read() + result = dummy_client.load_prediction(InternationalIdV2, input_file) + assert isinstance(result, AsyncPredictResponse) + assert str(result.document) == reference_doc + + +def test_local_response_from_invalid_file(dummy_client: Client): + with pytest.raises(MindeeError): + LocalResponse( + PRODUCT_DATA_DIR / "invoices" / "response_v4" / "summary_full.rst" + ) + + +def test_local_response_from_invalid_dict(dummy_client: Client): + input_file = LocalResponse(ERROR_DATA_DIR / "error_400_no_details.json") + with pytest.raises(MindeeError): + dummy_client.load_prediction(InvoiceV4, input_file) From 5964fad6d66182ef3dda6a29d131745abae80f5f Mon Sep 17 00:00:00 2001 From: sebastianMindee Date: Fri, 24 May 2024 11:15:34 +0200 Subject: [PATCH 2/3] :sparkles: add support for HMAC signature usage for localresponses --- mindee/client.py | 6 +- mindee/input/local_response.py | 91 +++++++++++++++++++++++++----- tests/Input/test_local_response.py | 54 ++++++++++++++++++ tests/api/test_async_response.py | 13 +++-- tests/test_client.py | 7 ++- 5 files changed, 144 insertions(+), 27 deletions(-) create mode 100644 tests/Input/test_local_response.py diff --git a/mindee/client.py b/mindee/client.py index 27d1680b..7b43689b 100644 --- a/mindee/client.py +++ b/mindee/client.py @@ -190,9 +190,9 @@ def load_prediction( :return: """ try: - if local_response.json.get("job"): - return AsyncPredictResponse(product_class, local_response.json) - return PredictResponse(product_class, local_response.json) + if local_response.as_dict.get("job"): + return AsyncPredictResponse(product_class, local_response.as_dict) + return PredictResponse(product_class, local_response.as_dict) except KeyError as exc: raise MindeeError("No prediction found in local response.") from exc diff --git a/mindee/input/local_response.py b/mindee/input/local_response.py index 612078ea..b9017541 100644 --- a/mindee/input/local_response.py +++ b/mindee/input/local_response.py @@ -1,29 +1,90 @@ +import hashlib +import hmac +import io import json from pathlib import Path -from typing import Union, BinaryIO, Dict, Any -import io +from typing import Any, BinaryIO, Dict, Union from mindee.error import MindeeError class LocalResponse: - json: Dict[str, Any] + """Local response loaded from a file.""" + + _file: BinaryIO + """File object of the local response.""" def __init__(self, input_file: Union[BinaryIO, str, Path, bytes]): - input_binary: BinaryIO if isinstance(input_file, BinaryIO): - input_binary = input_file - input_binary.seek(0) - elif isinstance(input_file, str) or isinstance(input_file, Path): - with open(input_file, 'rb') as f: - input_binary = io.BytesIO(f.read()) + self._file = input_file + self._file.seek(0) + elif isinstance(input_file, (str, Path)): + with open(input_file, "r", encoding="utf-8") as file: + self._file = io.BytesIO( + file.read().replace("\r", "").replace("\n", "").encode() + ) elif isinstance(input_file, bytes): - input_binary = io.BytesIO(input_file) + self._file = io.BytesIO(input_file) else: - raise TypeError('Incompatible type for input.') + raise MindeeError("Incompatible type for input.") + + @property + def as_dict(self) -> Dict[str, Any]: + """ + Returns the dictionary representation of the file. + + :return: A json-like dictionary. + """ try: - input_binary.seek(0) - self.json = json.load(input_binary) - input_binary.close() + self._file.seek(0) + out_json = json.loads(self._file.read()) except json.decoder.JSONDecodeError as exc: - raise MindeeError('File is not a valid dictionary.') from exc + raise MindeeError("File is not a valid dictionary.") from exc + return out_json + + @staticmethod + def _process_secret_key( + secret_key: Union[str, bytes, bytearray] + ) -> Union[bytes, bytearray]: + """ + Processes the secret key as a byte array. + + :param secret_key: Secret key, either a string or a byte/byte array. + :return: a byte/byte array secret key. + """ + if isinstance(secret_key, (bytes, bytearray)): + return secret_key + return secret_key.encode("utf-8") + + def get_hmac_signature(self, secret_key: Union[str, bytes, bytearray]): + """ + Returns the hmac signature of the local response, from the secret key provided. + + :param secret_key: Secret key, either a string or a byte/byte array. + :return: The hmac signature of the local response. + """ + algorithm = hashlib.sha256 + + try: + self._file.seek(0) + mac = hmac.new( + LocalResponse._process_secret_key(secret_key), + self._file.read(), + algorithm, + ) + except (TypeError, ValueError) as exc: + raise MindeeError("Could not get HMAC signature from payload.") from exc + + return mac.hexdigest() + + def is_valid_hmac_signature( + self, secret_key: Union[str, bytes, bytearray], signature: str + ): + """ + Checks if the hmac signature of the local response is valid. + + :param secret_key: Secret key, given as a string. + :param signature: + :return: True if the HMAC signature is valid. + """ + return signature == self.get_hmac_signature(secret_key) diff --git a/tests/Input/test_local_response.py b/tests/Input/test_local_response.py new file mode 100644 index 00000000..293fd1ec --- /dev/null +++ b/tests/Input/test_local_response.py @@ -0,0 +1,54 @@ +from pathlib import Path + +import pytest + +from mindee.input import LocalResponse +from tests.api.test_async_response import ASYNC_DIR + + +@pytest.fixture +def dummy_secret_key(): + return "ogNjY44MhvKPGTtVsI8zG82JqWQa68woYQH" + + +@pytest.fixture +def signature(): + return "5ed1673e34421217a5dbfcad905ee62261a3dd66c442f3edd19302072bbf70d0" + + +@pytest.fixture +def file_path(): + return Path(ASYNC_DIR / "get_completed_empty.json") + + +def test_valid_file_local_response(dummy_secret_key, signature, file_path): + local_response = LocalResponse(file_path) + assert local_response._file is not None + assert not local_response.is_valid_hmac_signature( + dummy_secret_key, "invalid signature" + ) + assert signature == local_response.get_hmac_signature(dummy_secret_key) + assert local_response.is_valid_hmac_signature(dummy_secret_key, signature) + + +def test_valid_path_local_response(dummy_secret_key, signature, file_path): + local_response = LocalResponse(file_path) + assert local_response._file is not None + assert not local_response.is_valid_hmac_signature( + dummy_secret_key, "invalid signature" + ) + assert signature == local_response.get_hmac_signature(dummy_secret_key) + assert local_response.is_valid_hmac_signature(dummy_secret_key, signature) + + +def test_valid_bytes_local_response(dummy_secret_key, signature, file_path): + with open(file_path, "r") as f: + str_response = f.read().replace("\r", "").replace("\n", "") + file_bytes = str_response.encode("utf-8") + local_response = LocalResponse(file_bytes) + assert local_response._file is not None + assert not local_response.is_valid_hmac_signature( + dummy_secret_key, "invalid signature" + ) + assert signature == local_response.get_hmac_signature(dummy_secret_key) + assert local_response.is_valid_hmac_signature(dummy_secret_key, signature) diff --git a/tests/api/test_async_response.py b/tests/api/test_async_response.py index 507cb135..dda6715e 100644 --- a/tests/api/test_async_response.py +++ b/tests/api/test_async_response.py @@ -1,4 +1,5 @@ import json +from pathlib import Path import pytest import requests @@ -10,13 +11,13 @@ from mindee.parsing.common.async_predict_response import AsyncPredictResponse from mindee.product.invoice_splitter import InvoiceSplitterV1 -ASYNC_DIR = "./tests/data/async" +ASYNC_DIR = Path("./tests/data/async") -FILE_PATH_POST_SUCCESS = f"{ASYNC_DIR}/post_success.json" -FILE_PATH_POST_FAIL = f"{ASYNC_DIR}/post_fail_forbidden.json" -FILE_PATH_GET_PROCESSING = f"{ASYNC_DIR}/get_processing.json" -FILE_PATH_GET_COMPLETED = f"{ASYNC_DIR}/get_completed.json" -FILE_PATH_GET_FAILED_JOB = f"{ASYNC_DIR}/get_failed_job_error.json" +FILE_PATH_POST_SUCCESS = ASYNC_DIR / "post_success.json" +FILE_PATH_POST_FAIL = ASYNC_DIR / "post_fail_forbidden.json" +FILE_PATH_GET_PROCESSING = ASYNC_DIR / "get_processing.json" +FILE_PATH_GET_COMPLETED = ASYNC_DIR / "get_completed.json" +FILE_PATH_GET_FAILED_JOB = ASYNC_DIR / "get_failed_job_error.json" class FakeResponse(requests.Response): diff --git a/tests/test_client.py b/tests/test_client.py index a3ef0d0d..b00e1684 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -143,10 +143,11 @@ def test_local_response_from_async_json(dummy_client: Client): def test_local_response_from_invalid_file(dummy_client: Client): + local_response = LocalResponse( + PRODUCT_DATA_DIR / "invoices" / "response_v4" / "summary_full.rst" + ) with pytest.raises(MindeeError): - LocalResponse( - PRODUCT_DATA_DIR / "invoices" / "response_v4" / "summary_full.rst" - ) + print(local_response.as_dict) def test_local_response_from_invalid_dict(dummy_client: Client): From 70f302a6817b5c2a58074c3f976e15e301576a7f Mon Sep 17 00:00:00 2001 From: sebastianMindee Date: Fri, 24 May 2024 11:48:19 +0200 Subject: [PATCH 3/3] apply fixes --- mindee/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mindee/client.py b/mindee/client.py index 7b43689b..241bb50f 100644 --- a/mindee/client.py +++ b/mindee/client.py @@ -187,7 +187,7 @@ def load_prediction( :param product_class: Class of the product to use. :param local_response: Local response to load. - :return: + :return: A valid prediction. """ try: if local_response.as_dict.get("job"):