From 145f8b4786d867411b0eeeb6358be42b93e41451 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Wed, 2 Jul 2025 14:32:59 +0200 Subject: [PATCH 01/17] :sparkles: add support for V2 client --- mindee/__init__.py | 1 + mindee/client.py | 104 +---------- mindee/client_mixin.py | 104 +++++++++++ mindee/client_v2.py | 184 +++++++++++++++++++ mindee/error/mindee_error.py | 4 + mindee/error/mindee_http_error_v2.py | 45 +++++ mindee/input/__init__.py | 2 + mindee/input/inference_predict_options.py | 21 +++ mindee/input/polling_options_v2.py | 19 ++ mindee/mindee_http/base_settings.py | 13 +- mindee/mindee_http/mindee_api_v2.py | 102 ++++++++++ mindee/mindee_http/response_validation_v2.py | 37 ++++ mindee/mindee_http/settings_mixin.py | 18 ++ mindee/parsing/v2/__init__.py | 13 ++ mindee/parsing/v2/base_field.py | 78 ++++++++ mindee/parsing/v2/common_response.py | 20 ++ mindee/parsing/v2/error_response.py | 11 ++ mindee/parsing/v2/inference.py | 33 ++++ mindee/parsing/v2/inference_fields.py | 7 + mindee/parsing/v2/inference_file.py | 11 ++ mindee/parsing/v2/inference_model.py | 11 ++ mindee/parsing/v2/inference_options.py | 12 ++ mindee/parsing/v2/inference_response.py | 13 ++ mindee/parsing/v2/inference_result.py | 22 +++ mindee/parsing/v2/job.py | 26 +++ mindee/parsing/v2/polling_response.py | 13 ++ mindee/parsing/v2/webhook.py | 29 +++ tests/data | 2 +- 28 files changed, 843 insertions(+), 112 deletions(-) create mode 100644 mindee/client_mixin.py create mode 100644 mindee/client_v2.py create mode 100644 mindee/error/mindee_http_error_v2.py create mode 100644 mindee/input/inference_predict_options.py create mode 100644 mindee/input/polling_options_v2.py create mode 100644 mindee/mindee_http/mindee_api_v2.py create mode 100644 mindee/mindee_http/response_validation_v2.py create mode 100644 mindee/mindee_http/settings_mixin.py create mode 100644 mindee/parsing/v2/__init__.py create mode 100644 mindee/parsing/v2/base_field.py create mode 100644 mindee/parsing/v2/common_response.py create mode 100644 mindee/parsing/v2/error_response.py create mode 100644 mindee/parsing/v2/inference.py create mode 100644 mindee/parsing/v2/inference_fields.py create mode 100644 mindee/parsing/v2/inference_file.py create mode 100644 mindee/parsing/v2/inference_model.py create mode 100644 mindee/parsing/v2/inference_options.py create mode 100644 mindee/parsing/v2/inference_response.py create mode 100644 mindee/parsing/v2/inference_result.py create mode 100644 mindee/parsing/v2/job.py create mode 100644 mindee/parsing/v2/polling_response.py create mode 100644 mindee/parsing/v2/webhook.py diff --git a/mindee/__init__.py b/mindee/__init__.py index 9066d36a..5401bca8 100644 --- a/mindee/__init__.py +++ b/mindee/__init__.py @@ -1,5 +1,6 @@ from mindee import product from mindee.client import Client +from mindee.input.inference_predict_options import InferencePredictOptions from mindee.input.local_response import LocalResponse from mindee.input.page_options import PageOptions from mindee.parsing.common.api_response import ApiResponse diff --git a/mindee/client.py b/mindee/client.py index 5e508e3a..6b8d3ba1 100644 --- a/mindee/client.py +++ b/mindee/client.py @@ -1,18 +1,14 @@ -from pathlib import Path from time import sleep -from typing import BinaryIO, Dict, Optional, Type, Union +from typing import Dict, Optional, Type, Union +from mindee.client_mixin import ClientMixin from mindee.error.mindee_error import MindeeClientError, MindeeError from mindee.error.mindee_http_error import handle_error from mindee.input import WorkflowOptions from mindee.input.local_response import LocalResponse from mindee.input.page_options import PageOptions from mindee.input.predict_options import AsyncPredictOptions, PredictOptions -from mindee.input.sources.base_64_input import Base64Input -from mindee.input.sources.bytes_input import BytesInput -from mindee.input.sources.file_input import FileInput from mindee.input.sources.local_input_source import LocalInputSource -from mindee.input.sources.path_input import PathInput from mindee.input.sources.url_input_source import UrlInputSource from mindee.logger import logger from mindee.mindee_http.endpoint import CustomEndpoint, Endpoint @@ -55,7 +51,7 @@ def _clean_account_name(account_name: str) -> str: return account_name -class Client: +class Client(ClientMixin): """ Mindee API Client. @@ -275,23 +271,6 @@ def execute_workflow( logger.debug("Sending document to workflow: %s", workflow_id) return self._send_to_workflow(GeneratedV1, input_source, workflow_id, options) - def _validate_async_params( - self, initial_delay_sec: float, delay_sec: float, max_retries: int - ) -> None: - min_delay = 1 - min_initial_delay = 1 - min_retries = 1 - if delay_sec < min_delay: - raise MindeeClientError( - f"Cannot set auto-parsing delay to less than {min_delay} second(s)." - ) - if initial_delay_sec < min_initial_delay: - raise MindeeClientError( - f"Cannot set initial parsing delay to less than {min_initial_delay} second(s)." - ) - if max_retries < min_retries: - raise MindeeClientError(f"Cannot set retries to less than {min_retries}.") - def enqueue_and_parse( # pylint: disable=too-many-locals self, product_class: Type[Inference], @@ -583,80 +562,3 @@ def create_endpoint( ) version = "1" return self._build_endpoint(endpoint_name, account_name, version) - - @staticmethod - def source_from_path( - input_path: Union[Path, str], fix_pdf: bool = False - ) -> PathInput: - """ - Load a document from an absolute path, as a string. - - :param input_path: Path of file to open - :param fix_pdf: Whether to attempt fixing PDF files before sending. - Setting this to `True` can modify the data sent to Mindee. - """ - input_doc = PathInput(input_path) - if fix_pdf: - input_doc.fix_pdf() - return input_doc - - @staticmethod - def source_from_file(input_file: BinaryIO, fix_pdf: bool = False) -> FileInput: - """ - Load a document from a normal Python file object/handle. - - :param input_file: Input file handle - :param fix_pdf: Whether to attempt fixing PDF files before sending. - Setting this to `True` can modify the data sent to Mindee. - """ - input_doc = FileInput(input_file) - if fix_pdf: - input_doc.fix_pdf() - return input_doc - - @staticmethod - def source_from_b64string( - input_string: str, filename: str, fix_pdf: bool = False - ) -> Base64Input: - """ - Load a document from a base64 encoded string. - - :param input_string: Input to parse as base64 string - :param filename: The name of the file (without the path) - :param fix_pdf: Whether to attempt fixing PDF files before sending. - Setting this to `True` can modify the data sent to Mindee. - """ - input_doc = Base64Input(input_string, filename) - if fix_pdf: - input_doc.fix_pdf() - return input_doc - - @staticmethod - def source_from_bytes( - input_bytes: bytes, filename: str, fix_pdf: bool = False - ) -> BytesInput: - """ - Load a document from raw bytes. - - :param input_bytes: Raw byte input - :param filename: The name of the file (without the path) - :param fix_pdf: Whether to attempt fixing PDF files before sending. - Setting this to `True` can modify the data sent to Mindee. - """ - input_doc = BytesInput(input_bytes, filename) - if fix_pdf: - input_doc.fix_pdf() - return input_doc - - @staticmethod - def source_from_url( - url: str, - ) -> UrlInputSource: - """ - Load a document from a URL. - - :param url: Raw byte input - """ - return UrlInputSource( - url, - ) diff --git a/mindee/client_mixin.py b/mindee/client_mixin.py new file mode 100644 index 00000000..619a4d70 --- /dev/null +++ b/mindee/client_mixin.py @@ -0,0 +1,104 @@ +from pathlib import Path +from typing import BinaryIO, Union + +from mindee.error import MindeeClientError +from mindee.input import Base64Input, BytesInput, FileInput, PathInput, UrlInputSource + + +class ClientMixin: + """Mixin for client Client V1 & V2 common static methods.""" + + @staticmethod + def source_from_path( + input_path: Union[Path, str], fix_pdf: bool = False + ) -> PathInput: + """ + Load a document from an absolute path, as a string. + + :param input_path: Path of file to open + :param fix_pdf: Whether to attempt fixing PDF files before sending. + Setting this to `True` can modify the data sent to Mindee. + """ + input_doc = PathInput(input_path) + if fix_pdf: + input_doc.fix_pdf() + return input_doc + + @staticmethod + def source_from_file(input_file: BinaryIO, fix_pdf: bool = False) -> FileInput: + """ + Load a document from a normal Python file object/handle. + + :param input_file: Input file handle + :param fix_pdf: Whether to attempt fixing PDF files before sending. + Setting this to `True` can modify the data sent to Mindee. + """ + input_doc = FileInput(input_file) + if fix_pdf: + input_doc.fix_pdf() + return input_doc + + @staticmethod + def source_from_b64string( + input_string: str, filename: str, fix_pdf: bool = False + ) -> Base64Input: + """ + Load a document from a base64 encoded string. + + :param input_string: Input to parse as base64 string + :param filename: The name of the file (without the path) + :param fix_pdf: Whether to attempt fixing PDF files before sending. + Setting this to `True` can modify the data sent to Mindee. + """ + input_doc = Base64Input(input_string, filename) + if fix_pdf: + input_doc.fix_pdf() + return input_doc + + @staticmethod + def source_from_bytes( + input_bytes: bytes, filename: str, fix_pdf: bool = False + ) -> BytesInput: + """ + Load a document from raw bytes. + + :param input_bytes: Raw byte input + :param filename: The name of the file (without the path) + :param fix_pdf: Whether to attempt fixing PDF files before sending. + Setting this to `True` can modify the data sent to Mindee. + """ + input_doc = BytesInput(input_bytes, filename) + if fix_pdf: + input_doc.fix_pdf() + return input_doc + + @staticmethod + def source_from_url( + url: str, + ) -> UrlInputSource: + """ + Load a document from a URL. + + :param url: Raw byte input + """ + return UrlInputSource( + url, + ) + + @staticmethod + def _validate_async_params( + initial_delay_sec: float, delay_sec: float, max_retries: int + ) -> None: + min_delay = 1 + min_initial_delay = 1 + min_retries = 1 + if delay_sec < min_delay: + raise MindeeClientError( + f"Cannot set auto-parsing delay to less than {min_delay} second(s)." + ) + if initial_delay_sec < min_initial_delay: + raise MindeeClientError( + f"Cannot set initial parsing delay to less than {min_initial_delay} second(s)." + ) + if max_retries < min_retries: + raise MindeeClientError(f"Cannot set retries to less than {min_retries}.") diff --git a/mindee/client_v2.py b/mindee/client_v2.py new file mode 100644 index 00000000..a9cd30a9 --- /dev/null +++ b/mindee/client_v2.py @@ -0,0 +1,184 @@ +from time import sleep +from typing import Optional, Union + +from mindee.client_mixin import ClientMixin +from mindee.error.mindee_error import MindeeError +from mindee.error.mindee_http_error_v2 import handle_error_v2 +from mindee.input.inference_predict_options import InferencePredictOptions +from mindee.input.local_response import LocalResponse +from mindee.input.page_options import PageOptions +from mindee.input.polling_options_v2 import PollingOptionsV2 +from mindee.input.sources.local_input_source import LocalInputSource +from mindee.logger import logger +from mindee.mindee_http.mindee_api_v2 import MindeeApiV2 +from mindee.mindee_http.response_validation_v2 import ( + is_valid_get_response, + is_valid_post_response, +) +from mindee.parsing.v2.inference_response import InferenceResponse +from mindee.parsing.v2.polling_response import PollingResponse + + +def load_prediction(local_response: LocalResponse) -> InferenceResponse: + """ + Load a prediction. + + :param local_response: Local response to load. + :return: A valid prediction. + """ + try: + return InferenceResponse(local_response.as_dict) + except KeyError as exc: + raise MindeeError("No prediction found in local response.") from exc + + +class ClientV2(ClientMixin): + """ + Mindee API Client. + + See: https://developers.mindee.com/docs/ + """ + + api_key: Optional[str] + mindee_api: MindeeApiV2 + + def __init__(self, api_key: Optional[str] = None) -> None: + """ + Mindee API Client. + + :param api_key: Your API key for all endpoints + """ + self.api_key = api_key + self.mindee_api = MindeeApiV2(api_key) + + def enqueue( + self, + input_source: LocalInputSource, + options: InferencePredictOptions, + page_options: Optional[PageOptions] = None, + close_file: bool = True, + ) -> PollingResponse: + """ + Enqueues a document to a given model. + + :param input_source: The document/source file to use. + Has to be created beforehand. + + :param options: Options for the prediction. + + :param close_file: Whether to ``close()`` the file after parsing it. + Set to ``False`` if you need to access the file after this operation. + + :param page_options: If set, remove pages from the document as specified. + This is done before sending the file to the server. + It is useful to avoid page limitations. + :return: A valid inference response. + """ + logger.debug("Enqueuing document to '%s'", options.model_id) + + if page_options and input_source.is_pdf(): + input_source.process_pdf( + page_options.operation, + page_options.on_min_pages, + page_options.page_indexes, + ) + + response = self.mindee_api.predict_async_req_post( + input_source=input_source, + options=options, + close_file=close_file, + ) + dict_response = response.json() + + if not is_valid_post_response(response): + handle_error_v2(dict_response) + + return PollingResponse(dict_response) + + def parse_queued( + self, + queue_id: str, + ) -> Union[InferenceResponse, PollingResponse]: + """ + Parses a queued document. + + :param queue_id: queue_id received from the API. + """ + logger.debug("Fetching from queue ''%s", queue_id) + + response = self.mindee_api.document_queue_req_get(queue_id) + if not is_valid_get_response(response): + handle_error_v2(response.json()) + + dict_response = response.json() + if dict_response.get("job"): + return PollingResponse(dict_response) + return InferenceResponse(dict_response) + + def enqueue_and_parse( + self, + input_source: LocalInputSource, + options: InferencePredictOptions, + polling_options: Optional[PollingOptionsV2] = None, + page_options: Optional[PageOptions] = None, + close_file: bool = True, + ) -> InferenceResponse: + """ + Enqueues to an asynchronous endpoint and automatically polls for a response. + + :param input_source: The document/source file to use. + Has to be created beforehand. + + :param options: Options for the prediction. + + :param polling_options: Options for polling. + + :param close_file: Whether to ``close()`` the file after parsing it. + Set to ``False`` if you need to access the file after this operation. + + :param page_options: If set, remove pages from the document as specified. + This is done before sending the file to the server. + It is useful to avoid page limitations. + + :return: A valid inference response. + """ + if not polling_options: + polling_options = PollingOptionsV2() + self._validate_async_params( + polling_options.initial_delay_sec, + polling_options.delay_sec, + polling_options.max_retries, + ) + queue_result = self.enqueue( + input_source, + options, + page_options, + close_file, + ) + logger.debug( + "Successfully enqueued document with job id: %s", queue_result.job.id + ) + sleep(polling_options.initial_delay_sec) + retry_counter = 1 + poll_results = self.parse_queued( + queue_result.job.id, + ) + while retry_counter < polling_options.max_retries: + if not isinstance(poll_results, PollingResponse): + break + if poll_results.job.status == "Failed": + raise MindeeError(f"Parsing failed for job {poll_results.job.id}") + logger.debug( + "Polling server for parsing result with job id: %s", + queue_result.job.id, + ) + retry_counter += 1 + sleep(polling_options.delay_sec) + poll_results = self.parse_queued(queue_result.job.id) + + if not isinstance(poll_results, InferenceResponse): + raise MindeeError( + f"Couldn't retrieve document after {retry_counter} tries." + ) + + return poll_results diff --git a/mindee/error/mindee_error.py b/mindee/error/mindee_error.py index 0a7058db..65302cf8 100644 --- a/mindee/error/mindee_error.py +++ b/mindee/error/mindee_error.py @@ -14,6 +14,10 @@ class MindeeApiError(MindeeError): """An exception relating to settings of the MindeeClient.""" +class MindeeAPIV2Error(MindeeError): + """An exception relating to settings of the MindeeClient V2.""" + + class MindeeSourceError(MindeeError): """An exception relating to document loading.""" diff --git a/mindee/error/mindee_http_error_v2.py b/mindee/error/mindee_http_error_v2.py new file mode 100644 index 00000000..41c0a070 --- /dev/null +++ b/mindee/error/mindee_http_error_v2.py @@ -0,0 +1,45 @@ +import json +from typing import Optional + +from mindee.parsing.common.string_dict import StringDict + + +class MindeeHTTPErrorV2(RuntimeError): + """An exception relating to HTTP calls.""" + + status: int + detail: Optional[str] + + def __init__(self, status: int, detail: Optional[str]) -> None: + """ + Base exception for HTTP calls. + + :param status: HTTP code for the error + :param detail: Error details. + """ + self.status = status + self.detail = detail + super().__init__(f"HTTP error {status} - {detail}") + + +class MindeeHTTPUnknownErrorV2(MindeeHTTPErrorV2): + """HTTP error with unknown status code.""" + + def __init__(self, status: int, detail: Optional[str]) -> None: + super().__init__(-1, f"Couldn't deserialize server error. Found: {detail}") + + +def handle_error_v2(json_response: StringDict) -> None: + """ + Handles HTTP errors by raising MindeeHTTPErrorV2 exceptions with proper details. + + :raises MindeeHTTPErrorV2: If the response has been caught. + :raises MindeeHTTPUnknownErrorV2: If the json return format is unreadable. + """ + try: + raise MindeeHTTPErrorV2( + json_response["job"]["error"]["status"], + json_response["job"]["error"]["detail"], + ) + except Exception as exc: + raise MindeeHTTPUnknownErrorV2(-1, json.dumps(json_response, indent=2)) from exc diff --git a/mindee/input/__init__.py b/mindee/input/__init__.py index 82624650..b10dece5 100644 --- a/mindee/input/__init__.py +++ b/mindee/input/__init__.py @@ -1,5 +1,7 @@ +from mindee.input.inference_predict_options import InferencePredictOptions from mindee.input.local_response import LocalResponse from mindee.input.page_options import PageOptions +from mindee.input.polling_options_v2 import PollingOptionsV2 from mindee.input.sources.base_64_input import Base64Input from mindee.input.sources.bytes_input import BytesInput from mindee.input.sources.file_input import FileInput diff --git a/mindee/input/inference_predict_options.py b/mindee/input/inference_predict_options.py new file mode 100644 index 00000000..df8dce82 --- /dev/null +++ b/mindee/input/inference_predict_options.py @@ -0,0 +1,21 @@ +from dataclasses import dataclass +from typing import List, Optional + + +@dataclass +class InferencePredictOptions: + """Inference prediction options.""" + + model_id: str + """ID of the model.""" + full_text: bool = False + """ + Whether to include the full text data for async APIs. + This performs a full OCR operation on the server and will increase response time and payload size. + """ + rag: bool = False + """If set, will enable Retrieval-Augmented Generation.""" + alias: Optional[str] = None + """Optional alias for the file.""" + webhook_ids: Optional[List[str]] = None + """IDs of webhooks to propagate the API response to.""" diff --git a/mindee/input/polling_options_v2.py b/mindee/input/polling_options_v2.py new file mode 100644 index 00000000..bf7ef142 --- /dev/null +++ b/mindee/input/polling_options_v2.py @@ -0,0 +1,19 @@ +class PollingOptionsV2: + """Options for asynchronous polling.""" + + initial_delay_sec: float + """Initial delay before the first polling attempt.""" + delay_sec: float + """Delay between each polling attempts.""" + max_retries: int + """Total amount of polling attempts.""" + + def __init__( + self, + initial_delay_sec: float = 2, + delay_sec: float = 1.5, + max_retries: int = 80, + ): + self.initial_delay_sec = initial_delay_sec + self.delay_sec = delay_sec + self.max_retries = max_retries diff --git a/mindee/mindee_http/base_settings.py b/mindee/mindee_http/base_settings.py index 1eea385e..cc6ede7e 100644 --- a/mindee/mindee_http/base_settings.py +++ b/mindee/mindee_http/base_settings.py @@ -1,8 +1,9 @@ import os from dataclasses import dataclass -from typing import Dict, Optional, Union +from typing import Dict, Optional from mindee.logger import logger +from mindee.mindee_http.settings_mixin import SettingsMixin from mindee.versions import PYTHON_VERSION, __version__, get_platform API_KEY_ENV_NAME = "MINDEE_API_KEY" @@ -19,7 +20,7 @@ @dataclass -class BaseSettings: +class BaseSettings(SettingsMixin): """Settings class relating to API requests.""" api_key: Optional[str] @@ -61,11 +62,3 @@ def set_from_env(self) -> None: if env_val: func(env_val) logger.debug("Value was set from env: %s", name) - - def set_timeout(self, value: Union[str, int]) -> None: - """Set the timeout for all requests.""" - self.request_timeout = int(value) - - def set_base_url(self, value: str) -> None: - """Set the base URL for all requests.""" - self.base_url = value diff --git a/mindee/mindee_http/mindee_api_v2.py b/mindee/mindee_http/mindee_api_v2.py new file mode 100644 index 00000000..92042e53 --- /dev/null +++ b/mindee/mindee_http/mindee_api_v2.py @@ -0,0 +1,102 @@ +from typing import Dict, Optional + +import requests + +from mindee import InferencePredictOptions +from mindee.error.mindee_error import MindeeApiError +from mindee.input import LocalInputSource +from mindee.mindee_http.base_settings import USER_AGENT +from mindee.mindee_http.settings_mixin import SettingsMixin + +API_KEY_V2_ENV_NAME = "MINDEE_V2_API_KEY" +API_KEY_V2_DEFAULT = "" + +BASE_URL_ENV_NAME = "MINDEE_V2_BASE_URL" +BASE_URL_DEFAULT = "https://api-v2.mindee.com/v2" + +REQUEST_TIMEOUT_ENV_NAME = "MINDEE_REQUEST_TIMEOUT" +TIMEOUT_DEFAULT = 120 + + +class MindeeApiV2(SettingsMixin): + """Settings class relating to API V2 requests.""" + + url_root: str + """Root of the URL to use for polling.""" + api_key: Optional[str] + """API Key for the client.""" + + def __init__( + self, + api_key: Optional[str], + ): + self.api_key = api_key + if not self.api_key or len(self.api_key) == 0: + raise MindeeApiError( + ( + f"Missing API key," + " check your Client configuration.\n" + "You can set this using the " + f"'{API_KEY_V2_ENV_NAME}' environment variable." + ) + ) + self.url_root = f"{self.base_url.rstrip('/')}" + + @property + def base_headers(self) -> Dict[str, str]: + """Base headers to send with all API requests.""" + return { + "Authorization": f"Token {self.api_key}", + "User-Agent": USER_AGENT, + } + + def predict_async_req_post( + self, + input_source: LocalInputSource, + options: InferencePredictOptions, + close_file: bool = True, + ) -> requests.Response: + """ + Make an asynchronous request to POST a document for prediction on the V2 API. + + :param input_source: Input object. + :param options: Options for the enqueueing of the document. + :param close_file: Whether to `close()` the file after parsing it. + :return: requests response. + """ + data = {} + params = {} + url = f"{self.url_root}/inferences/enqueue" + + if options.full_text: + params["full_text_ocr"] = "true" + if options.rag: + params["rag"] = "true" + if options.webhook_ids and len(options.webhook_ids) > 0: + params["webhook_ids"] = ",".join(options.webhook_ids) + if options.alias and len(options.alias): + data["alias"] = options.alias + + files = {"document": input_source.read_contents(close_file)} + response = requests.post( + url=url, + files=files, + headers=self.base_headers, + data=data, + params=params, + timeout=self.request_timeout, + ) + + return response + + def document_queue_req_get(self, queue_id: str) -> requests.Response: + """ + Sends a request matching a given queue_id. Returns either a Job or a Document. + + :param queue_id: queue_id received from the API + """ + return requests.get( + f"{self.url_root}/inferences/{queue_id}", + headers=self.base_headers, + timeout=self.request_timeout, + ) diff --git a/mindee/mindee_http/response_validation_v2.py b/mindee/mindee_http/response_validation_v2.py new file mode 100644 index 00000000..dd49792e --- /dev/null +++ b/mindee/mindee_http/response_validation_v2.py @@ -0,0 +1,37 @@ +import json + +import requests + +from mindee.mindee_http import is_valid_sync_response + + +def is_valid_post_response(response: requests.Response) -> bool: + """ + Checks if the POST response is valid and of the expected format. + + :param response: HTTP response object. + :return: True if the response is valid. + """ + if not is_valid_sync_response(response): + return False + response_json = json.loads(response.content) + if not "job" in response_json: + return False + if "job" in response_json and "error" in response_json["job"]: + return False + return True + + +def is_valid_get_response(response: requests.Response) -> bool: + """ + Checks if the GET response is valid and of the expected format. + + :param response: HTTP response object. + :return: True if the response is valid. + """ + if not is_valid_sync_response(response): + return False + response_json = json.loads(response.content) + if not "inference" in response_json: + return False + return True diff --git a/mindee/mindee_http/settings_mixin.py b/mindee/mindee_http/settings_mixin.py new file mode 100644 index 00000000..10edaf6d --- /dev/null +++ b/mindee/mindee_http/settings_mixin.py @@ -0,0 +1,18 @@ +from typing import Union + + +class SettingsMixin: + """Settings mixin for V2 & V2 common methods & attributes.""" + + base_url: str + """Base URL for all V2 requests.""" + request_timeout: int + """Timeout for all requests.""" + + def set_timeout(self, value: Union[str, int]) -> None: + """Set the timeout for all requests.""" + self.request_timeout = int(value) + + def set_base_url(self, value: str) -> None: + """Set the base URL for all requests.""" + self.base_url = value diff --git a/mindee/parsing/v2/__init__.py b/mindee/parsing/v2/__init__.py new file mode 100644 index 00000000..ff03d053 --- /dev/null +++ b/mindee/parsing/v2/__init__.py @@ -0,0 +1,13 @@ +from mindee.parsing.v2.base_field import ListField, ObjectField, SimpleField +from mindee.parsing.v2.common_response import CommonResponse +from mindee.parsing.v2.error_response import ErrorResponse +from mindee.parsing.v2.inference import Inference +from mindee.parsing.v2.inference_fields import InferenceFields +from mindee.parsing.v2.inference_file import InferenceFile +from mindee.parsing.v2.inference_model import InferenceModel +from mindee.parsing.v2.inference_options import InferenceOptions +from mindee.parsing.v2.inference_response import InferenceResponse +from mindee.parsing.v2.inference_result import InferenceResult +from mindee.parsing.v2.job import Job +from mindee.parsing.v2.polling_response import PollingResponse +from mindee.parsing.v2.webhook import Webhook diff --git a/mindee/parsing/v2/base_field.py b/mindee/parsing/v2/base_field.py new file mode 100644 index 00000000..e9332281 --- /dev/null +++ b/mindee/parsing/v2/base_field.py @@ -0,0 +1,78 @@ +from typing import Dict, List, Union + +from mindee.error.mindee_error import MindeeAPIV2Error +from mindee.parsing.common.string_dict import StringDict + + +class BaseField: + """Base field class for V2.""" + + _indent_level: int + """Indentation level for rst display.""" + + def __init__(self, indent_level=0) -> None: + self._indent_level = indent_level + + @staticmethod + def create_field(raw_response: StringDict, indent_level: int = 0) -> "BaseField": + """Factory function to create appropriate field instances.""" + if isinstance(raw_response, dict): + if "items" in raw_response: + return ListField(raw_response, indent_level) + if "fields" in raw_response: + return ObjectField(raw_response, indent_level) + if "value" in raw_response: + return SimpleField(raw_response, indent_level) + raise MindeeAPIV2Error("Unrecognized field format.") + raise MindeeAPIV2Error("Unrecognized field format.") + + +class ListField(BaseField): + """List field containing multiple fields.""" + + items: List[BaseField] + """Items contained in the list.""" + + def __init__(self, raw_response: StringDict, indent_level: int = 0): + super().__init__(indent_level) + + self.items = [] + for item in raw_response["items"]: + if isinstance(item, dict): + self.items.append(BaseField.create_field(item, 1)) + raise MindeeAPIV2Error("Unrecognized field format.") + + +class ObjectField(BaseField): + """Object field containing multiple fields.""" + + fields: Dict[str, BaseField] + """Fields contained in the object.""" + + def __init__(self, raw_response: StringDict, indent_level: int = 0): + super().__init__(indent_level) + fields: Dict[str, BaseField] = {} + for field_key, field_value in raw_response.items(): + if isinstance(field_value, dict): + fields[field_key] = BaseField.create_field(field_value, 1) + else: + raise MindeeAPIV2Error("Unrecognized field format.") + + def __str__(self) -> str: + out_str = "" + for field_key, field_value in self.fields.items(): + out_str += f"{' ' * self._indent_level}:{field_key}: {field_value}\n" + return out_str + + +class SimpleField(BaseField): + """Simple field containing a single value.""" + + value: Union[str, float, bool, None] + + def __init__(self, raw_response: StringDict, indent_level: int = 0): + super().__init__(indent_level) + self.value = raw_response["value"] if "value" in raw_response else None + + def __str__(self) -> str: + return f"{' ' * self._indent_level}{self.value}\n" diff --git a/mindee/parsing/v2/common_response.py b/mindee/parsing/v2/common_response.py new file mode 100644 index 00000000..90fbe811 --- /dev/null +++ b/mindee/parsing/v2/common_response.py @@ -0,0 +1,20 @@ +import json + +from mindee.logger import logger +from mindee.parsing.common.string_dict import StringDict + + +class CommonResponse: + """Base class for V1 & V2 responses.""" + + _raw_http: StringDict + """Raw request sent by the server, as a dict.""" + + def __init__(self, raw_response: StringDict) -> None: + logger.debug("Handling API response") + self._raw_http = raw_response + + @property + def raw_http(self) -> str: + """Displays the result of the raw response as json string.""" + return json.dumps(self._raw_http, indent=2) diff --git a/mindee/parsing/v2/error_response.py b/mindee/parsing/v2/error_response.py new file mode 100644 index 00000000..6749f73f --- /dev/null +++ b/mindee/parsing/v2/error_response.py @@ -0,0 +1,11 @@ +class ErrorResponse(RuntimeError): + """Error response info.""" + + detail: str + """Detail relevant to the error.""" + + status: int + """Http error code.""" + + def __str__(self): + return f"HTTP Status: {self.status} - {self.detail}" diff --git a/mindee/parsing/v2/inference.py b/mindee/parsing/v2/inference.py new file mode 100644 index 00000000..97dfcfe8 --- /dev/null +++ b/mindee/parsing/v2/inference.py @@ -0,0 +1,33 @@ +from mindee.parsing.common.string_dict import StringDict +from mindee.parsing.v2.inference_file import InferenceFile +from mindee.parsing.v2.inference_model import InferenceModel +from mindee.parsing.v2.inference_result import InferenceResult + + +class Inference: + """Inference object for a V2 API return.""" + + model: InferenceModel + """Model info for the inference.""" + file: InferenceFile + """File info for the inference.""" + result: InferenceResult + """Result of the inference.""" + + def __init__(self, raw_response: StringDict): + self.model = InferenceModel(raw_response["model"]) + self.file = InferenceFile(raw_response["file"]) + self.result = InferenceResult(raw_response["result"]) + + def __str__(self) -> str: + return ( + f"Inference\n" + f"#########\n" + f":Model: {self.model.id}\n" + f":File: {self.file}\n" + f" :Name: {self.file.name}\n\n" + f" :Alias: {self.file.alias}\n\n" + f"Result\n" + f"======\n" + f"\n{self.result}\n" + ) diff --git a/mindee/parsing/v2/inference_fields.py b/mindee/parsing/v2/inference_fields.py new file mode 100644 index 00000000..d8f2c972 --- /dev/null +++ b/mindee/parsing/v2/inference_fields.py @@ -0,0 +1,7 @@ +from typing import Union + +from mindee.parsing.v2.base_field import ListField, ObjectField, SimpleField + + +class InferenceFields(dict[str, Union[ObjectField, ListField, SimpleField]]): + """Inference fields dict.""" diff --git a/mindee/parsing/v2/inference_file.py b/mindee/parsing/v2/inference_file.py new file mode 100644 index 00000000..5b175385 --- /dev/null +++ b/mindee/parsing/v2/inference_file.py @@ -0,0 +1,11 @@ +class InferenceFile: + """Inference File info.""" + + name: str + """Name of the file.""" + alais: str + """Alias of the file.""" + + def __init__(self, json_response: dict) -> None: + self.name = json_response["name"] + self.alias = json_response["alias"] diff --git a/mindee/parsing/v2/inference_model.py b/mindee/parsing/v2/inference_model.py new file mode 100644 index 00000000..0a3dd73e --- /dev/null +++ b/mindee/parsing/v2/inference_model.py @@ -0,0 +1,11 @@ +from mindee.parsing.common.string_dict import StringDict + + +class InferenceModel: + """Inference model info.""" + + id: str + """ID of the model.""" + + def __init__(self, raw_response: StringDict) -> None: + self.id = raw_response["id"] diff --git a/mindee/parsing/v2/inference_options.py b/mindee/parsing/v2/inference_options.py new file mode 100644 index 00000000..6ef1d9a7 --- /dev/null +++ b/mindee/parsing/v2/inference_options.py @@ -0,0 +1,12 @@ +from typing import List + +from mindee.parsing.common.string_dict import StringDict + + +class InferenceOptions: + """Optional information about the document.""" + + raw_text: List[str] + + def __init__(self, raw_response: StringDict): + self.raw_text = raw_response["raw_text"] diff --git a/mindee/parsing/v2/inference_response.py b/mindee/parsing/v2/inference_response.py new file mode 100644 index 00000000..e799258c --- /dev/null +++ b/mindee/parsing/v2/inference_response.py @@ -0,0 +1,13 @@ +from mindee.parsing.common.string_dict import StringDict +from mindee.parsing.v2.common_response import CommonResponse +from mindee.parsing.v2.inference import Inference + + +class InferenceResponse(CommonResponse): + """Represent an inference response from Mindee V2 API.""" + + inference: Inference + + def __init__(self, raw_response: StringDict) -> None: + super().__init__(raw_response) + self.inference = Inference(raw_response["inference"]) diff --git a/mindee/parsing/v2/inference_result.py b/mindee/parsing/v2/inference_result.py new file mode 100644 index 00000000..52ff9439 --- /dev/null +++ b/mindee/parsing/v2/inference_result.py @@ -0,0 +1,22 @@ +from mindee.parsing.common.string_dict import StringDict +from mindee.parsing.v2.inference_fields import InferenceFields +from mindee.parsing.v2.inference_options import InferenceOptions + + +class InferenceResult: + """Inference result info.""" + + fields: InferenceFields + """Fields contained in the inference.""" + options: InferenceOptions + """Potential options retrieved alongside the inference.""" + + def __init__(self, json_response: StringDict) -> None: + self.fields = InferenceFields(json_response["fields"]) + self.options = InferenceOptions(json_response["options"]) + + def __str__(self) -> str: + str_fields = "" + for field_key, field_value in self.fields.items(): + str_fields += f" :{field_key}: {field_value}\n" + return f":fields: {str_fields}\n" f"options: {self.options}\n" diff --git a/mindee/parsing/v2/job.py b/mindee/parsing/v2/job.py new file mode 100644 index 00000000..d4f091d9 --- /dev/null +++ b/mindee/parsing/v2/job.py @@ -0,0 +1,26 @@ +from mindee.parsing.common.string_dict import StringDict +from mindee.parsing.v2.error_response import ErrorResponse + + +class Job: + """Job information for a V2 polling attempt.""" + + id: str + """Job ID.""" + error: ErrorResponse + """Error response if any.""" + model_id: str + """ID of the model.""" + file_name: str + """Name for the file.""" + file_alias: str + """Optional alias for the file.""" + status: str + """Status of the job.""" + + def __init__(self, raw_response: StringDict) -> None: + self.status = raw_response["status"] + self.error = ErrorResponse(raw_response["error"]) + self.model_id = raw_response["model_id"] + self.file_name = raw_response["file_name"] + self.file_alias = raw_response["file_alias"] diff --git a/mindee/parsing/v2/polling_response.py b/mindee/parsing/v2/polling_response.py new file mode 100644 index 00000000..d5fb7690 --- /dev/null +++ b/mindee/parsing/v2/polling_response.py @@ -0,0 +1,13 @@ +from mindee.parsing.common.string_dict import StringDict +from mindee.parsing.v2.common_response import CommonResponse +from mindee.parsing.v2.job import Job + + +class PollingResponse(CommonResponse): + """Represent an inference response from Mindee V2 API.""" + + job: Job + + def __init__(self, raw_response: StringDict) -> None: + super().__init__(raw_response) + self.job = Job(raw_response["job"]) diff --git a/mindee/parsing/v2/webhook.py b/mindee/parsing/v2/webhook.py new file mode 100644 index 00000000..d151f54e --- /dev/null +++ b/mindee/parsing/v2/webhook.py @@ -0,0 +1,29 @@ +from datetime import datetime + +from mindee.parsing.common.string_dict import StringDict +from mindee.parsing.v2.error_response import ErrorResponse + + +class Webhook: + """Webhook information for a V2 polling attempt.""" + + id: str + """ID of the webhook.""" + error: ErrorResponse + """Error response if any.""" + created_at: datetime + """Date and time the webhook was sent at.""" + status: str + """Status of the webhook.""" + + def __init__(self, raw_response: StringDict) -> None: + self.id = raw_response["id"] + self.error = ErrorResponse(raw_response["error"]) + self.created_at = self.parse_date(raw_response["created_at"]) + self.status = raw_response["status"] + + @staticmethod + def parse_date(date_string: str) -> datetime: + """Shorthand to parse the date.""" + date_string = date_string.replace("Z", "+00:00") + return datetime.fromisoformat(date_string) diff --git a/tests/data b/tests/data index e48b26e5..f599a960 160000 --- a/tests/data +++ b/tests/data @@ -1 +1 @@ -Subproject commit e48b26e5250cbbab9d3ee6c66c0eb85b667a4bb8 +Subproject commit f599a960e78f4a390984c6263f387aa8cdebe0f0 From 9a7774468b7929940bee57125b8d48ddf4cab9f0 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Wed, 2 Jul 2025 19:48:17 +0200 Subject: [PATCH 02/17] fix most syntaxes, add tests (integration testing not fully working) --- mindee/__init__.py | 1 + mindee/client_v2.py | 26 +-- mindee/error/__init__.py | 8 +- mindee/error/mindee_error.py | 2 +- mindee/error/mindee_http_error_v2.py | 17 +- mindee/mindee_http/mindee_api_v2.py | 34 +++- mindee/mindee_http/response_validation_v2.py | 6 +- mindee/parsing/common/api_request.py | 12 +- mindee/parsing/common/execution_file.py | 6 +- mindee/parsing/v2/base_field.py | 29 ++-- mindee/parsing/v2/inference_fields.py | 17 +- mindee/parsing/v2/inference_file.py | 9 +- mindee/parsing/v2/inference_options.py | 6 +- mindee/parsing/v2/inference_result.py | 14 +- mindee/parsing/v2/job.py | 9 +- pyproject.toml | 3 +- tests/test_client_v2.py | 68 ++++++++ tests/test_client_v2_integration.py | 104 ++++++++++++ tests/test_inputs.py | 2 +- tests/v2/__init__.py | 0 tests/v2/test_inference_response.py | 160 +++++++++++++++++++ 21 files changed, 462 insertions(+), 71 deletions(-) create mode 100644 tests/test_client_v2.py create mode 100644 tests/test_client_v2_integration.py create mode 100644 tests/v2/__init__.py create mode 100644 tests/v2/test_inference_response.py diff --git a/mindee/__init__.py b/mindee/__init__.py index 5401bca8..ba0e477f 100644 --- a/mindee/__init__.py +++ b/mindee/__init__.py @@ -1,5 +1,6 @@ from mindee import product from mindee.client import Client +from mindee.client_v2 import ClientV2 from mindee.input.inference_predict_options import InferencePredictOptions from mindee.input.local_response import LocalResponse from mindee.input.page_options import PageOptions diff --git a/mindee/client_v2.py b/mindee/client_v2.py index a9cd30a9..195f8ab3 100644 --- a/mindee/client_v2.py +++ b/mindee/client_v2.py @@ -19,19 +19,6 @@ from mindee.parsing.v2.polling_response import PollingResponse -def load_prediction(local_response: LocalResponse) -> InferenceResponse: - """ - Load a prediction. - - :param local_response: Local response to load. - :return: A valid prediction. - """ - try: - return InferenceResponse(local_response.as_dict) - except KeyError as exc: - raise MindeeError("No prediction found in local response.") from exc - - class ClientV2(ClientMixin): """ Mindee API Client. @@ -182,3 +169,16 @@ def enqueue_and_parse( ) return poll_results + + @staticmethod + def load_inference(local_response: LocalResponse) -> InferenceResponse: + """ + Load a prediction from the V2 API. + + :param local_response: Local response to load. + :return: A valid prediction. + """ + try: + return InferenceResponse(local_response.as_dict) + except KeyError as exc: + raise MindeeError("No prediction found in local response.") from exc diff --git a/mindee/error/__init__.py b/mindee/error/__init__.py index c49c3cf3..401872d2 100644 --- a/mindee/error/__init__.py +++ b/mindee/error/__init__.py @@ -1,6 +1,12 @@ from mindee.error.geometry_error import GeometryError from mindee.error.mimetype_error import MimeTypeError -from mindee.error.mindee_error import MindeeClientError, MindeeError, MindeeProductError +from mindee.error.mindee_error import ( + MindeeApiError, + MindeeApiV2Error, + MindeeClientError, + MindeeError, + MindeeProductError, +) from mindee.error.mindee_http_error import ( MindeeHTTPClientError, MindeeHTTPError, diff --git a/mindee/error/mindee_error.py b/mindee/error/mindee_error.py index 65302cf8..5565a1a1 100644 --- a/mindee/error/mindee_error.py +++ b/mindee/error/mindee_error.py @@ -14,7 +14,7 @@ class MindeeApiError(MindeeError): """An exception relating to settings of the MindeeClient.""" -class MindeeAPIV2Error(MindeeError): +class MindeeApiV2Error(MindeeError): """An exception relating to settings of the MindeeClient V2.""" diff --git a/mindee/error/mindee_http_error_v2.py b/mindee/error/mindee_http_error_v2.py index 41c0a070..05954004 100644 --- a/mindee/error/mindee_http_error_v2.py +++ b/mindee/error/mindee_http_error_v2.py @@ -25,21 +25,20 @@ def __init__(self, status: int, detail: Optional[str]) -> None: class MindeeHTTPUnknownErrorV2(MindeeHTTPErrorV2): """HTTP error with unknown status code.""" - def __init__(self, status: int, detail: Optional[str]) -> None: + def __init__(self, detail: Optional[str]) -> None: super().__init__(-1, f"Couldn't deserialize server error. Found: {detail}") -def handle_error_v2(json_response: StringDict) -> None: +def handle_error_v2(raw_response: StringDict) -> None: """ Handles HTTP errors by raising MindeeHTTPErrorV2 exceptions with proper details. :raises MindeeHTTPErrorV2: If the response has been caught. :raises MindeeHTTPUnknownErrorV2: If the json return format is unreadable. """ - try: - raise MindeeHTTPErrorV2( - json_response["job"]["error"]["status"], - json_response["job"]["error"]["detail"], - ) - except Exception as exc: - raise MindeeHTTPUnknownErrorV2(-1, json.dumps(json_response, indent=2)) from exc + if "status" not in raw_response or "detail" not in raw_response: + raise MindeeHTTPUnknownErrorV2(json.dumps(raw_response, indent=2)) + raise MindeeHTTPErrorV2( + raw_response["status"], + raw_response["detail"], + ) diff --git a/mindee/mindee_http/mindee_api_v2.py b/mindee/mindee_http/mindee_api_v2.py index 92042e53..56d9a621 100644 --- a/mindee/mindee_http/mindee_api_v2.py +++ b/mindee/mindee_http/mindee_api_v2.py @@ -1,10 +1,12 @@ +import os from typing import Dict, Optional import requests -from mindee import InferencePredictOptions -from mindee.error.mindee_error import MindeeApiError +from mindee.error.mindee_error import MindeeApiV2Error from mindee.input import LocalInputSource +from mindee.input.inference_predict_options import InferencePredictOptions +from mindee.logger import logger from mindee.mindee_http.base_settings import USER_AGENT from mindee.mindee_http.settings_mixin import SettingsMixin @@ -12,7 +14,7 @@ API_KEY_V2_DEFAULT = "" BASE_URL_ENV_NAME = "MINDEE_V2_BASE_URL" -BASE_URL_DEFAULT = "https://api-v2.mindee.com/v2" +BASE_URL_DEFAULT = "https://api-v2.mindee.net/v2" REQUEST_TIMEOUT_ENV_NAME = "MINDEE_REQUEST_TIMEOUT" TIMEOUT_DEFAULT = 120 @@ -32,7 +34,7 @@ def __init__( ): self.api_key = api_key if not self.api_key or len(self.api_key) == 0: - raise MindeeApiError( + raise MindeeApiV2Error( ( f"Missing API key," " check your Client configuration.\n" @@ -40,16 +42,31 @@ def __init__( f"'{API_KEY_V2_ENV_NAME}' environment variable." ) ) + self.request_timeout = TIMEOUT_DEFAULT + self.set_base_url(BASE_URL_DEFAULT) + self.set_from_env() self.url_root = f"{self.base_url.rstrip('/')}" @property def base_headers(self) -> Dict[str, str]: """Base headers to send with all API requests.""" return { - "Authorization": f"Token {self.api_key}", + "Authorization": self.api_key or "", "User-Agent": USER_AGENT, } + def set_from_env(self) -> None: + """Set various parameters from environment variables, if present.""" + env_vars = { + BASE_URL_ENV_NAME: self.set_base_url, + REQUEST_TIMEOUT_ENV_NAME: self.set_timeout, + } + for name, func in env_vars.items(): + env_val = os.getenv(name, "") + if env_val: + func(env_val) + logger.debug("Value was set from env: %s", name) + def predict_async_req_post( self, input_source: LocalInputSource, @@ -64,7 +81,7 @@ def predict_async_req_post( :param close_file: Whether to `close()` the file after parsing it. :return: requests response. """ - data = {} + data = {"model_id": options.model_id} params = {} url = f"{self.url_root}/inferences/enqueue" @@ -77,7 +94,10 @@ def predict_async_req_post( if options.alias and len(options.alias): data["alias"] = options.alias - files = {"document": input_source.read_contents(close_file)} + files = { + "file": input_source.read_contents(close_file) + + (input_source.file_mimetype,) + } response = requests.post( url=url, files=files, diff --git a/mindee/mindee_http/response_validation_v2.py b/mindee/mindee_http/response_validation_v2.py index dd49792e..74e30d24 100644 --- a/mindee/mindee_http/response_validation_v2.py +++ b/mindee/mindee_http/response_validation_v2.py @@ -17,7 +17,11 @@ def is_valid_post_response(response: requests.Response) -> bool: response_json = json.loads(response.content) if not "job" in response_json: return False - if "job" in response_json and "error" in response_json["job"]: + if ( + "job" in response_json + and "error" in response_json["job"] + and response_json["job"]["error"] is not None + ): return False return True diff --git a/mindee/parsing/common/api_request.py b/mindee/parsing/common/api_request.py index 6a7bc0dc..b21d6387 100644 --- a/mindee/parsing/common/api_request.py +++ b/mindee/parsing/common/api_request.py @@ -21,9 +21,9 @@ class ApiRequest: """HTTP status code.""" url: str - def __init__(self, json_response: dict) -> None: - self.url = json_response["url"] - self.error = json_response["error"] - self.resources = json_response["resources"] - self.status = RequestStatus(json_response["status"]) - self.status_code = json_response["status_code"] + def __init__(self, raw_response: StringDict) -> None: + self.url = raw_response["url"] + self.error = raw_response["error"] + self.resources = raw_response["resources"] + self.status = RequestStatus(raw_response["status"]) + self.status_code = raw_response["status_code"] diff --git a/mindee/parsing/common/execution_file.py b/mindee/parsing/common/execution_file.py index 3c728ddb..ae9ad24a 100644 --- a/mindee/parsing/common/execution_file.py +++ b/mindee/parsing/common/execution_file.py @@ -12,6 +12,6 @@ class ExecutionFile: alias: Optional[str] """File name.""" - def __init__(self, json_response: StringDict): - self.name = json_response["name"] - self.alias = json_response["alias"] + def __init__(self, raw_response: StringDict): + self.name = raw_response["name"] + self.alias = raw_response["alias"] diff --git a/mindee/parsing/v2/base_field.py b/mindee/parsing/v2/base_field.py index e9332281..c877fc37 100644 --- a/mindee/parsing/v2/base_field.py +++ b/mindee/parsing/v2/base_field.py @@ -1,6 +1,6 @@ from typing import Dict, List, Union -from mindee.error.mindee_error import MindeeAPIV2Error +from mindee.error.mindee_error import MindeeApiV2Error from mindee.parsing.common.string_dict import StringDict @@ -14,7 +14,9 @@ def __init__(self, indent_level=0) -> None: self._indent_level = indent_level @staticmethod - def create_field(raw_response: StringDict, indent_level: int = 0) -> "BaseField": + def create_field( + raw_response: StringDict, indent_level: int = 0 + ) -> Union["ListField", "ObjectField", "SimpleField"]: """Factory function to create appropriate field instances.""" if isinstance(raw_response, dict): if "items" in raw_response: @@ -23,14 +25,14 @@ def create_field(raw_response: StringDict, indent_level: int = 0) -> "BaseField" return ObjectField(raw_response, indent_level) if "value" in raw_response: return SimpleField(raw_response, indent_level) - raise MindeeAPIV2Error("Unrecognized field format.") - raise MindeeAPIV2Error("Unrecognized field format.") + raise MindeeApiV2Error(f"Unrecognized field format in {raw_response}.") + raise MindeeApiV2Error(f"Unrecognized field format {raw_response}.") class ListField(BaseField): """List field containing multiple fields.""" - items: List[BaseField] + items: List[Union["ListField", "ObjectField", "SimpleField"]] """Items contained in the list.""" def __init__(self, raw_response: StringDict, indent_level: int = 0): @@ -40,23 +42,26 @@ def __init__(self, raw_response: StringDict, indent_level: int = 0): for item in raw_response["items"]: if isinstance(item, dict): self.items.append(BaseField.create_field(item, 1)) - raise MindeeAPIV2Error("Unrecognized field format.") + else: + raise MindeeApiV2Error(f"Unrecognized field format '{item}'.") class ObjectField(BaseField): """Object field containing multiple fields.""" - fields: Dict[str, BaseField] + fields: Dict[str, Union[ListField, "ObjectField", "SimpleField"]] """Fields contained in the object.""" def __init__(self, raw_response: StringDict, indent_level: int = 0): super().__init__(indent_level) - fields: Dict[str, BaseField] = {} - for field_key, field_value in raw_response.items(): + inner_fields = raw_response.get("fields", raw_response) + + self.fields: Dict[str, Union["ListField", "ObjectField", "SimpleField"]] = {} + for field_key, field_value in inner_fields.items(): if isinstance(field_value, dict): - fields[field_key] = BaseField.create_field(field_value, 1) + self.fields[field_key] = BaseField.create_field(field_value, 1) else: - raise MindeeAPIV2Error("Unrecognized field format.") + raise MindeeApiV2Error(f"Unrecognized field format '{field_value}'.") def __str__(self) -> str: out_str = "" @@ -72,7 +77,7 @@ class SimpleField(BaseField): def __init__(self, raw_response: StringDict, indent_level: int = 0): super().__init__(indent_level) - self.value = raw_response["value"] if "value" in raw_response else None + self.value = raw_response["value"] = raw_response.get("value", None) def __str__(self) -> str: return f"{' ' * self._indent_level}{self.value}\n" diff --git a/mindee/parsing/v2/inference_fields.py b/mindee/parsing/v2/inference_fields.py index d8f2c972..a38a26f6 100644 --- a/mindee/parsing/v2/inference_fields.py +++ b/mindee/parsing/v2/inference_fields.py @@ -1,7 +1,20 @@ from typing import Union -from mindee.parsing.v2.base_field import ListField, ObjectField, SimpleField +from mindee.parsing.common.string_dict import StringDict +from mindee.parsing.v2.base_field import BaseField, ListField, ObjectField, SimpleField -class InferenceFields(dict[str, Union[ObjectField, ListField, SimpleField]]): +class InferenceFields(dict[str, Union[SimpleField, ObjectField, ListField]]): """Inference fields dict.""" + + def __init__(self, raw_response: StringDict) -> None: + super().__init__() + for key, value in raw_response.items(): + field_obj = BaseField.create_field(value, 0) + self[key] = field_obj + + def __getattr__(self, item): + try: + return self[item] + except KeyError: + raise AttributeError(item) from None diff --git a/mindee/parsing/v2/inference_file.py b/mindee/parsing/v2/inference_file.py index 5b175385..a23ad46c 100644 --- a/mindee/parsing/v2/inference_file.py +++ b/mindee/parsing/v2/inference_file.py @@ -1,3 +1,6 @@ +from mindee.parsing.common.string_dict import StringDict + + class InferenceFile: """Inference File info.""" @@ -6,6 +9,6 @@ class InferenceFile: alais: str """Alias of the file.""" - def __init__(self, json_response: dict) -> None: - self.name = json_response["name"] - self.alias = json_response["alias"] + def __init__(self, raw_response: StringDict) -> None: + self.name = raw_response["name"] + self.alias = raw_response["alias"] diff --git a/mindee/parsing/v2/inference_options.py b/mindee/parsing/v2/inference_options.py index 6ef1d9a7..59d98051 100644 --- a/mindee/parsing/v2/inference_options.py +++ b/mindee/parsing/v2/inference_options.py @@ -1,4 +1,4 @@ -from typing import List +from typing import List, Optional from mindee.parsing.common.string_dict import StringDict @@ -6,7 +6,7 @@ class InferenceOptions: """Optional information about the document.""" - raw_text: List[str] + raw_text: Optional[List[str]] def __init__(self, raw_response: StringDict): - self.raw_text = raw_response["raw_text"] + self.raw_text = raw_response["raw_text"] if "raw_text" in raw_response else None diff --git a/mindee/parsing/v2/inference_result.py b/mindee/parsing/v2/inference_result.py index 52ff9439..3bf99ef3 100644 --- a/mindee/parsing/v2/inference_result.py +++ b/mindee/parsing/v2/inference_result.py @@ -1,3 +1,5 @@ +from typing import Optional + from mindee.parsing.common.string_dict import StringDict from mindee.parsing.v2.inference_fields import InferenceFields from mindee.parsing.v2.inference_options import InferenceOptions @@ -8,12 +10,16 @@ class InferenceResult: fields: InferenceFields """Fields contained in the inference.""" - options: InferenceOptions + options: Optional[InferenceOptions] """Potential options retrieved alongside the inference.""" - def __init__(self, json_response: StringDict) -> None: - self.fields = InferenceFields(json_response["fields"]) - self.options = InferenceOptions(json_response["options"]) + def __init__(self, raw_response: StringDict) -> None: + self.fields = InferenceFields(raw_response["fields"]) + self.options = ( + InferenceOptions(raw_response["options"]) + if raw_response.get("options") + else None + ) def __str__(self) -> str: str_fields = "" diff --git a/mindee/parsing/v2/job.py b/mindee/parsing/v2/job.py index d4f091d9..876608aa 100644 --- a/mindee/parsing/v2/job.py +++ b/mindee/parsing/v2/job.py @@ -11,16 +11,17 @@ class Job: """Error response if any.""" model_id: str """ID of the model.""" - file_name: str + filename: str """Name for the file.""" - file_alias: str + alias: str """Optional alias for the file.""" status: str """Status of the job.""" def __init__(self, raw_response: StringDict) -> None: + self.id = raw_response["id"] self.status = raw_response["status"] self.error = ErrorResponse(raw_response["error"]) self.model_id = raw_response["model_id"] - self.file_name = raw_response["file_name"] - self.file_alias = raw_response["file_alias"] + self.filename = raw_response["filename"] + self.alias = raw_response["alias"] diff --git a/pyproject.toml b/pyproject.toml index 0e3d7b14..fd6a05e0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -127,7 +127,8 @@ junit_family = "xunit2" markers = [ "regression: marks tests as regression tests - select with '-m regression'", "lineitems: debug line items", - "integration: integration tests that send calls to the API - select with '-m integration'" + "integration: integration tests that send calls to the API - select with '-m integration'", + "v2: Tests specific to version 2 of the API" ] testpaths = [ "tests", diff --git a/tests/test_client_v2.py b/tests/test_client_v2.py new file mode 100644 index 00000000..51127798 --- /dev/null +++ b/tests/test_client_v2.py @@ -0,0 +1,68 @@ +import pytest + +from mindee import ClientV2, InferencePredictOptions, LocalResponse +from mindee.error.mindee_error import MindeeApiV2Error +from mindee.error.mindee_http_error_v2 import MindeeHTTPErrorV2 +from mindee.input import LocalInputSource +from tests.test_inputs import FILE_TYPES_DIR, V2_DATA_DIR +from tests.utils import dummy_envvars + + +@pytest.fixture +def env_client(monkeypatch) -> ClientV2: + dummy_envvars(monkeypatch) + return ClientV2("dummy") + + +@pytest.fixture +def custom_base_url_client(monkeypatch) -> ClientV2: + class _FakeResp: + status_code = 400 # any non-2xx will do + ok = False + + def json(self): + # Shape must match what handle_error_v2 expects + return { + "error": {"status": -1, "detail": "forced failure from test"}, + } + + monkeypatch.setenv("MINDEE_V2_BASE_URL", "https://dummy-url") + + def _fake_post(*args, **kwargs): + return _FakeResp() + + monkeypatch.setattr( + "mindee.mindee_http.mindee_api_v2.requests.post", + _fake_post, + raising=True, + ) + + return ClientV2("dummy") + + +@pytest.mark.v2 +def test_parse_path_without_token(): + with pytest.raises(MindeeApiV2Error): + ClientV2() + + +@pytest.mark.v2 +def test_parse_path_with_env_token(custom_base_url_client): + assert custom_base_url_client.mindee_api.base_url == "https://dummy-url" + assert custom_base_url_client.mindee_api.url_root == "https://dummy-url" + input_doc: LocalInputSource = custom_base_url_client.source_from_path( + f"{FILE_TYPES_DIR}/receipt.jpg" + ) + with pytest.raises(MindeeHTTPErrorV2): + custom_base_url_client.enqueue( + input_doc, InferencePredictOptions("dummy-model") + ) + + +@pytest.mark.v2 +def test_loads_from_prediction(env_client): + input_inference = LocalResponse( + V2_DATA_DIR / "products" / "financial_document" / "complete.json" + ) + prediction = env_client.load_inference(input_inference) + assert prediction.inference.model.id == "12345678-1234-1234-1234-123456789abc" diff --git a/tests/test_client_v2_integration.py b/tests/test_client_v2_integration.py new file mode 100644 index 00000000..254d75dc --- /dev/null +++ b/tests/test_client_v2_integration.py @@ -0,0 +1,104 @@ +from __future__ import annotations + +import os +from pathlib import Path + +import pytest + +from mindee import ClientV2, InferencePredictOptions +from mindee.error.mindee_http_error_v2 import MindeeHTTPErrorV2 +from mindee.parsing.v2.inference_response import InferenceResponse +from tests.test_inputs import FILE_TYPES_DIR, PRODUCT_DATA_DIR + + +@pytest.fixture(scope="session") +def findoc_model_id() -> str: + """Identifier of the Financial Document model, supplied through an env var.""" + return os.getenv("MINDEE_V2_FINDOC_MODEL_ID") + + +@pytest.fixture(scope="session") +def v2_client() -> ClientV2: + """ + Real V2 client configured with the user-supplied API key + (or skipped when the key is absent). + """ + api_key = os.getenv("MINDEE_V2_API_KEY") + return ClientV2(api_key) + + +@pytest.mark.integration +@pytest.mark.v2 +def test_parse_file_empty_multiple_pages_must_succeed( + v2_client: ClientV2, findoc_model_id: str +) -> None: + """ + Upload a 2-page blank PDF and make sure the returned inference contains the + file & model metadata. + """ + input_path: Path = FILE_TYPES_DIR / "pdf" / "multipage_cut-2.pdf" + assert input_path.exists(), f"sample file missing: {input_path}" + + input_doc = v2_client.source_from_path(input_path) + options = InferencePredictOptions(findoc_model_id) + + response: InferenceResponse = v2_client.enqueue_and_parse(input_doc, options, False) + + assert response is not None + assert response.inference is not None + + assert response.inference.file is not None + assert response.inference.file.name == "multipage_cut-2.pdf" + + assert response.inference.model is not None + assert response.inference.model.id == findoc_model_id + + +@pytest.mark.integration +@pytest.mark.v2 +def test_parse_file_filled_single_page_must_succeed( + v2_client: ClientV2, findoc_model_id: str +) -> None: + """ + Upload a filled single-page JPEG and verify that common fields are present. + """ + input_path: Path = PRODUCT_DATA_DIR / "financial_document" / "default_sample.jpg" + assert input_path.exists(), f"sample file missing: {input_path}" + + input_doc = v2_client.source_from_path(input_path) + options = InferencePredictOptions(findoc_model_id) + + response: InferenceResponse = v2_client.enqueue_and_parse(input_doc, options) + + assert response is not None + assert response.inference is not None + + assert response.inference.file is not None + assert response.inference.file.name == "default_sample.jpg" + + assert response.inference.model is not None + assert response.inference.model.id == findoc_model_id + + assert response.inference.result is not None + supplier_name = response.inference.result.fields["supplier_name"] + assert supplier_name is not None + assert supplier_name.simple_field.value == "John Smith" + + +@pytest.mark.integration +@pytest.mark.v2 +def test_invalid_uuid_must_throw_error_422(v2_client: ClientV2) -> None: + """ + Using an invalid model identifier must trigger a 422 HTTP error. + """ + input_path: Path = FILE_TYPES_DIR / "pdf" / "multipage_cut-2.pdf" + assert input_path.exists() + + input_doc = v2_client.source_from_path(input_path) + options = InferencePredictOptions("INVALID MODEL ID") + + with pytest.raises(MindeeHTTPErrorV2) as exc_info: + v2_client.enqueue(input_doc, options) + + exc: MindeeHTTPErrorV2 = exc_info.value + assert exc.status == 422 diff --git a/tests/test_inputs.py b/tests/test_inputs.py index 9eaa84c9..3c3b0d69 100644 --- a/tests/test_inputs.py +++ b/tests/test_inputs.py @@ -15,7 +15,7 @@ from tests.product import PRODUCT_DATA_DIR FILE_TYPES_DIR = Path("./tests/data/file_types") - +V2_DATA_DIR = Path("./tests/data/v2") # # PDF diff --git a/tests/v2/__init__.py b/tests/v2/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/v2/test_inference_response.py b/tests/v2/test_inference_response.py new file mode 100644 index 00000000..8964ba40 --- /dev/null +++ b/tests/v2/test_inference_response.py @@ -0,0 +1,160 @@ +import pytest + +from mindee.parsing.common.string_dict import StringDict +from mindee.parsing.v2 import ( + Inference, + InferenceResponse, + InferenceResult, + ListField, + ObjectField, + SimpleField, +) + + +@pytest.fixture +def inference_json() -> StringDict: + return { + "inference": { + "model": {"id": "test-model-id"}, + "file": {"name": "test-file-name.jpg", "alias": None}, + "result": { + "fields": { + "field_simple": {"value": "value_1"}, + "field_object": { + "fields": { + "sub_object_simple": {"value": "value_2"}, + "sub_object_list": { + "items": [ + { + "fields": { + "sub_object_list_sub_list_simple": { + "value": "value_3" + } + } + }, + { + "fields": { + "sub_object_list_sub_list_object_subobject_1": { + "value": "value_4" + }, + "sub_object_list_sub_list_object_subobject_2": { + "value": "value_5" + }, + } + }, + ] + }, + "sub_object_object": { + "fields": { + "sub_object_object_sub_object_simple": { + "value": "value_6" + }, + "sub_object_object_sub_object_object": { + "fields": { + "sub_object_object_sub_object_object_simple_1": { + "value": "value_7" + }, + "sub_object_object_sub_object_object_simple_2": { + "value": "value_8" + }, + } + }, + "sub_object_object_sub_object_list": { + "items": [ + { + "fields": { + "sub_object_object_sub_object_list_simple": { + "value": "value_9" + }, + "sub_object_object_sub_object_list_object": { + "fields": { + "sub_object_object_sub_object_list_object_subobject_1": { + "value": "value_10" + }, + "sub_object_object_sub_object_list_object_subobject_2": { + "value": "value_11" + }, + } + }, + } + } + ] + }, + } + }, + } + }, + }, + "options": None, + }, + } + } + + +@pytest.mark.v2 +def test_inference(inference_json): + inference_result = InferenceResponse(inference_json) + assert isinstance(inference_result.inference, Inference) + assert isinstance( + inference_result.inference.result.fields.field_simple, SimpleField + ) + assert isinstance( + inference_result.inference.result.fields.field_object, ObjectField + ) + assert isinstance( + inference_result.inference.result.fields.field_object.fields["sub_object_list"], + ListField, + ) + assert isinstance( + inference_result.inference.result.fields.field_object.fields[ + "sub_object_object" + ], + ObjectField, + ) + assert isinstance( + inference_result.inference.result.fields.field_object.fields[ + "sub_object_object" + ].fields, + dict, + ) + assert isinstance( + inference_result.inference.result.fields.field_object.fields[ + "sub_object_object" + ].fields["sub_object_object_sub_object_list"], + ListField, + ) + assert isinstance( + inference_result.inference.result.fields.field_object.fields[ + "sub_object_object" + ] + .fields["sub_object_object_sub_object_list"] + .items, + list, + ) + assert isinstance( + inference_result.inference.result.fields.field_object.fields[ + "sub_object_object" + ] + .fields["sub_object_object_sub_object_list"] + .items[0], + ObjectField, + ) + assert isinstance( + inference_result.inference.result.fields.field_object.fields[ + "sub_object_object" + ] + .fields["sub_object_object_sub_object_list"] + .items[0] + .fields["sub_object_object_sub_object_list_simple"], + SimpleField, + ) + assert ( + inference_result.inference.result.fields.field_object.fields[ + "sub_object_object" + ] + .fields["sub_object_object_sub_object_list"] + .items[0] + .fields["sub_object_object_sub_object_list_simple"] + .value + == "value_9" + ) From 440fe3fb09a684bbcd99eb39fa30733bce40de39 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Thu, 3 Jul 2025 11:38:06 +0200 Subject: [PATCH 03/17] fix polling & add tests --- mindee/client_v2.py | 4 +- mindee/mindee_http/mindee_api_v2.py | 15 +++----- mindee/mindee_http/response_validation_v2.py | 2 +- mindee/parsing/v2/inference.py | 5 +++ mindee/parsing/v2/inference_response.py | 1 + mindee/parsing/v2/polling_response.py | 1 + tests/test_client_v2_integration.py | 4 +- tests/v2/test_inference_response.py | 39 ++++++++++++++++++-- 8 files changed, 52 insertions(+), 19 deletions(-) diff --git a/mindee/client_v2.py b/mindee/client_v2.py index 195f8ab3..92536629 100644 --- a/mindee/client_v2.py +++ b/mindee/client_v2.py @@ -91,14 +91,14 @@ def parse_queued( :param queue_id: queue_id received from the API. """ - logger.debug("Fetching from queue ''%s", queue_id) + logger.debug("Fetching from queue '%s'.", queue_id) response = self.mindee_api.document_queue_req_get(queue_id) if not is_valid_get_response(response): handle_error_v2(response.json()) dict_response = response.json() - if dict_response.get("job"): + if "job" in dict_response: return PollingResponse(dict_response) return InferenceResponse(dict_response) diff --git a/mindee/mindee_http/mindee_api_v2.py b/mindee/mindee_http/mindee_api_v2.py index 56d9a621..60aee4d3 100644 --- a/mindee/mindee_http/mindee_api_v2.py +++ b/mindee/mindee_http/mindee_api_v2.py @@ -82,28 +82,23 @@ def predict_async_req_post( :return: requests response. """ data = {"model_id": options.model_id} - params = {} url = f"{self.url_root}/inferences/enqueue" if options.full_text: - params["full_text_ocr"] = "true" + data["full_text_ocr"] = "true" if options.rag: - params["rag"] = "true" + data["rag"] = "true" if options.webhook_ids and len(options.webhook_ids) > 0: - params["webhook_ids"] = ",".join(options.webhook_ids) + data["webhook_ids"] = ",".join(options.webhook_ids) if options.alias and len(options.alias): data["alias"] = options.alias - files = { - "file": input_source.read_contents(close_file) - + (input_source.file_mimetype,) - } + files = {"file": input_source.read_contents(close_file)} response = requests.post( url=url, files=files, headers=self.base_headers, data=data, - params=params, timeout=self.request_timeout, ) @@ -116,7 +111,7 @@ def document_queue_req_get(self, queue_id: str) -> requests.Response: :param queue_id: queue_id received from the API """ return requests.get( - f"{self.url_root}/inferences/{queue_id}", + f"{self.url_root}/jobs/{queue_id}", headers=self.base_headers, timeout=self.request_timeout, ) diff --git a/mindee/mindee_http/response_validation_v2.py b/mindee/mindee_http/response_validation_v2.py index 74e30d24..606f3037 100644 --- a/mindee/mindee_http/response_validation_v2.py +++ b/mindee/mindee_http/response_validation_v2.py @@ -36,6 +36,6 @@ def is_valid_get_response(response: requests.Response) -> bool: if not is_valid_sync_response(response): return False response_json = json.loads(response.content) - if not "inference" in response_json: + if not "inference" in response_json and not "job" in response_json: return False return True diff --git a/mindee/parsing/v2/inference.py b/mindee/parsing/v2/inference.py index 97dfcfe8..f87d45ab 100644 --- a/mindee/parsing/v2/inference.py +++ b/mindee/parsing/v2/inference.py @@ -1,3 +1,5 @@ +from typing import Optional + from mindee.parsing.common.string_dict import StringDict from mindee.parsing.v2.inference_file import InferenceFile from mindee.parsing.v2.inference_model import InferenceModel @@ -13,11 +15,14 @@ class Inference: """File info for the inference.""" result: InferenceResult """Result of the inference.""" + id: Optional[str] + """ID of the inference.""" def __init__(self, raw_response: StringDict): self.model = InferenceModel(raw_response["model"]) self.file = InferenceFile(raw_response["file"]) self.result = InferenceResult(raw_response["result"]) + self.id = raw_response["id"] if "id" in raw_response else None def __str__(self) -> str: return ( diff --git a/mindee/parsing/v2/inference_response.py b/mindee/parsing/v2/inference_response.py index e799258c..347bd6ab 100644 --- a/mindee/parsing/v2/inference_response.py +++ b/mindee/parsing/v2/inference_response.py @@ -7,6 +7,7 @@ class InferenceResponse(CommonResponse): """Represent an inference response from Mindee V2 API.""" inference: Inference + """Inference result.""" def __init__(self, raw_response: StringDict) -> None: super().__init__(raw_response) diff --git a/mindee/parsing/v2/polling_response.py b/mindee/parsing/v2/polling_response.py index d5fb7690..929e3db3 100644 --- a/mindee/parsing/v2/polling_response.py +++ b/mindee/parsing/v2/polling_response.py @@ -7,6 +7,7 @@ class PollingResponse(CommonResponse): """Represent an inference response from Mindee V2 API.""" job: Job + """Job for the polling.""" def __init__(self, raw_response: StringDict) -> None: super().__init__(raw_response) diff --git a/tests/test_client_v2_integration.py b/tests/test_client_v2_integration.py index 254d75dc..01cd526c 100644 --- a/tests/test_client_v2_integration.py +++ b/tests/test_client_v2_integration.py @@ -42,7 +42,7 @@ def test_parse_file_empty_multiple_pages_must_succeed( input_doc = v2_client.source_from_path(input_path) options = InferencePredictOptions(findoc_model_id) - response: InferenceResponse = v2_client.enqueue_and_parse(input_doc, options, False) + response: InferenceResponse = v2_client.enqueue_and_parse(input_doc, options) assert response is not None assert response.inference is not None @@ -82,7 +82,7 @@ def test_parse_file_filled_single_page_must_succeed( assert response.inference.result is not None supplier_name = response.inference.result.fields["supplier_name"] assert supplier_name is not None - assert supplier_name.simple_field.value == "John Smith" + assert supplier_name.value == "John Smith" @pytest.mark.integration diff --git a/tests/v2/test_inference_response.py b/tests/v2/test_inference_response.py index 8964ba40..65fd3bee 100644 --- a/tests/v2/test_inference_response.py +++ b/tests/v2/test_inference_response.py @@ -1,18 +1,23 @@ +import json + import pytest +from mindee import ClientV2, LocalResponse from mindee.parsing.common.string_dict import StringDict from mindee.parsing.v2 import ( Inference, + InferenceFile, + InferenceModel, InferenceResponse, - InferenceResult, ListField, ObjectField, SimpleField, ) +from tests.test_inputs import V2_DATA_DIR @pytest.fixture -def inference_json() -> StringDict: +def inference_result_json() -> StringDict: return { "inference": { "model": {"id": "test-model-id"}, @@ -92,8 +97,8 @@ def inference_json() -> StringDict: @pytest.mark.v2 -def test_inference(inference_json): - inference_result = InferenceResponse(inference_json) +def test_inference_response(inference_result_json): + inference_result = InferenceResponse(inference_result_json) assert isinstance(inference_result.inference, Inference) assert isinstance( inference_result.inference.result.fields.field_simple, SimpleField @@ -158,3 +163,29 @@ def test_inference(inference_json): .value == "value_9" ) + + +@pytest.mark.v2 +def test_full_inference_response(): + client_v2 = ClientV2("dummy") + load_response = client_v2.load_inference( + LocalResponse(V2_DATA_DIR / "products" / "financial_document" / "complete.json") + ) + + assert isinstance(load_response.inference, Inference) + assert load_response.inference.id == "12345678-1234-1234-1234-123456789abc" + assert isinstance(load_response.inference.result.fields.date, SimpleField) + assert load_response.inference.result.fields.date.value == "2019-11-02" + assert isinstance(load_response.inference.result.fields.taxes, ListField) + assert isinstance(load_response.inference.result.fields.taxes.items[0], ObjectField) + assert ( + load_response.inference.result.fields.taxes.items[0].fields["base"].value + == 31.5 + ) + + assert isinstance(load_response.inference.model, InferenceModel) + assert load_response.inference.model.id == "12345678-1234-1234-1234-123456789abc" + + assert isinstance(load_response.inference.file, InferenceFile) + assert load_response.inference.file.name == "complete.jpg" + assert load_response.inference.file.alias == None From 11cac59df4428ecec67fb3f3a767697b9daa202b Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Thu, 3 Jul 2025 12:30:31 +0200 Subject: [PATCH 04/17] fix python3.8 syntax compatibility --- mindee/parsing/v2/inference_fields.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mindee/parsing/v2/inference_fields.py b/mindee/parsing/v2/inference_fields.py index a38a26f6..c7408169 100644 --- a/mindee/parsing/v2/inference_fields.py +++ b/mindee/parsing/v2/inference_fields.py @@ -1,10 +1,11 @@ -from typing import Union +from __future__ import annotations +from typing import Union, Dict from mindee.parsing.common.string_dict import StringDict from mindee.parsing.v2.base_field import BaseField, ListField, ObjectField, SimpleField -class InferenceFields(dict[str, Union[SimpleField, ObjectField, ListField]]): +class InferenceFields(Dict[str, Union[SimpleField, ObjectField, ListField]]): """Inference fields dict.""" def __init__(self, raw_response: StringDict) -> None: From c64c9ca71029e684904d23d8230fcab834a4a794 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Thu, 3 Jul 2025 15:53:14 +0200 Subject: [PATCH 05/17] add tests & fix miscellaneous naming issues --- docs/extras/code_samples/default_v2.txt | 25 ++++++++++++++++++++++++ mindee/client_v2.py | 2 +- mindee/mindee_http/mindee_api_v2.py | 2 +- mindee/parsing/v2/base_field.py | 26 +++++++++++++++++++++---- mindee/parsing/v2/inference.py | 4 ++-- mindee/parsing/v2/inference_fields.py | 9 ++++++++- mindee/parsing/v2/inference_result.py | 8 ++++---- tests/test_code_samples.sh | 19 ++++++++++++++++++ 8 files changed, 82 insertions(+), 13 deletions(-) create mode 100644 docs/extras/code_samples/default_v2.txt diff --git a/docs/extras/code_samples/default_v2.txt b/docs/extras/code_samples/default_v2.txt new file mode 100644 index 00000000..3adf10fc --- /dev/null +++ b/docs/extras/code_samples/default_v2.txt @@ -0,0 +1,25 @@ +# +# Install the Python client library by running: +# pip install mindee +# + +from mindee import ClientV2, InferencePredictOptions +from mindee.parsing.v2 import InferenceResponse, PollingResponse +from tests.product import PRODUCT_DATA_DIR + +input_path = "/path/to/the/file.ext" +api_key = "MY_API_KEY" +model_id = "MY_MODEL_ID" + +# Init a new client +mindee_client = ClientV2(api_key) + +# Load a file from disk +input_doc = mindee_client.source_from_path(input_path) +options = InferencePredictOptions(model_id=model_id) + +# Parse the file. +response: InferenceResponse = mindee_client.enqueue_and_parse(input_doc, options) + +# Print a brief summary of the parsed data +print(response.inference) diff --git a/mindee/client_v2.py b/mindee/client_v2.py index 92536629..41e10139 100644 --- a/mindee/client_v2.py +++ b/mindee/client_v2.py @@ -93,7 +93,7 @@ def parse_queued( """ logger.debug("Fetching from queue '%s'.", queue_id) - response = self.mindee_api.document_queue_req_get(queue_id) + response = self.mindee_api.get_inference_from_queue(queue_id) if not is_valid_get_response(response): handle_error_v2(response.json()) diff --git a/mindee/mindee_http/mindee_api_v2.py b/mindee/mindee_http/mindee_api_v2.py index 60aee4d3..36d35fb1 100644 --- a/mindee/mindee_http/mindee_api_v2.py +++ b/mindee/mindee_http/mindee_api_v2.py @@ -104,7 +104,7 @@ def predict_async_req_post( return response - def document_queue_req_get(self, queue_id: str) -> requests.Response: + def get_inference_from_queue(self, queue_id: str) -> requests.Response: """ Sends a request matching a given queue_id. Returns either a Job or a Document. diff --git a/mindee/parsing/v2/base_field.py b/mindee/parsing/v2/base_field.py index c877fc37..f199265b 100644 --- a/mindee/parsing/v2/base_field.py +++ b/mindee/parsing/v2/base_field.py @@ -41,10 +41,16 @@ def __init__(self, raw_response: StringDict, indent_level: int = 0): self.items = [] for item in raw_response["items"]: if isinstance(item, dict): - self.items.append(BaseField.create_field(item, 1)) + self.items.append(BaseField.create_field(item, self._indent_level + 2)) else: raise MindeeApiV2Error(f"Unrecognized field format '{item}'.") + def __str__(self) -> str: + out_str = "" + for item in self.items: + out_str += f"* {str(item)[2:] if item else ''}" + return "\n" + out_str if out_str else "" + class ObjectField(BaseField): """Object field containing multiple fields.""" @@ -59,14 +65,26 @@ def __init__(self, raw_response: StringDict, indent_level: int = 0): self.fields: Dict[str, Union["ListField", "ObjectField", "SimpleField"]] = {} for field_key, field_value in inner_fields.items(): if isinstance(field_value, dict): - self.fields[field_key] = BaseField.create_field(field_value, 1) + self.fields[field_key] = BaseField.create_field( + field_value, self._indent_level + 1 + ) else: raise MindeeApiV2Error(f"Unrecognized field format '{field_value}'.") def __str__(self) -> str: out_str = "" for field_key, field_value in self.fields.items(): - out_str += f"{' ' * self._indent_level}:{field_key}: {field_value}\n" + if isinstance(field_value, ListField): + value_str = "" + if len(field_value.items) > 0: + value_str = ( + " " * self._indent_level + str(field_value) + if field_value + else "" + ) + out_str += f"{' ' * self._indent_level}:{field_key}: {value_str}" + else: + out_str += f"{' ' * self._indent_level}:{field_key}: {field_value if field_value else ''}" return out_str @@ -80,4 +98,4 @@ def __init__(self, raw_response: StringDict, indent_level: int = 0): self.value = raw_response["value"] = raw_response.get("value", None) def __str__(self) -> str: - return f"{' ' * self._indent_level}{self.value}\n" + return f"{self.value}\n" if self.value else "\n" diff --git a/mindee/parsing/v2/inference.py b/mindee/parsing/v2/inference.py index f87d45ab..becfcaf0 100644 --- a/mindee/parsing/v2/inference.py +++ b/mindee/parsing/v2/inference.py @@ -30,9 +30,9 @@ def __str__(self) -> str: f"#########\n" f":Model: {self.model.id}\n" f":File: {self.file}\n" - f" :Name: {self.file.name}\n\n" + f" :Name: {self.file.name}\n" f" :Alias: {self.file.alias}\n\n" f"Result\n" f"======\n" - f"\n{self.result}\n" + f"{self.result}\n" ) diff --git a/mindee/parsing/v2/inference_fields.py b/mindee/parsing/v2/inference_fields.py index c7408169..dfbcfb9a 100644 --- a/mindee/parsing/v2/inference_fields.py +++ b/mindee/parsing/v2/inference_fields.py @@ -1,5 +1,6 @@ from __future__ import annotations -from typing import Union, Dict + +from typing import Dict, Union from mindee.parsing.common.string_dict import StringDict from mindee.parsing.v2.base_field import BaseField, ListField, ObjectField, SimpleField @@ -19,3 +20,9 @@ def __getattr__(self, item): return self[item] except KeyError: raise AttributeError(item) from None + + def __str__(self) -> str: + str_fields = "" + for field_key, field_value in self.items(): + str_fields += f":{field_key}: {field_value}" + return str_fields diff --git a/mindee/parsing/v2/inference_result.py b/mindee/parsing/v2/inference_result.py index 3bf99ef3..2f6ad911 100644 --- a/mindee/parsing/v2/inference_result.py +++ b/mindee/parsing/v2/inference_result.py @@ -22,7 +22,7 @@ def __init__(self, raw_response: StringDict) -> None: ) def __str__(self) -> str: - str_fields = "" - for field_key, field_value in self.fields.items(): - str_fields += f" :{field_key}: {field_value}\n" - return f":fields: {str_fields}\n" f"options: {self.options}\n" + out_str = f":fields: {self.fields}" + if self.options: + out_str += f"\n:options: {self.options}" + return out_str diff --git a/tests/test_code_samples.sh b/tests/test_code_samples.sh index 7074088d..81f7bf83 100755 --- a/tests/test_code_samples.sh +++ b/tests/test_code_samples.sh @@ -5,9 +5,19 @@ OUTPUT_FILE='./_test.py' ACCOUNT=$1 ENDPOINT=$2 API_KEY=$3 +API_KEY_V2=$4 +MODEL_ID=$5 for f in $(find ./docs/extras/code_samples -maxdepth 1 -name "*.txt" -not -name "workflow_*.txt" | sort -h) do + if echo "${f}" | grep -q "default_v2.txt"; then + if [ -z "${API_KEY_V2}" ] || [ -z "${MODEL_ID}" ]; then + echo "Skipping ${f} (API_KEY_V2 or MODEL_ID not supplied)" + echo + continue + fi + fi + echo echo "###############################################" echo "${f}" @@ -17,6 +27,15 @@ do sed "s/my-api-key/${API_KEY}/" "${f}" > $OUTPUT_FILE sed -i 's/\/path\/to\/the\/file.ext/.\/tests\/data\/file_types\/pdf\/blank_1.pdf/' $OUTPUT_FILE + + if echo "${f}" | grep -q "default_v2.txt" + then + sed -i "s/MY_API_KEY/$API_KEY_V2/" $OUTPUT_FILE + sed -i "s/MY_MODEL_ID/$MODEL_ID/" $OUTPUT_FILE + else + sed -i "s/my-api-key/$API_KEY/" $OUTPUT_FILE + fi + if echo "$f" | grep -q "custom_v1.txt" then sed -i "s/my-account/$ACCOUNT/g" $OUTPUT_FILE From d9c9c4b207f6ed67dc63a160dd1232210b44c869 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Thu, 3 Jul 2025 16:04:27 +0200 Subject: [PATCH 06/17] restore proper coverage --- tests/test_client_v2.py | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/tests/test_client_v2.py b/tests/test_client_v2.py index 51127798..938e4b95 100644 --- a/tests/test_client_v2.py +++ b/tests/test_client_v2.py @@ -28,12 +28,18 @@ def json(self): monkeypatch.setenv("MINDEE_V2_BASE_URL", "https://dummy-url") - def _fake_post(*args, **kwargs): + def _fake_response(*args, **kwargs): return _FakeResp() monkeypatch.setattr( "mindee.mindee_http.mindee_api_v2.requests.post", - _fake_post, + _fake_response, + raising=True, + ) + + monkeypatch.setattr( + "mindee.mindee_http.mindee_api_v2.requests.get", + _fake_response, raising=True, ) @@ -47,9 +53,10 @@ def test_parse_path_without_token(): @pytest.mark.v2 -def test_parse_path_with_env_token(custom_base_url_client): +def test_enqueue_path_with_env_token(custom_base_url_client): assert custom_base_url_client.mindee_api.base_url == "https://dummy-url" assert custom_base_url_client.mindee_api.url_root == "https://dummy-url" + assert custom_base_url_client.mindee_api.api_key == "dummy" input_doc: LocalInputSource = custom_base_url_client.source_from_path( f"{FILE_TYPES_DIR}/receipt.jpg" ) @@ -59,6 +66,23 @@ def test_parse_path_with_env_token(custom_base_url_client): ) +@pytest.mark.v2 +def test_enqueue_and_parse_path_with_env_token(custom_base_url_client): + input_doc: LocalInputSource = custom_base_url_client.source_from_path( + f"{FILE_TYPES_DIR}/receipt.jpg" + ) + with pytest.raises(MindeeHTTPErrorV2): + custom_base_url_client.enqueue_and_parse( + input_doc, InferencePredictOptions("dummy-model") + ) + + +@pytest.mark.v2 +def test_parse_queued6_and_parse_path_with_env_token(custom_base_url_client): + with pytest.raises(MindeeHTTPErrorV2): + custom_base_url_client.parse_queued("dummy-queue") + + @pytest.mark.v2 def test_loads_from_prediction(env_client): input_inference = LocalResponse( From 8a44f70bb9fe6c094607b431c8a2065db1d03dd1 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Thu, 3 Jul 2025 16:25:04 +0200 Subject: [PATCH 07/17] it do be coveraging --- tests/test_client_v2.py | 22 ++++++++++++++++++---- tests/v2/test_inference_response.py | 11 +++++++++-- 2 files changed, 27 insertions(+), 6 deletions(-) diff --git a/tests/test_client_v2.py b/tests/test_client_v2.py index 938e4b95..b06bd772 100644 --- a/tests/test_client_v2.py +++ b/tests/test_client_v2.py @@ -3,7 +3,8 @@ from mindee import ClientV2, InferencePredictOptions, LocalResponse from mindee.error.mindee_error import MindeeApiV2Error from mindee.error.mindee_http_error_v2 import MindeeHTTPErrorV2 -from mindee.input import LocalInputSource +from mindee.input import LocalInputSource, PathInput +from mindee.mindee_http.base_settings import USER_AGENT from tests.test_inputs import FILE_TYPES_DIR, V2_DATA_DIR from tests.utils import dummy_envvars @@ -22,9 +23,7 @@ class _FakeResp: def json(self): # Shape must match what handle_error_v2 expects - return { - "error": {"status": -1, "detail": "forced failure from test"}, - } + return {"status": -1, "detail": "forced failure from test"} monkeypatch.setenv("MINDEE_V2_BASE_URL", "https://dummy-url") @@ -57,6 +56,8 @@ def test_enqueue_path_with_env_token(custom_base_url_client): assert custom_base_url_client.mindee_api.base_url == "https://dummy-url" assert custom_base_url_client.mindee_api.url_root == "https://dummy-url" assert custom_base_url_client.mindee_api.api_key == "dummy" + assert custom_base_url_client.mindee_api.base_headers["Authorization"] == "dummy" + assert custom_base_url_client.mindee_api.base_headers["User-Agent"] == USER_AGENT input_doc: LocalInputSource = custom_base_url_client.source_from_path( f"{FILE_TYPES_DIR}/receipt.jpg" ) @@ -90,3 +91,16 @@ def test_loads_from_prediction(env_client): ) prediction = env_client.load_inference(input_inference) assert prediction.inference.model.id == "12345678-1234-1234-1234-123456789abc" + + +@pytest.mark.v2 +def test_error_handling(custom_base_url_client): + with pytest.raises(MindeeHTTPErrorV2) as e: + custom_base_url_client.enqueue( + PathInput( + V2_DATA_DIR / "products" / "financial_document" / "default_sample.jpg" + ), + InferencePredictOptions("dummy-model"), + ) + assert e.status_code == -1 + assert e.detail == "forced failure from test" diff --git a/tests/v2/test_inference_response.py b/tests/v2/test_inference_response.py index 65fd3bee..0c10219f 100644 --- a/tests/v2/test_inference_response.py +++ b/tests/v2/test_inference_response.py @@ -90,7 +90,9 @@ def inference_result_json() -> StringDict: } }, }, - "options": None, + "options": { + "raw_text": ["toto", "tata", "titi"], + }, }, } } @@ -164,6 +166,10 @@ def test_inference_response(inference_result_json): == "value_9" ) + assert inference_result.inference.result.options + assert len(inference_result.inference.result.options.raw_text) == 3 + assert inference_result.inference.result.options.raw_text[0] == "toto" + @pytest.mark.v2 def test_full_inference_response(): @@ -188,4 +194,5 @@ def test_full_inference_response(): assert isinstance(load_response.inference.file, InferenceFile) assert load_response.inference.file.name == "complete.jpg" - assert load_response.inference.file.alias == None + assert not load_response.inference.file.alias + assert not load_response.inference.result.options From f7dc7f28a1eff1bfe267e9d7ecba217acb68c253 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Thu, 3 Jul 2025 16:45:55 +0200 Subject: [PATCH 08/17] add even more coverage idk --- mindee/parsing/v2/error_response.py | 10 ++++- mindee/parsing/v2/job.py | 21 ++++++++- mindee/parsing/v2/webhook.py | 7 ++- tests/test_client_v2.py | 70 ++++++++++++++++++++++++----- 4 files changed, 91 insertions(+), 17 deletions(-) diff --git a/mindee/parsing/v2/error_response.py b/mindee/parsing/v2/error_response.py index 6749f73f..b9f7660e 100644 --- a/mindee/parsing/v2/error_response.py +++ b/mindee/parsing/v2/error_response.py @@ -1,11 +1,17 @@ -class ErrorResponse(RuntimeError): +from mindee.parsing.common.string_dict import StringDict + + +class ErrorResponse: """Error response info.""" detail: str """Detail relevant to the error.""" - status: int """Http error code.""" + def __init__(self, raw_response: StringDict): + self.detail = raw_response["detail"] + self.status = raw_response["status"] + def __str__(self): return f"HTTP Status: {self.status} - {self.detail}" diff --git a/mindee/parsing/v2/job.py b/mindee/parsing/v2/job.py index 876608aa..53d21c91 100644 --- a/mindee/parsing/v2/job.py +++ b/mindee/parsing/v2/job.py @@ -1,3 +1,6 @@ +from datetime import datetime +from typing import List, Optional + from mindee.parsing.common.string_dict import StringDict from mindee.parsing.v2.error_response import ErrorResponse @@ -7,7 +10,7 @@ class Job: id: str """Job ID.""" - error: ErrorResponse + error: Optional[ErrorResponse] """Error response if any.""" model_id: str """ID of the model.""" @@ -17,11 +20,25 @@ class Job: """Optional alias for the file.""" status: str """Status of the job.""" + polling_url: str + """URL to poll for the job status.""" + result_url: Optional[str] + """URL to poll for the job result, redirects to the result if available.""" + webhooks: List[str] + """ID of webhooks associated with the job.""" def __init__(self, raw_response: StringDict) -> None: self.id = raw_response["id"] self.status = raw_response["status"] - self.error = ErrorResponse(raw_response["error"]) + self.error = ( + ErrorResponse(raw_response["error"]) if raw_response["error"] else None + ) + self.created_at = datetime.fromisoformat( + raw_response["created_at"].replace("Z", "+00:00") + ) self.model_id = raw_response["model_id"] + self.polling_url = raw_response["polling_url"] self.filename = raw_response["filename"] + self.result_url = raw_response["result_url"] self.alias = raw_response["alias"] + self.webhooks = raw_response["webhooks"] diff --git a/mindee/parsing/v2/webhook.py b/mindee/parsing/v2/webhook.py index d151f54e..b325a323 100644 --- a/mindee/parsing/v2/webhook.py +++ b/mindee/parsing/v2/webhook.py @@ -1,4 +1,5 @@ from datetime import datetime +from typing import Optional from mindee.parsing.common.string_dict import StringDict from mindee.parsing.v2.error_response import ErrorResponse @@ -9,7 +10,7 @@ class Webhook: id: str """ID of the webhook.""" - error: ErrorResponse + error: Optional[ErrorResponse] """Error response if any.""" created_at: datetime """Date and time the webhook was sent at.""" @@ -18,7 +19,9 @@ class Webhook: def __init__(self, raw_response: StringDict) -> None: self.id = raw_response["id"] - self.error = ErrorResponse(raw_response["error"]) + self.error = ( + ErrorResponse(raw_response["error"]) if raw_response["error"] else None + ) self.created_at = self.parse_date(raw_response["created_at"]) self.status = raw_response["status"] diff --git a/tests/test_client_v2.py b/tests/test_client_v2.py index b06bd772..e7c74f3d 100644 --- a/tests/test_client_v2.py +++ b/tests/test_client_v2.py @@ -1,3 +1,5 @@ +import json + import pytest from mindee import ClientV2, InferencePredictOptions, LocalResponse @@ -5,6 +7,7 @@ from mindee.error.mindee_http_error_v2 import MindeeHTTPErrorV2 from mindee.input import LocalInputSource, PathInput from mindee.mindee_http.base_settings import USER_AGENT +from mindee.parsing.v2 import Job, PollingResponse from tests.test_inputs import FILE_TYPES_DIR, V2_DATA_DIR from tests.utils import dummy_envvars @@ -17,7 +20,7 @@ def env_client(monkeypatch) -> ClientV2: @pytest.fixture def custom_base_url_client(monkeypatch) -> ClientV2: - class _FakeResp: + class _FakePostResp: status_code = 400 # any non-2xx will do ok = False @@ -25,20 +28,50 @@ def json(self): # Shape must match what handle_error_v2 expects return {"status": -1, "detail": "forced failure from test"} + class _FakeGetResp: + status_code = 200 + ok = True + + def json(self): + return { + "job": { + "id": "12345678-1234-1234-1234-123456789ABC", + "model_id": "87654321-4321-4321-4321-CBA987654321", + "filename": "default_sample.jpg", + "alias": "dummy-alias.jpg", + "created_at": "2025-07-03T14:27:58.974451", + "status": "Processing", + "polling_url": "https://api-v2.mindee.net/v2/jobs/12345678-1234-1234-1234-123456789ABC", + "result_url": None, + "webhooks": [], + "error": None, + } + } + + @property + def content(self) -> bytes: + """ + Raw (bytes) payload, mimicking `requests.Response.content`. + """ + return json.dumps(self.json()).encode("utf-8") + monkeypatch.setenv("MINDEE_V2_BASE_URL", "https://dummy-url") - def _fake_response(*args, **kwargs): - return _FakeResp() + def _fake_post_error(*args, **kwargs): + return _FakePostResp() + + def _fake_get_error(*args, **kwargs): + return _FakeGetResp() monkeypatch.setattr( "mindee.mindee_http.mindee_api_v2.requests.post", - _fake_response, + _fake_post_error, raising=True, ) monkeypatch.setattr( "mindee.mindee_http.mindee_api_v2.requests.get", - _fake_response, + _fake_get_error, raising=True, ) @@ -78,12 +111,6 @@ def test_enqueue_and_parse_path_with_env_token(custom_base_url_client): ) -@pytest.mark.v2 -def test_parse_queued6_and_parse_path_with_env_token(custom_base_url_client): - with pytest.raises(MindeeHTTPErrorV2): - custom_base_url_client.parse_queued("dummy-queue") - - @pytest.mark.v2 def test_loads_from_prediction(env_client): input_inference = LocalResponse( @@ -104,3 +131,24 @@ def test_error_handling(custom_base_url_client): ) assert e.status_code == -1 assert e.detail == "forced failure from test" + + +def test_enqueue(custom_base_url_client): + response = custom_base_url_client.parse_queued( + "12345678-1234-1234-1234-123456789ABC" + ) + assert isinstance(response, PollingResponse) + assert isinstance(response.job, Job) + assert response.job.id == "12345678-1234-1234-1234-123456789ABC" + assert response.job.model_id == "87654321-4321-4321-4321-CBA987654321" + assert response.job.filename == "default_sample.jpg" + assert response.job.alias == "dummy-alias.jpg" + assert str(response.job.created_at) == "2025-07-03 14:27:58.974451" + assert response.job.status == "Processing" + assert ( + response.job.polling_url + == "https://api-v2.mindee.net/v2/jobs/12345678-1234-1234-1234-123456789ABC" + ) + assert not response.job.result_url + assert len(response.job.webhooks) == 0 + assert not response.job.error From c06d20ebc7f26e897292e0d7c180b3dc78637a2f Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Thu, 3 Jul 2025 16:48:20 +0200 Subject: [PATCH 09/17] remove support for not-yet existing webhooks --- mindee/parsing/v2/__init__.py | 1 - mindee/parsing/v2/webhook.py | 32 -------------------------------- 2 files changed, 33 deletions(-) delete mode 100644 mindee/parsing/v2/webhook.py diff --git a/mindee/parsing/v2/__init__.py b/mindee/parsing/v2/__init__.py index ff03d053..2c855c52 100644 --- a/mindee/parsing/v2/__init__.py +++ b/mindee/parsing/v2/__init__.py @@ -10,4 +10,3 @@ from mindee.parsing.v2.inference_result import InferenceResult from mindee.parsing.v2.job import Job from mindee.parsing.v2.polling_response import PollingResponse -from mindee.parsing.v2.webhook import Webhook diff --git a/mindee/parsing/v2/webhook.py b/mindee/parsing/v2/webhook.py deleted file mode 100644 index b325a323..00000000 --- a/mindee/parsing/v2/webhook.py +++ /dev/null @@ -1,32 +0,0 @@ -from datetime import datetime -from typing import Optional - -from mindee.parsing.common.string_dict import StringDict -from mindee.parsing.v2.error_response import ErrorResponse - - -class Webhook: - """Webhook information for a V2 polling attempt.""" - - id: str - """ID of the webhook.""" - error: Optional[ErrorResponse] - """Error response if any.""" - created_at: datetime - """Date and time the webhook was sent at.""" - status: str - """Status of the webhook.""" - - def __init__(self, raw_response: StringDict) -> None: - self.id = raw_response["id"] - self.error = ( - ErrorResponse(raw_response["error"]) if raw_response["error"] else None - ) - self.created_at = self.parse_date(raw_response["created_at"]) - self.status = raw_response["status"] - - @staticmethod - def parse_date(date_string: str) -> datetime: - """Shorthand to parse the date.""" - date_string = date_string.replace("Z", "+00:00") - return datetime.fromisoformat(date_string) From 43769274dd1912bfa3782085d135a5ef1c659bd9 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Thu, 3 Jul 2025 16:55:23 +0200 Subject: [PATCH 10/17] [TEMP] enable in-place tests --- .github/workflows/_test-code-samples.yml | 2 +- .github/workflows/_test-integrations.yml | 2 ++ .github/workflows/pull-request.yml | 4 ++-- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/workflows/_test-code-samples.yml b/.github/workflows/_test-code-samples.yml index 7bd17624..2b154f34 100644 --- a/.github/workflows/_test-code-samples.yml +++ b/.github/workflows/_test-code-samples.yml @@ -40,7 +40,7 @@ jobs: - name: Tests code samples run: | - ./tests/test_code_samples.sh ${{ secrets.MINDEE_ACCOUNT_SE_TESTS }} ${{ secrets.MINDEE_ENDPOINT_SE_TESTS }} ${{ secrets.MINDEE_API_KEY_SE_TESTS }} + ./tests/test_code_samples.sh ${{ secrets.MINDEE_ACCOUNT_SE_TESTS }} ${{ secrets.MINDEE_ENDPOINT_SE_TESTS }} ${{ secrets.MINDEE_API_KEY_SE_TESTS }} ${{ secrets.MINDEE_V2_SE_TESTS_API_KEY }} ${{ secrets.MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID }} - name: Notify Slack Action on Failure uses: ravsamhq/notify-slack-action@2.3.0 diff --git a/.github/workflows/_test-integrations.yml b/.github/workflows/_test-integrations.yml index b3911207..e0912f16 100644 --- a/.github/workflows/_test-integrations.yml +++ b/.github/workflows/_test-integrations.yml @@ -47,6 +47,8 @@ jobs: env: MINDEE_API_KEY: ${{ secrets.MINDEE_API_KEY_SE_TESTS }} WORKFLOW_ID: ${{ secrets.WORKFLOW_ID_SE_TESTS }} + MINDEE_V2_API_KEY: ${{ secrets.MINDEE_V2_SE_TESTS_API_KEY }} + MINDEE_V2_FINDOC_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID }} run: | pytest -m integration diff --git a/.github/workflows/pull-request.yml b/.github/workflows/pull-request.yml index e587933f..bb68f179 100644 --- a/.github/workflows/pull-request.yml +++ b/.github/workflows/pull-request.yml @@ -15,10 +15,10 @@ jobs: needs: test-units secrets: inherit test-integrations: - uses: mindee/mindee-api-python/.github/workflows/_test-integrations.yml@main + uses: mindee/mindee-api-python/.github/workflows/_test-integrations.yml@client-v2 needs: test-units secrets: inherit test-code-samples: - uses: mindee/mindee-api-python/.github/workflows/_test-code-samples.yml@main + uses: mindee/mindee-api-python/.github/workflows/_test-code-samples.yml@client-v2 needs: test-units secrets: inherit From c6e9bb589f1ab51a12c818c73ee0c45d0a1f054a Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Fri, 4 Jul 2025 10:04:45 +0200 Subject: [PATCH 11/17] update PR trigger --- .github/workflows/pull-request.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/pull-request.yml b/.github/workflows/pull-request.yml index bb68f179..58ee2a12 100644 --- a/.github/workflows/pull-request.yml +++ b/.github/workflows/pull-request.yml @@ -5,20 +5,20 @@ on: jobs: static-analysis: - uses: mindee/mindee-api-python/.github/workflows/_static-analysis.yml@main + uses: mindee/mindee-api-python/.github/workflows/_static-analysis.yml test-units: - uses: mindee/mindee-api-python/.github/workflows/_test-units.yml@main + uses: mindee/mindee-api-python/.github/workflows/_test-units.yml needs: static-analysis secrets: inherit test-regressions: - uses: mindee/mindee-api-python/.github/workflows/_test-regressions.yml@main + uses: mindee/mindee-api-python/.github/workflows/_test-regressions.yml needs: test-units secrets: inherit test-integrations: - uses: mindee/mindee-api-python/.github/workflows/_test-integrations.yml@client-v2 + uses: mindee/mindee-api-python/.github/workflows/_test-integrations.yml needs: test-units secrets: inherit test-code-samples: - uses: mindee/mindee-api-python/.github/workflows/_test-code-samples.yml@client-v2 + uses: mindee/mindee-api-python/.github/workflows/_test-code-samples.yml needs: test-units secrets: inherit From a46e52323a26607908d9c6a939d98ef1b5b67b7e Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Fri, 4 Jul 2025 10:54:04 +0200 Subject: [PATCH 12/17] move url input sources to where they belong, tweak PR CI --- .github/workflows/pull-request.yml | 10 +++++----- mindee/client.py | 13 +++++++++++++ mindee/client_mixin.py | 13 ------------- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/.github/workflows/pull-request.yml b/.github/workflows/pull-request.yml index 58ee2a12..674c4056 100644 --- a/.github/workflows/pull-request.yml +++ b/.github/workflows/pull-request.yml @@ -5,20 +5,20 @@ on: jobs: static-analysis: - uses: mindee/mindee-api-python/.github/workflows/_static-analysis.yml + uses: .github/workflows/_static-analysis.yml test-units: - uses: mindee/mindee-api-python/.github/workflows/_test-units.yml + uses: .github/workflows/_test-units.yml needs: static-analysis secrets: inherit test-regressions: - uses: mindee/mindee-api-python/.github/workflows/_test-regressions.yml + uses: .github/workflows/_test-regressions.yml needs: test-units secrets: inherit test-integrations: - uses: mindee/mindee-api-python/.github/workflows/_test-integrations.yml + uses: .github/workflows/_test-integrations.yml needs: test-units secrets: inherit test-code-samples: - uses: mindee/mindee-api-python/.github/workflows/_test-code-samples.yml + uses: .github/workflows/_test-code-samples.yml needs: test-units secrets: inherit diff --git a/mindee/client.py b/mindee/client.py index 6b8d3ba1..1acf1aa6 100644 --- a/mindee/client.py +++ b/mindee/client.py @@ -562,3 +562,16 @@ def create_endpoint( ) version = "1" return self._build_endpoint(endpoint_name, account_name, version) + + @staticmethod + def source_from_url( + url: str, + ) -> UrlInputSource: + """ + Load a document from a URL. + + :param url: Raw byte input + """ + return UrlInputSource( + url, + ) diff --git a/mindee/client_mixin.py b/mindee/client_mixin.py index 619a4d70..19616c0c 100644 --- a/mindee/client_mixin.py +++ b/mindee/client_mixin.py @@ -72,19 +72,6 @@ def source_from_bytes( input_doc.fix_pdf() return input_doc - @staticmethod - def source_from_url( - url: str, - ) -> UrlInputSource: - """ - Load a document from a URL. - - :param url: Raw byte input - """ - return UrlInputSource( - url, - ) - @staticmethod def _validate_async_params( initial_delay_sec: float, delay_sec: float, max_retries: int From 42c68d171c194642554d76cd8b82fd7d39c0d56f Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Fri, 4 Jul 2025 10:56:15 +0200 Subject: [PATCH 13/17] use proper local gh targetting syntax --- .github/workflows/pull-request.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/pull-request.yml b/.github/workflows/pull-request.yml index 674c4056..c7edf455 100644 --- a/.github/workflows/pull-request.yml +++ b/.github/workflows/pull-request.yml @@ -5,20 +5,20 @@ on: jobs: static-analysis: - uses: .github/workflows/_static-analysis.yml + uses: ./.github/workflows/_static-analysis.yml test-units: - uses: .github/workflows/_test-units.yml + uses: ./.github/workflows/_test-units.yml needs: static-analysis secrets: inherit test-regressions: - uses: .github/workflows/_test-regressions.yml + uses: ./.github/workflows/_test-regressions.yml needs: test-units secrets: inherit test-integrations: - uses: .github/workflows/_test-integrations.yml + uses: ./.github/workflows/_test-integrations.yml needs: test-units secrets: inherit test-code-samples: - uses: .github/workflows/_test-code-samples.yml + uses: ./.github/workflows/_test-code-samples.yml needs: test-units secrets: inherit From 4b8b37885cd16c8fb607268ea61b5a3e1ae70b8a Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Fri, 4 Jul 2025 10:57:55 +0200 Subject: [PATCH 14/17] fix unused import --- mindee/client_mixin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mindee/client_mixin.py b/mindee/client_mixin.py index 19616c0c..18be1867 100644 --- a/mindee/client_mixin.py +++ b/mindee/client_mixin.py @@ -2,7 +2,7 @@ from typing import BinaryIO, Union from mindee.error import MindeeClientError -from mindee.input import Base64Input, BytesInput, FileInput, PathInput, UrlInputSource +from mindee.input import Base64Input, BytesInput, FileInput, PathInput class ClientMixin: From 8869af1c7d383dd870ad4b6791ffd435f5c6a635 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Fri, 4 Jul 2025 10:58:19 +0200 Subject: [PATCH 15/17] drop support for python 3.7 --- pyproject.toml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index fd6a05e0..009323a1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,7 +13,6 @@ readme = "README.md" license = {file = "LICENSE"} classifiers = [ "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", @@ -27,7 +26,7 @@ classifiers = [ "License :: OSI Approved :: MIT License", ] -requires-python = ">=3.7" +requires-python = ">=3.8" dependencies = [ "pypdfium2>=4.0,<4.30.1", From 66cd72082bf77c37b7720efd9e2ebfb807a28c16 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Fri, 4 Jul 2025 11:02:38 +0200 Subject: [PATCH 16/17] bump CI min python versions --- .github/workflows/_test-code-samples.yml | 2 +- .github/workflows/_test-integrations.yml | 2 +- .github/workflows/_test-regressions.yml | 2 +- .github/workflows/_test-units.yml | 1 - 4 files changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/_test-code-samples.yml b/.github/workflows/_test-code-samples.yml index 2b154f34..826a05ff 100644 --- a/.github/workflows/_test-code-samples.yml +++ b/.github/workflows/_test-code-samples.yml @@ -12,7 +12,7 @@ jobs: max-parallel: 2 matrix: python-version: - - "3.7" + - "3.8" - "3.12" runs-on: "ubuntu-22.04" steps: diff --git a/.github/workflows/_test-integrations.yml b/.github/workflows/_test-integrations.yml index e0912f16..af1a875f 100644 --- a/.github/workflows/_test-integrations.yml +++ b/.github/workflows/_test-integrations.yml @@ -18,7 +18,7 @@ jobs: - "ubuntu-22.04" - "windows-2022" python-version: - - "3.7" + - "3.8" - "3.12" runs-on: ${{ matrix.os }} steps: diff --git a/.github/workflows/_test-regressions.yml b/.github/workflows/_test-regressions.yml index ba21ea84..e45a0f74 100644 --- a/.github/workflows/_test-regressions.yml +++ b/.github/workflows/_test-regressions.yml @@ -18,7 +18,7 @@ jobs: - "ubuntu-22.04" - "windows-2022" python-version: - - "3.7" + - "3.8" - "3.12" runs-on: ${{ matrix.os }} steps: diff --git a/.github/workflows/_test-units.yml b/.github/workflows/_test-units.yml index 5d625e88..d0915aeb 100644 --- a/.github/workflows/_test-units.yml +++ b/.github/workflows/_test-units.yml @@ -16,7 +16,6 @@ jobs: - "ubuntu-22.04" - "windows-2022" python-version: - - "3.7" - "3.8" - "3.9" - "3.10" From cad473c278f5ad7dd453a09ba12d5b9eec1c766a Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Mon, 7 Jul 2025 10:41:17 +0200 Subject: [PATCH 17/17] :recycle: set permissions on PR workflow --- .github/workflows/pull-request.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/pull-request.yml b/.github/workflows/pull-request.yml index c7edf455..d6d09cb0 100644 --- a/.github/workflows/pull-request.yml +++ b/.github/workflows/pull-request.yml @@ -3,6 +3,10 @@ name: Pull Request on: pull_request: +permissions: + contents: read + pull-requests: read + jobs: static-analysis: uses: ./.github/workflows/_static-analysis.yml