Skip to content

Commit 928dbc1

Browse files
✨ add support for workflows (#274)
1 parent 56a85fc commit 928dbc1

26 files changed

+579
-70
lines changed

.github/workflows/test-code-samples.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,6 @@ jobs:
5252
with:
5353
status: ${{ job.status }}
5454
notify_when: "failure"
55-
notification_title: "{workflow} is failing"
55+
notification_title: "Code sample test '{workflow}' is failing"
5656
env:
5757
SLACK_WEBHOOK_URL: ${{ secrets.PRODUCTION_ISSUES_SLACK_HOOK_URL }}

.github/workflows/test-integration.yml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,5 +49,17 @@ jobs:
4949
- name: Run Integration Testing
5050
env:
5151
MINDEE_API_KEY: ${{ secrets.MINDEE_API_KEY_SE_TESTS }}
52+
WORKFLOW_ID: ${{ secrets.WORKFLOW_ID_SE_TESTS }}
5253
run: |
5354
pytest -m integration
55+
56+
57+
- name: Notify Slack Action on Failure
58+
uses: ravsamhq/notify-slack-action@2.3.0
59+
if: ${{ always() && github.ref_name == 'main' }}
60+
with:
61+
status: ${{ job.status }}
62+
notify_when: "failure"
63+
notification_title: "Integration test '{workflow}' is failing"
64+
env:
65+
SLACK_WEBHOOK_URL: ${{ secrets.PRODUCTION_ISSUES_SLACK_HOOK_URL }}

.github/workflows/test-regression.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,6 @@ jobs:
5959
with:
6060
status: ${{ job.status }}
6161
notify_when: "failure"
62-
notification_title: "Regression test workflow {workflow} is failing"
62+
notification_title: "Regression test workflow '{workflow}' is failing"
6363
env:
6464
SLACK_WEBHOOK_URL: ${{ secrets.PRODUCTION_ISSUES_SLACK_HOOK_URL }}
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
from mindee import Client, WorkflowResponse
from mindee.input import WorkflowOptions
from mindee.parsing.common import ExecutionPriority

# Init a new client
mindee_client = Client(api_key="my-api-key")

workflow_id = "workflow-id"

# Load a file from disk
input_doc = mindee_client.source_from_path("/path/to/the/file.ext")

# Send the file to the workflow.
result: WorkflowResponse = mindee_client.execute_workflow(
    input_doc,
    workflow_id,
    # Optionally, add an alias and a priority to the workflow.
    # These are passed through a WorkflowOptions object, not as direct keyword arguments.
    # options=WorkflowOptions(alias="my-alias", priority=ExecutionPriority.LOW),
)

# Print the ID of the execution to make sure it worked.
print(result.execution.id)

mindee/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,4 @@
44
from mindee.parsing.common.async_predict_response import AsyncPredictResponse, Job
55
from mindee.parsing.common.feedback_response import FeedbackResponse
66
from mindee.parsing.common.predict_response import PredictResponse
7+
from mindee.parsing.common.workflow_response import WorkflowResponse

mindee/client.py

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
from mindee.error.mindee_error import MindeeClientError, MindeeError
66
from mindee.error.mindee_http_error import handle_error
7+
from mindee.input import WorkflowOptions
78
from mindee.input.local_response import LocalResponse
89
from mindee.input.page_options import PageOptions
910
from mindee.input.sources import (
@@ -22,11 +23,15 @@
2223
is_valid_async_response,
2324
is_valid_sync_response,
2425
)
26+
from mindee.mindee_http.workflow_endpoint import WorkflowEndpoint
27+
from mindee.mindee_http.workflow_settings import WorkflowSettings
2528
from mindee.parsing.common.async_predict_response import AsyncPredictResponse
2629
from mindee.parsing.common.feedback_response import FeedbackResponse
2730
from mindee.parsing.common.inference import Inference
2831
from mindee.parsing.common.predict_response import PredictResponse
2932
from mindee.parsing.common.string_dict import StringDict
33+
from mindee.parsing.common.workflow_response import WorkflowResponse
34+
from mindee.product import GeneratedV1
3035

3136
OTS_OWNER = "mindee"
3237

@@ -230,6 +235,41 @@ def parse_queued(
230235

231236
return self._get_queued_document(product_class, endpoint, queue_id)
232237

238+
def execute_workflow(
    self,
    input_source: Union[LocalInputSource, UrlInputSource],
    workflow_id: str,
    options: Optional[WorkflowOptions] = None,
    page_options: Optional[PageOptions] = None,
) -> WorkflowResponse:
    """
    Submit a document to a workflow for execution.

    :param input_source: The document/source file to send.
        It must have been created beforehand.
    :param workflow_id: ID of the workflow to execute.
    :param options: Options for the workflow execution. When omitted, a
        ``WorkflowOptions`` with no alias, no priority, no public URL and
        ``full_text`` disabled is used.
    :param page_options: If set, pages are removed from the document as
        specified before the file is sent to the server. Only applied to
        local PDF sources. Useful to avoid page limitations.
    :return: The response built from the workflow execution request.
    """
    # Page cutting only makes sense for local PDF files; URL sources are sent as-is.
    if (
        isinstance(input_source, LocalInputSource)
        and page_options
        and input_source.is_pdf()
    ):
        input_source.process_pdf(
            page_options.operation,
            page_options.on_min_pages,
            page_options.page_indexes,
        )

    logger.debug("Sending document to workflow: %s", workflow_id)

    # Fall back to explicit "nothing set" options when the caller gave none.
    effective_options = options or WorkflowOptions(
        alias=None, priority=None, full_text=False, public_url=None
    )

    return self._send_to_workflow(
        GeneratedV1, input_source, workflow_id, effective_options
    )
272+
233273
def _validate_async_params(
234274
self, initial_delay_sec: float, delay_sec: float, max_retries: int
235275
) -> None:
@@ -438,6 +478,44 @@ def _get_queued_document(
438478

439479
return AsyncPredictResponse(product_class, queue_response.json())
440480

481+
def _send_to_workflow(
    self,
    product_class: Type[Inference],
    input_source: Union[LocalInputSource, UrlInputSource],
    workflow_id: str,
    options: WorkflowOptions,
) -> WorkflowResponse:
    """
    Post a document to a workflow and wrap the server reply.

    :param product_class: The document class to use.
        The response object is instantiated based on this parameter, and its
        ``endpoint_name`` is used when reporting an error.
    :param input_source: The document/source file to send.
        It must have been created beforehand.
    :param workflow_id: ID of the workflow.
    :param options: Options for the workflow execution.
    :raises MindeeClientError: If ``input_source`` is ``None``.
    :return: The workflow response built from the JSON body of the reply.
    """
    if input_source is None:
        raise MindeeClientError("No input document provided")

    settings = WorkflowSettings(api_key=self.api_key, workflow_id=workflow_id)
    endpoint = WorkflowEndpoint(settings)

    response = endpoint.workflow_execution_post(input_source, options)

    # The body is decoded before validation, so it is ready for the success path below.
    payload = response.json()

    if is_valid_async_response(response):
        return WorkflowResponse(product_class, payload)

    # Invalid reply: hand the cleaned body to the shared error handler.
    raise handle_error(
        str(product_class.endpoint_name),
        clean_request_json(response),
    )
518+
441519
def _initialize_ots_endpoint(self, product_class: Type[Inference]) -> Endpoint:
442520
if product_class.__name__ == "CustomV1":
443521
raise MindeeClientError("Missing endpoint specifications for custom build.")

mindee/input/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,4 @@
99
PathInput,
1010
UrlInputSource,
1111
)
12+
from mindee.input.workflow_options import WorkflowOptions

mindee/input/workflow_options.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
from typing import Optional
2+
3+
from mindee.parsing.common import ExecutionPriority
4+
5+
6+
class WorkflowOptions:
    """Container for the optional settings sent along with a workflow execution."""

    alias: Optional[str]
    """Alias to give to the document."""
    priority: Optional[ExecutionPriority]
    """Priority to give to the document."""
    full_text: bool
    """Whether the full OCR text response should be included, for compatible APIs."""
    public_url: Optional[str]
    """A unique, encrypted URL for accessing the document validation interface without requiring authentication."""

    def __init__(
        self,
        alias: Optional[str] = None,
        priority: Optional[ExecutionPriority] = None,
        full_text: Optional[bool] = False,
        public_url: Optional[str] = None,
    ):
        """
        Store the options for a workflow execution.

        :param alias: Alias for the document.
        :param priority: Priority of the document.
        :param full_text: Whether to include the full OCR text response.
            Any falsy value, including ``None``, is stored as ``False``.
        :param public_url: Public URL of the document validation interface.
        """
        self.alias = alias
        self.priority = priority
        self.public_url = public_url
        # Falsy inputs (None included) collapse to False so the attribute is never None.
        self.full_text = full_text or False

mindee/mindee_http/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,5 @@
66
is_valid_async_response,
77
is_valid_sync_response,
88
)
9+
from mindee.mindee_http.workflow_endpoint import WorkflowEndpoint
10+
from mindee.mindee_http.workflow_settings import WorkflowSettings

mindee/mindee_http/base_endpoint.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
from abc import ABC
22

3-
from mindee.mindee_http.mindee_api import MindeeApi
3+
from mindee.mindee_http.base_settings import BaseSettings
44

55

66
class BaseEndpoint(ABC):
77
"""Base endpoint class for the Mindee API."""
88

9-
def __init__(self, settings: MindeeApi) -> None:
9+
def __init__(self, settings: BaseSettings) -> None:
1010
"""
1111
Base API endpoint class for all endpoints.
1212

0 commit comments

Comments
 (0)