Skip to content

Commit 0ef7338

Browse files
✨ add support for generated APIs (#210)
1 parent 73acfb2 commit 0ef7338

26 files changed

+1569
-89
lines changed

docs/conf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@
5454
# Add any paths that contain custom static files (such as style sheets) here,
5555
# relative to this directory. They are copied after the builtin static files,
5656
# so a file named "default.css" will overwrite the builtin "default.css".
57-
html_static_path = ["_static"]
57+
html_static_path = []
5858

5959
# A list of paths that contain extra files not directly related to the documentation.
6060
html_extra_path = ["extras"]

docs/extras/code_samples/default.txt

Lines changed: 23 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,29 @@
1-
import json
2-
import requests
1+
from mindee import Client, PredictResponse, product
32

4-
api_key = "my-api-key"
5-
account = "my-account"
6-
endpoint = "my-endpoint"
7-
version = "my-version"
3+
# Init a new client
4+
mindee_client = Client(api_key="my-api-key")
85

9-
url = f"https://api.mindee.net/v1/products/{account}/{endpoint}/v{version}/predict"
6+
# Add the corresponding endpoint (document). Set the account_name to "mindee" if you are using OTS.
7+
my_endpoint = mindee_client.create_endpoint(
8+
account_name="my-account",
9+
endpoint_name="my-endpoint",
10+
version="my-version"
11+
)
1012

11-
with open("/path/to/the/file.ext", "rb") as file_handle:
12-
files = {"document": file_handle}
13-
headers = {"Authorization": f"Token {api_key}"}
14-
response = requests.post(url, files=files, headers=headers)
13+
# Load a file from disk
14+
input_doc = mindee_client.source_from_path("/path/to/the/file.ext")
1515

16-
json_response = response.json()
16+
# Parse the file.
17+
# The endpoint must be specified since it cannot be determined from the class.
18+
result: PredictResponse = mindee_client.parse(
19+
product.GeneratedV1,
20+
input_doc,
21+
endpoint=my_endpoint
22+
)
1723

18-
if not response.ok:
19-
raise RuntimeError(json_response["api_request"]["error"])
24+
# Print a brief summary of the parsed data
25+
print(result.document)
2026

21-
print(json.dumps(json_response["document"], indent=2))
27+
# # Iterate over all the fields in the document
28+
# for field_name, field_values in result.document.inference.prediction.fields.items():
29+
# print(field_name, "=", field_values)
Lines changed: 29 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,45 +1,29 @@
1-
import json
2-
import requests
3-
from time import sleep
4-
5-
api_key = "my-api-key"
6-
account = "my-account"
7-
endpoint = "my-endpoint"
8-
version = "my-version"
9-
10-
url_enqueue = f"https://api.mindee.net/v1/products/{account}/{endpoint}/v{version}/predict_async"
11-
headers = {"Authorization": f"Token {api_key}"}
12-
13-
with open("/path/to/the/file.ext", "rb") as file_handle:
14-
files = {"document": file_handle}
15-
response_enqueue = requests.post(url_enqueue, files=files, headers=headers)
16-
17-
json_response_enqueue = response_enqueue.json()
18-
19-
if not response_enqueue.ok:
20-
raise RuntimeError(json_response_enqueue["api_request"]["error"])
21-
22-
job_id = json_response_enqueue["job"]["id"]
23-
24-
url_parse_queued = f"https://api.mindee.net/v1/products/{account}/{endpoint}/v{version}/documents/queue/{job_id}"
25-
sleep(4)
26-
tries = 0
27-
job_status = ""
28-
while tries < 30:
29-
response_parse = requests.get(url_parse_queued, headers=headers)
30-
if not response_parse.ok:
31-
raise RuntimeError(json_response_enqueue["api_request"]["error"])
32-
json_response_parse = response_parse.json()
33-
job_status = json_response_parse["job"]["status"]
34-
if job_status == "completed":
35-
break
36-
else:
37-
print(json_response_parse["job"])
38-
39-
tries += 1
40-
sleep(2)
41-
42-
if job_status != "completed":
43-
raise RuntimeError(f"Async parsing timed out after {tries} tries")
44-
45-
print(json.dumps(json_response_parse["document"], indent=2))
1+
from mindee import Client, PredictResponse, product
2+
3+
# Init a new client
4+
mindee_client = Client(api_key="my-api-key")
5+
6+
# Add the corresponding endpoint (document). Set the account_name to "mindee" if you are using OTS.
7+
my_endpoint = mindee_client.create_endpoint(
8+
account_name="my-account",
9+
endpoint_name="my-endpoint",
10+
version="my-version"
11+
)
12+
13+
# Load a file from disk
14+
input_doc = mindee_client.source_from_path("/path/to/the/file.ext")
15+
16+
# Parse the file.
17+
# The endpoint must be specified since it cannot be determined from the class.
18+
result: PredictResponse = mindee_client.enqueue_and_parse(
19+
product.GeneratedV1,
20+
input_doc,
21+
endpoint=my_endpoint
22+
)
23+
24+
# Print a brief summary of the parsed data
25+
print(result.document)
26+
27+
# # Iterate over all the fields in the document
28+
# for field_name, field_values in result.document.inference.prediction.fields.items():
29+
# print(field_name, "=", field_values)

docs/extras/guide/custom_v1.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ Values of `ListField`s are stored in a `ListFieldValue` structure, which is impl
7070
* **confidence** (`float`): the confidence score of the prediction
7171
* **bounding_box** (`BBox`): the relative coordinates of the 4 vertices of a rectangle containing the word in the document.
7272
* **polygon** (`Polygon`): vertices of a polygon containing the word.
73-
* **page_id** (`int`): the ID of the page, is `undefined` when at document-level.
73+
* **page_id** (`int`): the ID of the page, is `None` when at document-level.
7474

7575

7676
### Classification Field

docs/extras/guide/generated_v1.md

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
---
2+
title: Generated API Python
3+
---
4+
5+
The Python OCR SDK supports generated APIs.
6+
Generated APIs can theoretically support all APIs in a catch-all generic format.
7+
8+
# Quick-Start
9+
10+
```python
11+
from mindee import Client, product
12+
13+
# Init a new client
14+
mindee_client = Client(api_key="my-api-key")
15+
16+
# Add your custom endpoint (document)
17+
my_endpoint = mindee_client.create_endpoint(
18+
account_name="my-account",
19+
endpoint_name="my-endpoint",
20+
version="my-version" # Note: the version should always be provided when using OTS (off-the-shelf) products
21+
)
22+
23+
# Load a file from disk
24+
input_doc = mindee_client.source_from_path("/path/to/the/file.ext")
25+
26+
# Parse the file.
27+
# The endpoint must be specified since it cannot be determined from the class.
28+
result = mindee_client.parse(
29+
product.GeneratedV1,
30+
input_doc,
31+
endpoint=my_endpoint
32+
)
33+
34+
# Print a brief summary of the parsed data
35+
print(result.document)
36+
37+
# Iterate over all the fields in the document
38+
for field_name, field_values in result.document.inference.prediction.fields.items():
39+
print(field_name, "=", field_values)
40+
```
41+
42+
# Generated Endpoints
43+
44+
You may have noticed in the previous step that in order to access a custom build, you will need to provide an account and an endpoint name at the very least.
45+
46+
Although it is optional, the version number should match the latest version of your build in most use-cases.
47+
If it is not set, it will default to "1".
48+
49+
# Field Types
50+
51+
## Generated Fields
52+
53+
### Generated List Field
54+
55+
A `GeneratedListField` is a special type of custom list that implements the following:
56+
57+
- **values** (`List[Union[StringField, `[GeneratedObjectField](#generated-object-field)`]]`): the list of values contained in the field.
58+
- **page_id** (`int`): the ID of the page; only available for some documents at the moment.
59+
60+
Since the inner contents can vary, the value isn't accessed through a property, but rather through the following functions:
61+
62+
- **contents_list()** (`-> List[Union[str, float]]`): returns a list of values for each element.
63+
- **contents_string(separator=" ")** (`-> str`): returns a single string of all the values concatenated, with an optional **separator** `str` between them.
64+
- **`__str__()`**: returns a string representation of all values, with appropriate spacing.
65+
66+
#### Generated Object Field
67+
68+
Unrecognized structures and sometimes values of `ListField`s are stored in a `GeneratedObjectField` structure, which is implemented dynamically depending on the object's structure.
69+
70+
No matter what, the fields will be stored in a dictionary-like structure with a `key: value` pair where `key` is a string and `value` is a nullable string. The object also contains:
71+
72+
- **page_id** (`Optional[int]`): the ID of the page, is `None` when at document-level.
73+
74+
# Attributes
75+
76+
Generated builds always have access to at least two attributes:
77+
78+
## Fields
79+
80+
**fields** (`Dict[str, List[Union[`[GeneratedListField](#generated-list-field)`, `[GeneratedObjectField](#generated-object-field)`, StringField]]]`):
81+
82+
```python
83+
print(str(result.document.inference.prediction.fields["my-field"]))
84+
```
85+
86+
# Questions?
87+
88+
[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-1jv6nawjq-FDgFcF2T5CmMmRpl9LLptw)

docs/parsing/generated.rst

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
----------------
2+
Generated Fields
3+
----------------
4+
5+
Generated Lists
6+
===============
7+
.. autoclass:: mindee.parsing.generated.generated_list
8+
:members:
9+
10+
11+
Generated Objects
12+
=================
13+
.. autoclass:: mindee.parsing.generated.generated_object
14+
:members:
15+
16+
.. autofunction:: mindee.parsing.generated.is_generated_object

docs/parsing/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,5 @@ Parsing Utilities
77
./standard
88
./common
99
./custom
10+
./generated
1011

docs/product/custom_v1.rst

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
Custom V1
2+
---------
3+
4+
**Sample Code:**
5+
6+
.. literalinclude:: /extras/code_samples/custom_v1.txt
7+
:language: Python
8+
9+
.. autoclass:: mindee.product.custom.custom_v1.CustomV1
10+
:members:
11+
:inherited-members:
12+
13+
.. autoclass:: mindee.product.custom.custom_v1.CustomV1Document
14+
:members:
15+
:inherited-members:
16+
17+
.. autoclass:: mindee.product.custom.custom_v1.CustomV1Page
18+
:members:
19+
:inherited-members:

docs/product/generated_v1.rst

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
Generated V1
2+
------------
3+
4+
5+
**Sample Code (sync):**
6+
7+
.. literalinclude:: /extras/code_samples/default.txt
8+
:language: Python
9+
10+
**Sample Code (async):**
11+
12+
.. literalinclude:: /extras/code_samples/default_async.txt
13+
:language: Python
14+
15+
.. autoclass:: mindee.product.generated.generated_v1.GeneratedV1
16+
:members:
17+
:inherited-members:
18+
19+
.. autoclass:: mindee.product.generated.generated_v1.GeneratedV1Document
20+
:members:
21+
:inherited-members:
22+
23+
.. autoclass:: mindee.product.generated.generated_v1.GeneratedV1Page
24+
:members:
25+
:inherited-members:

docs/product/international_id_v1.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ International ID V1
33

44
**Sample Code:**
55

6-
.. literalinclude:: /extras/code_samples/international_id_v1.txt
6+
.. literalinclude:: /extras/code_samples/international_id_v1_async.txt
77
:language: Python
88

99
.. autoclass:: mindee.product.international_id.international_id_v1.InternationalIdV1

0 commit comments

Comments
 (0)