Skip to content

Commit 5b0f922

Browse files
committed
♻️ minor improvements to main client
1 parent 0b34fc5 commit 5b0f922

File tree

1 file changed

+32
-23
lines changed

1 file changed

+32
-23
lines changed

mindee/client.py

Lines changed: 32 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,8 @@ def get_bound_classname(type_var) -> str:
3232

3333
def _clean_account_name(account_name: str) -> str:
3434
"""
35-
Checks that an account name is provided for custom builds, and sets the default one otherwise.
35+
Checks that an account name is provided for custom products, and sets the default one otherwise.
3636
37-
:param product_class: product class to use for API calls.
3837
:param account_name: name of the account's holder. Only needed for custom products.
3938
"""
4039
if not account_name or len(account_name) < 1:
@@ -80,7 +79,7 @@ def parse(
8079
The response object will be instantiated based on this parameter.
8180
8281
:param input_source: The document/source file to use.
83-
Has to be be created beforehand.
82+
Has to be created beforehand.
8483
8584
:param include_words: Whether to include the full text for each page.
8685
This performs a full OCR operation on the server and will increase response time.
@@ -133,7 +132,7 @@ def enqueue(
133132
The response object will be instantiated based on this parameter.
134133
135134
:param input_source: The document/source file to use.
136-
Has to be be created beforehand.
135+
Has to be created beforehand.
137136
138137
:param include_words: Whether to include the full text for each page.
139138
This performs a full OCR operation on the server and will increase response time.
@@ -166,7 +165,12 @@ def enqueue(
166165
page_options.page_indexes,
167166
)
168167
return self._predict_async(
169-
product_class, input_source, include_words, close_file, cropper, endpoint
168+
product_class,
169+
input_source,
170+
endpoint,
171+
include_words,
172+
close_file,
173+
cropper,
170174
)
171175

172176
def parse_queued(
@@ -191,16 +195,21 @@ def parse_queued(
191195
return self._get_queued_document(product_class, endpoint, queue_id)
192196

193197
def _validate_async_params(
194-
self, initial_delay_sec: float, delay_sec: float
198+
self, initial_delay_sec: float, delay_sec: float, max_retries: int
195199
) -> None:
196-
if delay_sec < 2:
200+
min_delay = 1
201+
min_initial_delay = 2
202+
min_retries = 2
203+
if delay_sec < min_delay:
197204
raise MindeeClientError(
198-
"Cannot set auto-parsing delay to less than 2 seconds."
205+
f"Cannot set auto-parsing delay to less than {min_delay} seconds."
199206
)
200-
if initial_delay_sec < 4:
207+
if initial_delay_sec < min_initial_delay:
201208
raise MindeeClientError(
202-
"Cannot set initial parsing delay to less than 4 seconds."
209+
f"Cannot set initial parsing delay to less than {min_initial_delay} seconds."
203210
)
211+
if max_retries < min_retries:
212+
raise MindeeClientError(f"Cannot set retries to less than {min_retries}.")
204213

205214
def enqueue_and_parse(
206215
self,
@@ -222,7 +231,7 @@ def enqueue_and_parse(
222231
The response object will be instantiated based on this parameter.
223232
224233
:param input_source: The document/source file to use.
225-
Has to be be created beforehand.
234+
Has to be created beforehand.
226235
227236
:param include_words: Whether to include the full text for each page.
228237
This performs a full OCR operation on the server and will increase response time.
@@ -246,9 +255,8 @@ def enqueue_and_parse(
246255
This should not be shorter than 2 seconds.
247256
248257
:param max_retries: Total number of polling attempts.
249-
250258
"""
251-
self._validate_async_params(initial_delay_sec, delay_sec)
259+
self._validate_async_params(initial_delay_sec, delay_sec, max_retries)
252260
if not endpoint:
253261
endpoint = self._initialize_ots_endpoint(product_class)
254262
queue_result = self.enqueue(
@@ -345,16 +353,12 @@ def _predict_async(
345353
self,
346354
product_class: Type[Inference],
347355
input_source: Union[LocalInputSource, UrlInputSource],
356+
endpoint: Optional[Endpoint] = None,
348357
include_words: bool = False,
349358
close_file: bool = True,
350359
cropper: bool = False,
351-
endpoint: Optional[Endpoint] = None,
352360
) -> AsyncPredictResponse:
353-
"""
354-
Sends a document to the queue, and sends back an asynchronous predict response.
355-
356-
:param doc_config: Configuration of the document.
357-
"""
361+
"""Sends a document to the queue, and sends back an asynchronous predict response."""
358362
if input_source is None:
359363
raise MindeeClientError("No input document provided")
360364
if not endpoint:
@@ -384,7 +388,6 @@ def _get_queued_document(
384388
Fetches a document or a Job from a given queue.
385389
386390
:param queue_id: Queue_id received from the API
387-
:param doc_config: Pre-checked document configuration.
388391
"""
389392
queue_response = endpoint.document_queue_req_get(queue_id=queue_id)
390393

@@ -436,8 +439,6 @@ def create_endpoint(
436439
:param account_name: Your organization's username on the API Builder
437440
:param version: If set, locks the version of the model to use.
438441
If not set, use the latest version of the model.
439-
:param product_class: A document class in which the response will be extracted.
440-
Must inherit from ``mindee.product.base.Document``.
441442
"""
442443
if len(endpoint_name) == 0:
443444
raise MindeeClientError("Custom endpoint require a valid 'endpoint_name'.")
@@ -456,6 +457,8 @@ def source_from_path(
456457
Load a document from an absolute path, as a string.
457458
458459
:param input_path: Path of file to open
460+
:param fix_pdf: Whether to attempt fixing PDF files before sending.
461+
Setting this to `True` can modify the data sent to Mindee.
459462
"""
460463
input_doc = PathInput(input_path)
461464
if fix_pdf:
@@ -469,6 +472,8 @@ def source_from_file(
469472
Load a document from a normal Python file object/handle.
470473
471474
:param input_file: Input file handle
475+
:param fix_pdf: Whether to attempt fixing PDF files before sending.
476+
Setting this to `True` can modify the data sent to Mindee.
472477
"""
473478
input_doc = FileInput(input_file)
474479
if fix_pdf:
@@ -483,6 +488,8 @@ def source_from_b64string(
483488
484489
:param input_string: Input to parse as base64 string
485490
:param filename: The name of the file (without the path)
491+
:param fix_pdf: Whether to attempt fixing PDF files before sending.
492+
Setting this to `True` can modify the data sent to Mindee.
486493
"""
487494
input_doc = Base64Input(input_string, filename)
488495
if fix_pdf:
@@ -497,6 +504,8 @@ def source_from_bytes(
497504
498505
:param input_bytes: Raw byte input
499506
:param filename: The name of the file (without the path)
507+
:param fix_pdf: Whether to attempt fixing PDF files before sending.
508+
Setting this to `True` can modify the data sent to Mindee.
500509
"""
501510
input_doc = BytesInput(input_bytes, filename)
502511
if fix_pdf:
@@ -508,7 +517,7 @@ def source_from_url(
508517
url: str,
509518
) -> UrlInputSource:
510519
"""
511-
Load a document from an URL.
520+
Load a document from a URL.
512521
513522
:param url: URL of the document to load
514523
"""

0 commit comments

Comments
 (0)