Skip to content

Commit 6a72a77

Browse files
✨ add support for remote resource fetching (#291)
1 parent 6cb036b commit 6a72a77

File tree

12 files changed

+253
-213
lines changed

12 files changed

+253
-213
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ Complete details on the working of the library are available in the following gu
146146
* [Python US Driver License OCR](https://developers.mindee.com/docs/python-eu-driver-license-ocr)
147147
* [Python FR Bank Account Detail OCR](https://developers.mindee.com/docs/python-fr-bank-account-details-ocr)
148148
* [Python FR Carte Grise OCR](https://developers.mindee.com/docs/python-fr-carte-grise-ocr)
149-
* [Python FR Carte Vitale OCR](https://developers.mindee.com/docs/python-fr-carte-vitale-ocr)
149+
* [Python FR Health Card OCR](https://developers.mindee.com/docs/python-fr-health-card-ocr)
150150
* [Python FR ID Card OCR](https://developers.mindee.com/docs/python-fr-carte-nationale-didentite-ocr)
151151
* [Python FR Petrol Receipts OCR](https://developers.mindee.com/docs/python-fr-petrol-receipts-ocr)
152152
* [Python US Bank Check OCR](https://developers.mindee.com/docs/python-us-bank-check-ocr)

docs/extras/code_samples/carte_vitale_v1.txt

Lines changed: 0 additions & 17 deletions
This file was deleted.

docs/product/fr/carte_vitale_v1.rst

Lines changed: 0 additions & 15 deletions
This file was deleted.

mindee/input/sources/url_input_source.py

Lines changed: 184 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,15 @@
1+
import os
2+
import random
3+
import string
4+
from datetime import datetime
5+
from pathlib import Path
6+
from typing import Optional, Union
7+
from urllib.parse import urlparse
8+
9+
import requests
10+
111
from mindee.error.mindee_error import MindeeSourceError
12+
from mindee.input.sources.bytes_input import BytesInput
213
from mindee.input.sources.local_input_source import InputType
314
from mindee.logger import logger
415

@@ -13,7 +24,7 @@ def __init__(self, url: str) -> None:
1324
"""
1425
Input document from a URL.
1526
16-
:param url: URL to send, must be HTTPS
27+
:param url: URL to send, must be HTTPS.
1728
"""
1829
if not url.lower().startswith("https"):
1930
raise MindeeSourceError("URL must be HTTPS")
@@ -23,3 +34,175 @@ def __init__(self, url: str) -> None:
2334
logger.debug("URL input: %s", url)
2435

2536
self.url = url
37+
38+
def __fetch_file_content(
    self,
    username: Optional[str] = None,
    password: Optional[str] = None,
    token: Optional[str] = None,
    headers: Optional[dict] = None,
    max_redirects: int = 3,
) -> bytes:
    """
    Fetch the content of the file from the URL.

    The ``headers`` mapping given by the caller is copied before the
    ``Authorization`` entry is added, so the caller's dictionary is never modified.

    :param username: Optional username for authentication.
    :param password: Optional password for authentication. The credentials are only
        forwarded, as a ``(username, password)`` tuple, when both are provided.
    :param token: Optional token for authentication, sent as a
        ``Authorization: Bearer <token>`` header.
    :param headers: Optional additional headers for the request.
    :param max_redirects: Maximum number of redirects to follow.
    :return: The content of the file as bytes.
    """
    # Copy instead of mutating: the same dict may be reused by the caller for
    # other requests that must not carry this token.
    request_headers = dict(headers) if headers else {}
    if token:
        request_headers["Authorization"] = f"Bearer {token}"
    auth = (username, password) if username and password else None

    return UrlInputSource.__make_request(
        self.url, auth, request_headers, 0, max_redirects=max_redirects
    )
67+
68+
def save_to_file(
    self,
    filepath: Union[Path, str],
    filename: Optional[str] = None,
    username: Optional[str] = None,
    password: Optional[str] = None,
    token: Optional[str] = None,
    headers: Optional[dict] = None,
    max_redirects: int = 3,
) -> Path:
    """
    Download the URL's content and write it to a file on disk.

    :param filepath: Directory in which the file is written.
    :param filename: Optional name for the file; completed by the filename
        helper when missing or without extension.
    :param username: Optional username for authentication.
    :param password: Optional password for authentication.
    :param token: Optional token for authentication.
    :param headers: Optional additional headers for the request.
    :param max_redirects: Maximum number of redirects to follow.
    :return: The path to the saved file.
    """
    # Download first so that nothing is created on disk if the fetch fails.
    content = self.__fetch_file_content(
        username, password, token, headers, max_redirects
    )
    destination = Path(filepath) / self.__fill_filename(filename)
    with open(destination, "wb") as output:
        output.write(content)
    return destination
98+
99+
def as_local_input_source(
    self,
    filename: Optional[str] = None,
    username: Optional[str] = None,
    password: Optional[str] = None,
    token: Optional[str] = None,
    headers: Optional[dict] = None,
    max_redirects: int = 3,
) -> BytesInput:
    """
    Download the URL's content and wrap it in a BytesInput object.

    :param filename: Optional filename for the BytesInput; completed by the
        filename helper when missing or without extension.
    :param username: Optional username for authentication.
    :param password: Optional password for authentication.
    :param token: Optional token for authentication.
    :param headers: Optional additional headers for the request.
    :param max_redirects: Maximum number of redirects to follow.
    :return: A BytesInput object containing the file content.
    """
    # The download happens before the filename is resolved, as in save_to_file.
    content = self.__fetch_file_content(
        username, password, token, headers, max_redirects
    )
    resolved_name = self.__fill_filename(filename)
    return BytesInput(content, resolved_name)
125+
126+
@staticmethod
def __extract_filename_from_url(uri) -> str:
    """
    Return the last path component of a URL.

    Only the path part of the URL is considered, so query strings and
    fragments are not part of the result.

    :param uri: The URL to extract the filename from.
    :return: The extracted filename or an empty string if not found.
    """
    url_path = urlparse(uri).path
    return os.path.basename(url_path) or ""
136+
137+
@staticmethod
def __generate_file_name(extension: Optional[str] = ".tmp") -> str:
    """
    Generate a unique filename with a timestamp and random string.

    :param extension: The file extension to use, including the leading dot
        (default is '.tmp'). ``None`` is treated as the default so the literal
        text "None" never ends up in the generated name.
    :return: A generated filename of the form
        ``mindee_temp_<timestamp>_<8 random chars><extension>``.
    """
    if extension is None:
        extension = ".tmp"
    random_string = "".join(
        random.choices(string.ascii_lowercase + string.digits, k=8)
    )
    # Local wall-clock time; only used to make the name readable and unlikely to clash.
    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    return f"mindee_temp_{timestamp}_{random_string}{extension}"
150+
151+
@staticmethod
def __get_file_extension(filename) -> Optional[str]:
    """
    Return the lowercased extension of a filename.

    :param filename: The filename to extract the extension from.
    :return: The lowercase file extension (leading dot included) or None if not found.
    """
    _, suffix = os.path.splitext(filename)
    if not suffix:
        return None
    return suffix.lower()
161+
162+
def __fill_filename(self, filename=None) -> str:
    """
    Fill in a filename if not provided or incomplete.

    When no filename is given, the last path component of the URL is used.
    If the resulting name is empty or has no extension, it is replaced by a
    generated temporary name.

    :param filename: Optional filename to use.
    :return: A complete filename.
    """
    if filename is None:
        filename = UrlInputSource.__extract_filename_from_url(self.url)

    if not filename or not os.path.splitext(filename)[1]:
        # This branch is only reached when there is no extension to reuse, so
        # rely on the generator's default extension instead of passing None
        # (which used to produce names ending in the literal "None").
        filename = self.__generate_file_name()

    return filename
178+
179+
@staticmethod
def __make_request(url, auth, headers, redirects, max_redirects) -> bytes:
    """
    Makes an HTTP request to the given URL, while following redirections.

    Redirects are followed manually (automatic following is disabled on the
    request) so that ``max_redirects`` is actually enforced.

    :param url: The URL to request.
    :param auth: Authentication tuple (username, password).
    :param headers: Headers for the request.
    :param redirects: Current number of redirects.
    :param max_redirects: Maximum number of redirects to follow.
    :return: The content of the response.
    :raises MindeeSourceError: If max redirects are exceeded, a redirect has no
        target, or the server answers with a non-success status code.
    """
    # Local import: keeps this method self-contained without touching the
    # module-level import block.
    from urllib.parse import urljoin

    while True:
        result = requests.get(
            url, headers=headers, timeout=120, auth=auth, allow_redirects=False
        )
        status = result.status_code

        if 299 < status < 400:
            if redirects >= max_redirects:
                raise MindeeSourceError(
                    f"Can't reach URL after {redirects} out of {max_redirects} redirects, "
                    f"aborting operation."
                )
            location = result.headers.get("Location")
            if not location:
                # A 3xx answer without a target (e.g. 304) cannot be followed.
                raise MindeeSourceError(
                    f"Couldn't retrieve file from server, error code {status}."
                )
            # The Location header may be relative; resolve it against the current URL.
            # NOTE(review): auth and headers are re-sent to the redirect target,
            # as the original recursive call intended — confirm this is acceptable
            # for cross-host redirects.
            url = urljoin(url, location)
            redirects += 1
            continue

        if status >= 400 or status < 200:
            raise MindeeSourceError(
                f"Couldn't retrieve file from server, error code {status}."
            )

        return result.content

mindee/product/fr/__init__.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,6 @@
1515
)
1616
from mindee.product.fr.carte_grise.carte_grise_v1 import CarteGriseV1
1717
from mindee.product.fr.carte_grise.carte_grise_v1_document import CarteGriseV1Document
18-
from mindee.product.fr.carte_vitale.carte_vitale_v1 import CarteVitaleV1
19-
from mindee.product.fr.carte_vitale.carte_vitale_v1_document import (
20-
CarteVitaleV1Document,
21-
)
2218
from mindee.product.fr.energy_bill.energy_bill_v1 import EnergyBillV1
2319
from mindee.product.fr.energy_bill.energy_bill_v1_document import EnergyBillV1Document
2420
from mindee.product.fr.energy_bill.energy_bill_v1_energy_consumer import (

mindee/product/fr/carte_vitale/__init__.py

Lines changed: 0 additions & 4 deletions
This file was deleted.

mindee/product/fr/carte_vitale/carte_vitale_v1.py

Lines changed: 0 additions & 39 deletions
This file was deleted.

mindee/product/fr/carte_vitale/carte_vitale_v1_document.py

Lines changed: 0 additions & 59 deletions
This file was deleted.

0 commit comments

Comments
 (0)