Skip to content

Commit 90bdd2c

Browse files
♻️ split up sources module to accommodate for future additions (#288)
1 parent 0c4c9e6 commit 90bdd2c

File tree

8 files changed

+120
-101
lines changed

8 files changed

+120
-101
lines changed

mindee/input/__init__.py

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,9 @@
11
from mindee.input.local_response import LocalResponse
22
from mindee.input.page_options import PageOptions
3-
from mindee.input.sources import (
4-
Base64Input,
5-
BytesInput,
6-
FileInput,
7-
InputType,
8-
LocalInputSource,
9-
PathInput,
10-
UrlInputSource,
11-
)
3+
from mindee.input.sources.base_64_input import Base64Input
4+
from mindee.input.sources.bytes_input import BytesInput
5+
from mindee.input.sources.file_input import FileInput
6+
from mindee.input.sources.local_input_source import InputType, LocalInputSource
7+
from mindee.input.sources.path_input import PathInput
8+
from mindee.input.sources.url_input_source import UrlInputSource
129
from mindee.input.workflow_options import WorkflowOptions

mindee/input/sources/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
from mindee.input.sources.base_64_input import Base64Input
2+
from mindee.input.sources.bytes_input import BytesInput
3+
from mindee.input.sources.file_input import FileInput
4+
from mindee.input.sources.local_input_source import InputType, LocalInputSource
5+
from mindee.input.sources.path_input import PathInput
6+
from mindee.input.sources.url_input_source import UrlInputSource

mindee/input/sources/base_64_input.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
import base64
2+
import io
3+
4+
from mindee.input.sources.local_input_source import InputType, LocalInputSource
5+
6+
7+
class Base64Input(LocalInputSource):
8+
"""Base64-encoded text input."""
9+
10+
def __init__(self, base64_string: str, filename: str) -> None:
11+
"""
12+
Input document from a base64 encoded string.
13+
14+
:param base64_string: Raw data as a base64 encoded string
15+
:param filename: File name of the input
16+
"""
17+
self.file_object = io.BytesIO(base64.standard_b64decode(base64_string))
18+
self.filename = filename
19+
self.filepath = None
20+
super().__init__(input_type=InputType.BASE64)

mindee/input/sources/bytes_input.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
import io
2+
3+
from mindee.input.sources.local_input_source import InputType, LocalInputSource
4+
5+
6+
class BytesInput(LocalInputSource):
7+
"""Raw bytes input."""
8+
9+
def __init__(self, raw_bytes: bytes, filename: str) -> None:
10+
"""
11+
Input document from raw bytes (no buffer).
12+
13+
:param raw_bytes: Raw data as bytes
14+
:param filename: File name of the input
15+
"""
16+
self.file_object = io.BytesIO(raw_bytes)
17+
self.filename = filename
18+
self.filepath = None
19+
super().__init__(input_type=InputType.BYTES)

mindee/input/sources/file_input.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
import os
2+
from typing import BinaryIO
3+
4+
from mindee.input.sources.local_input_source import InputType, LocalInputSource
5+
6+
7+
class FileInput(LocalInputSource):
8+
"""A binary file input."""
9+
10+
def __init__(self, file: BinaryIO) -> None:
11+
"""
12+
Input document from a Python binary file object.
13+
14+
Note: the calling function is responsible for closing the file.
15+
16+
:param file: FileIO object
17+
"""
18+
assert file.name, "File name must be set"
19+
20+
self.file_object = file
21+
self.filename = os.path.basename(file.name)
22+
self.filepath = file.name
23+
super().__init__(input_type=InputType.FILE)

mindee/input/sources.py renamed to mindee/input/sources/local_input_source.py

Lines changed: 1 addition & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,8 @@
1-
import base64
21
import io
32
import mimetypes
4-
import os
53
import tempfile
64
from enum import Enum
7-
from pathlib import Path
8-
from typing import BinaryIO, Optional, Sequence, Tuple, Union
5+
from typing import BinaryIO, Optional, Sequence, Tuple
96

107
import pypdfium2 as pdfium
118

@@ -205,91 +202,3 @@ def read_contents(self, close_file: bool) -> Tuple[str, bytes]:
205202
def close(self) -> None:
206203
"""Close the file object."""
207204
self.file_object.close()
208-
209-
210-
class FileInput(LocalInputSource):
211-
"""A binary file input."""
212-
213-
def __init__(self, file: BinaryIO) -> None:
214-
"""
215-
Input document from a Python binary file object.
216-
217-
Note: the calling function is responsible for closing the file.
218-
219-
:param file: FileIO object
220-
"""
221-
assert file.name, "File name must be set"
222-
223-
self.file_object = file
224-
self.filename = os.path.basename(file.name)
225-
self.filepath = file.name
226-
super().__init__(input_type=InputType.FILE)
227-
228-
229-
class PathInput(LocalInputSource):
230-
"""A local path input."""
231-
232-
def __init__(self, filepath: Union[Path, str]) -> None:
233-
"""
234-
Input document from a path.
235-
236-
:param filepath: Path to open
237-
"""
238-
self.file_object = open(filepath, "rb") # pylint: disable=consider-using-with
239-
self.filename = os.path.basename(filepath)
240-
self.filepath = str(filepath)
241-
super().__init__(input_type=InputType.PATH)
242-
243-
244-
class BytesInput(LocalInputSource):
245-
"""Raw bytes input."""
246-
247-
def __init__(self, raw_bytes: bytes, filename: str) -> None:
248-
"""
249-
Input document from raw bytes (no buffer).
250-
251-
:param raw_bytes: Raw data as bytes
252-
:param filename: File name of the input
253-
"""
254-
self.file_object = io.BytesIO(raw_bytes)
255-
self.filename = filename
256-
self.filepath = None
257-
super().__init__(input_type=InputType.BYTES)
258-
259-
260-
class Base64Input(LocalInputSource):
261-
"""Base64-encoded text input."""
262-
263-
def __init__(self, base64_string: str, filename: str) -> None:
264-
"""
265-
Input document from a base64 encoded string.
266-
267-
:param base64_string: Raw data as a base64 encoded string
268-
:param filename: File name of the input
269-
"""
270-
self.file_object = io.BytesIO(base64.standard_b64decode(base64_string))
271-
self.filename = filename
272-
self.filepath = None
273-
super().__init__(input_type=InputType.BASE64)
274-
275-
276-
class UrlInputSource:
277-
"""A local or distant URL input."""
278-
279-
url: str
280-
"""The Uniform Resource Locator."""
281-
282-
def __init__(self, url: str) -> None:
283-
"""
284-
Input document from a base64 encoded string.
285-
286-
:param url: URL to send, must be HTTPS
287-
"""
288-
if not url.lower().startswith("https"):
289-
raise MindeeSourceError("URL must be HTTPS")
290-
291-
self.input_type = InputType.URL
292-
293-
logger.debug("URL input: %s", url)
294-
295-
self.url = url

mindee/input/sources/path_input.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
import os
2+
from pathlib import Path
3+
from typing import Union
4+
5+
from mindee.input.sources.local_input_source import InputType, LocalInputSource
6+
7+
8+
class PathInput(LocalInputSource):
9+
"""A local path input."""
10+
11+
def __init__(self, filepath: Union[Path, str]) -> None:
12+
"""
13+
Input document from a path.
14+
15+
:param filepath: Path to open
16+
"""
17+
self.file_object = open(filepath, "rb") # pylint: disable=consider-using-with
18+
self.filename = os.path.basename(filepath)
19+
self.filepath = str(filepath)
20+
super().__init__(input_type=InputType.PATH)

mindee/input/sources/url_input_source.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
from mindee.error.mindee_error import MindeeSourceError
2+
from mindee.input.sources.local_input_source import InputType
3+
from mindee.logger import logger
4+
5+
6+
class UrlInputSource:
7+
"""A local or distant URL input."""
8+
9+
url: str
10+
"""The Uniform Resource Locator."""
11+
12+
def __init__(self, url: str) -> None:
13+
"""
14+
Input document from a URL.
15+
16+
:param url: URL to send, must be HTTPS
17+
"""
18+
if not url.lower().startswith("https"):
19+
raise MindeeSourceError("URL must be HTTPS")
20+
21+
self.input_type = InputType.URL
22+
23+
logger.debug("URL input: %s", url)
24+
25+
self.url = url

0 commit comments

Comments
 (0)