Skip to content

Commit ff2f62c

Browse files
temp save, completely untested code
1 parent 349502c commit ff2f62c

File tree

12 files changed

+550
-26
lines changed

12 files changed

+550
-26
lines changed

mindee/error/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,5 @@
77
MindeeHTTPServerError,
88
handle_error,
99
)
10+
from mindee.error.mindee_image_error import MindeeImageError
11+
from mindee.error.mindee_pdf_error import MindeePDFError

mindee/error/mindee_image_error.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
class MindeeImageError(RuntimeError):
    """Raised when an image operation fails."""

mindee/error/mindee_pdf_error.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
class MindeePDFError(RuntimeError):
    """Raised when a PDF operation fails."""

mindee/extraction/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from mindee.extraction.common.extracted_image import ExtractedImage
22
from mindee.extraction.common.image_extractor import (
3-
attach_image_as_new_file,
3+
attach_images_as_new_file,
44
extract_multiple_images_from_source,
55
)
66
from mindee.extraction.multi_receipts_extractor import multi_receipts_extractor

mindee/extraction/common/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from mindee.extraction.common.extracted_image import ExtractedImage
22
from mindee.extraction.common.image_extractor import (
3-
attach_image_as_new_file,
3+
attach_images_as_new_file,
44
extract_multiple_images_from_source,
55
)

mindee/extraction/common/image_extractor.py

Lines changed: 23 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -11,35 +11,34 @@
1111
from mindee.input.sources import BytesInput, LocalInputSource
1212

1313

14-
def attach_image_as_new_file( # type: ignore
15-
input_buffer: BinaryIO,
14+
def attach_images_as_new_file(  # type: ignore
    input_buffer_list: List[BinaryIO],
) -> pdfium.PdfDocument:
    """
    Attaches a list of images as new pages in a PdfDocument object.

    Each input buffer is rewound, decoded, re-encoded as an RGB JPEG and
    inserted on its own page, sized to the image's dimensions. Pages are added
    in the same order as the buffers appear in the list.

    :param input_buffer_list: List of images, represented as buffers.
    :return: A PdfDocument handle.
    """
    pdf = pdfium.PdfDocument.new()
    for input_buffer in input_buffer_list:
        input_buffer.seek(0)
        image_buffer = io.BytesIO()
        # The context manager releases the decoded image even if encoding fails.
        with Image.open(input_buffer) as image:
            # convert() returns a NEW image rather than converting in place, so
            # its result must be the one that gets saved; otherwise images with
            # an alpha channel or a palette cannot be written as JPEG.
            image.convert("RGB").save(image_buffer, format="JPEG")

        image_pdf = pdfium.PdfImage.new(pdf)
        image_pdf.load_jpeg(image_buffer)
        width, height = image_pdf.get_size()

        # Scale the image object so that it fills the whole page.
        matrix = pdfium.PdfMatrix().scale(width, height)
        image_pdf.set_matrix(matrix)

        page = pdf.new_page(width, height)
        page.insert_obj(image_pdf)
        page.gen_content()
    return pdf
4443

4544

@@ -160,4 +159,4 @@ def load_pdf_doc(input_file: LocalInputSource) -> pdfium.PdfDocument: # type: i
160159
input_file.file_object.seek(0)
161160
return pdfium.PdfDocument(input_file.file_object)
162161

163-
return attach_image_as_new_file(input_file.file_object)
162+
return attach_images_as_new_file([input_file.file_object])

mindee/image_operations/__init__.py

Whitespace-only changes.
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
import io
2+
from typing import Union
3+
4+
from PIL import Image
5+
6+
7+
def compress_image(
    image_buffer: bytes,
    quality: int = 85,
    max_width: Union[int, float, None] = None,
    max_height: Union[int, float, None] = None,
) -> bytes:
    """
    Compresses an image with the given parameters.

    The image is optionally downscaled (preserving its aspect ratio) so that it
    fits within the given bounds, then re-encoded as a JPEG.

    :param image_buffer: Buffer representation of an image.
    :param quality: Quality to apply to the image (JPEG compression).
    :param max_width: Maximum bound for the width. A falsy value (``None`` or
        ``0``) leaves the width unbounded.
    :param max_height: Maximum bound for the height. A falsy value (``None`` or
        ``0``) leaves the height unbounded.
    :return: The compressed image, as JPEG-encoded bytes.
    """
    with Image.open(io.BytesIO(image_buffer)) as img:
        original_width, original_height = img.size
        # thumbnail() never enlarges an image, so clamping the bounds to the
        # original size is equivalent and keeps int() safe for infinite floats.
        bound_width = max(1, int(min(max_width or original_width, original_width)))
        bound_height = max(
            1, int(min(max_height or original_height, original_height))
        )
        if (bound_width, bound_height) != (original_width, original_height):
            img.thumbnail((bound_width, bound_height), Image.Resampling.LANCZOS)

        # The JPEG encoder cannot write alpha or palette modes (RGBA, LA, P...);
        # convert those to RGB and leave natively supported modes untouched.
        if img.mode in ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr"):
            output_img = img
        else:
            output_img = img.convert("RGB")

        output_buffer = io.BytesIO()
        output_img.save(output_buffer, format="JPEG", quality=quality, optimize=True)
        return output_buffer.getvalue()

mindee/pdf/__init__.py

Whitespace-only changes.

mindee/pdf/pdf_char_data.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
from dataclasses import dataclass
2+
from typing import Tuple
3+
4+
5+
@dataclass
class PDFCharData:
    """
    Description of a single character found in a PDF.

    Holds the character itself, its four bounds, and the properties of the font
    it is drawn with.
    """

    char: str
    """Character this entry describes."""
    left: int
    """Left bound of the character."""
    right: int
    """Right bound of the character."""
    top: int
    """Top bound of the character."""
    bottom: int
    """Bottom bound of the character."""
    font_name: str
    """Name of the font."""
    font_size: int
    """Size of the font, in pt."""
    font_weight: int
    """Weight of the font."""
    font_flags: int
    """Flags of the font."""
    font_stroke_color: Tuple[int, int, int, int]
    """Stroke color of the font, as an RGBA tuple."""
    font_fill_color: Tuple[int, int, int, int]
    """Fill color of the font, as an RGBA tuple."""

0 commit comments

Comments
 (0)