Skip to content

✨ add support for multi-receipt extraction #240

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Jun 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,4 @@ repos:
- types-requests
- types-setuptools
- importlib-metadata
- types-Pillow
20 changes: 20 additions & 0 deletions examples/multi_receipts_tutorial.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from mindee import Client, PredictResponse, product
from mindee.image_extraction.multi_receipts_extractor.multi_receipts_extractor import (
extract_receipts,
)

# Init a new client
mindee_client = Client()

# Load a file from disk
input_doc = mindee_client.source_from_path("path/to/your/file.ext")

# Detect the position of every receipt in the document.
# close_file=False is passed because the same input is read again by
# extract_receipts() below.
result_split: PredictResponse = mindee_client.parse(
    product.MultiReceiptsDetectorV1, input_doc, close_file=False
)

# Crop each detected receipt out of the original document
extracted_receipts = extract_receipts(input_doc, result_split.document.inference)
for receipt in extracted_receipts:
    # receipt.save_to_file(f"./{receipt.internal_file_name}.pdf")  # Optionally: save each extracted receipt

    # Build the input source once and reuse it for the parsing call
    receipt_as_source = receipt.as_source()
    result_receipt = mindee_client.parse(product.ReceiptV5, receipt_as_source)
    print(result_receipt.document)
Empty file.
5 changes: 5 additions & 0 deletions mindee/image_extraction/common/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from mindee.image_extraction.common.extracted_image import ExtractedImage
from mindee.image_extraction.common.image_extractor import (
attach_image_as_new_file,
extract_multiple_images_from_source,
)
89 changes: 89 additions & 0 deletions mindee/image_extraction/common/extracted_image.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import io
from pathlib import Path
from typing import Optional

from PIL import Image

from mindee.error import MindeeError
from mindee.input import FileInput, LocalInputSource
from mindee.logger import logger


class ExtractedImage:
    """
    Generic class for image extraction.

    Holds an in-memory copy of an extracted element along with the page and
    element indexes it was extracted from.
    """

    _page_id: int
    """Id of the page the image was extracted from."""
    _element_id: int
    """Id of the element on a given page."""

    def __init__(
        self, input_source: LocalInputSource, page_id: int, element_id: int
    ) -> None:
        """
        Initialize the ExtractedImage with a buffer and an internal file name.

        :param input_source: Local source for input.
        :param page_id: ID of the page the element was found on.
        :param element_id: ID of the element in a page.
        """
        # Rewind first so the whole content is copied even if the source
        # stream was already consumed by a previous read.
        input_source.file_object.seek(0)
        self.buffer = io.BytesIO(input_source.file_object.read())
        # The name is attached to the buffer itself; presumably file-based
        # inputs derive their filename from it (see as_source) -- TODO confirm.
        self.buffer.name = input_source.filename
        self.buffer.seek(0)
        self.internal_file_name = f"{input_source.filename}_p{page_id}_{element_id}.pdf"
        self._page_id = page_id
        self._element_id = 0 if element_id is None else element_id

    def save_to_file(self, output_path: str, file_format: Optional[str] = None):
        """
        Saves the document to a file.

        :param output_path: Path to save the file to.
        :param file_format: Optional PIL-compatible format for the file. Inferred from file extension if not provided.
        :raises MindeeError: If an invalid path or filename is provided, or if the file could not be written.
        """
        try:
            resolved_path = Path(output_path).resolve()
            if not file_format:
                if len(resolved_path.suffix) < 1:
                    raise ValueError("Invalid file format.")
                # Leave the format unset: PIL then maps the extension to the
                # matching encoder itself. A raw suffix such as ".JPG" is not a
                # valid PIL format identifier and would make save() fail.
                file_format = None
            self.buffer.seek(0)
            image = Image.open(self.buffer)
            image.save(resolved_path, format=file_format)
            logger.info("File saved successfully to '%s'.", resolved_path)
        except TypeError as exc:
            raise MindeeError("Invalid path/filename provided.") from exc
        except Exception as exc:
            raise MindeeError(f"Could not save file {Path(output_path).name}.") from exc

    def as_source(self) -> FileInput:
        """
        Return the file as a Mindee-compatible FileInput source.

        The internal buffer is rewound before being wrapped, and the returned
        source shares that buffer with this object.

        :returns: A FileInput source.
        """
        self.buffer.seek(0)
        return FileInput(self.buffer)

    @property
    def page_id(self) -> int:
        """
        ID of the page the receipt was found on.

        :return: The page ID given at construction.
        """
        return self._page_id

    @property
    def element_id(self) -> int:
        """
        Id of the element on a given page.

        :return: The element ID given at construction, or 0 if it was None.
        """
        return self._element_id
100 changes: 100 additions & 0 deletions mindee/image_extraction/common/image_extractor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
import io
from typing import BinaryIO, List

import pypdfium2 as pdfium
from PIL import Image

from mindee.geometry import Point, get_min_max_x, get_min_max_y
from mindee.image_extraction.common import ExtractedImage
from mindee.input import BytesInput, LocalInputSource


def attach_image_as_new_file(  # type: ignore
    input_buffer: BinaryIO,
) -> pdfium.PdfDocument:
    """
    Attaches an image as a new page in a PdfDocument object.

    The image is re-encoded as an RGB JPEG before being embedded, and the
    created page takes the dimensions of the embedded image.

    :param input_buffer: Input buffer holding an image that PIL can open.
    :return: A PdfDocument handle.
    """
    input_buffer.seek(0)
    image = Image.open(input_buffer)
    # convert() returns a new image instead of converting in place, so its
    # result has to be kept: modes such as RGBA or P cannot be written as JPEG.
    rgb_image = image.convert("RGB")
    image_buffer = io.BytesIO()
    rgb_image.save(image_buffer, format="JPEG")

    pdf = pdfium.PdfDocument.new()

    image_pdf = pdfium.PdfImage.new(pdf)
    image_pdf.load_jpeg(image_buffer)
    width, height = image_pdf.get_size()

    # Scale the image object so that it covers the whole page
    matrix = pdfium.PdfMatrix().scale(width, height)
    image_pdf.set_matrix(matrix)

    # Create a new page in the PdfDocument, sized after the image
    page = pdf.new_page(width, height)
    page.insert_obj(image_pdf)
    page.gen_content()
    rgb_image.close()
    image.close()
    return pdf


def extract_multiple_images_from_source(
    input_source: LocalInputSource, page_id: int, polygons: List[List[Point]]
) -> List[ExtractedImage]:
    """
    Crops one image per polygon out of a single page of the input.

    The page is rendered once, then each polygon's bounding rectangle is
    cropped from that rendering and stored as a JPEG.

    :param input_source: Local Input source to extract elements from.
    :param page_id: id of the page to extract from.
    :param polygons: List of coordinates to pull the elements from. They are
        multiplied by the page width and height, so they are presumably
        relative (0 to 1) coordinates.
    :return: List of extracted elements, in the same order as the polygons.
    """
    pdf_page = load_pdf_doc(input_source).get_page(page_id)
    rendered_page = pdf_page.render().to_pil()
    page_width, page_height = pdf_page.get_size()

    results: List[ExtractedImage] = []
    for index, outline in enumerate(polygons):
        x_range = get_min_max_x(outline)
        y_range = get_min_max_y(outline)

        # Bounding rectangle of the polygon, in page units
        left = int(x_range.min * page_width)
        top = int(y_range.min * page_height)
        right = int(x_range.max * page_width)
        bottom = int(y_range.max * page_height)
        cropped = rendered_page.crop((left, top, right, bottom))

        jpeg_stream = io.BytesIO()
        cropped.save(jpeg_stream, format="JPEG")

        element_name = f"{input_source.filename}_p{page_id}_e{index}.jpg"
        element_source = BytesInput(jpeg_stream.getvalue(), element_name)
        results.append(ExtractedImage(element_source, page_id, index))

    return results


def load_pdf_doc(input_file: LocalInputSource) -> pdfium.PdfDocument:  # type: ignore
    """
    Opens a local input source as a PDF document.

    PDF inputs are opened directly; any other input is treated as an image
    and wrapped into a new single-page PDF.

    :param input_file: Local input.
    :return: A valid PdfDocument handle.
    """
    if not input_file.is_pdf():
        return attach_image_as_new_file(input_file.file_object)

    pdf_stream = input_file.file_object
    pdf_stream.seek(0)
    return pdfium.PdfDocument(pdf_stream)
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from mindee.image_extraction.multi_receipts_extractor import multi_receipts_extractor
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
from typing import List

from mindee.error import MindeeError
from mindee.image_extraction.common.extracted_image import ExtractedImage
from mindee.image_extraction.common.image_extractor import (
extract_multiple_images_from_source,
)
from mindee.input import LocalInputSource
from mindee.parsing.common import Inference


def extract_receipts(
    input_source: LocalInputSource, inference: Inference
) -> List[ExtractedImage]:
    """
    Splits a multi-receipts document into one image per detected receipt.

    :param input_source: Local Input Source to extract sub-receipts from.
    :param inference: Results of the inference.
    :return: Individual extracted receipts as a list of ExtractedImage,
        ordered by page.
    :raises MindeeError: If the inference holds no receipt at all.
    """
    if not inference.prediction.receipts:
        raise MindeeError(
            "No possible receipts candidates found for MultiReceipts extraction."
        )

    extracted: List[ExtractedImage] = []
    page_count = input_source.count_doc_pages()
    for page_index in range(page_count):
        page_receipts = inference.pages[page_index].prediction.receipts
        bounding_boxes = [entry.bounding_box for entry in page_receipts]
        page_images = extract_multiple_images_from_source(
            input_source, page_index, bounding_boxes
        )
        extracted.extend(page_images)
    return extracted
2 changes: 1 addition & 1 deletion mindee/input/local_response.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def is_valid_hmac_signature(
Checks if the hmac signature of the local response is valid.

:param secret_key: Secret key, given as a string.
:param signature:
:param signature: HMAC signature, given as a string.
:return: True if the HMAC signature is valid.
"""
return signature == self.get_hmac_signature(secret_key)
8 changes: 5 additions & 3 deletions mindee/input/sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,9 +116,11 @@ def count_doc_pages(self) -> int:

:return: the number of pages.
"""
self.file_object.seek(0)
pdf = pdfium.PdfDocument(self.file_object)
return len(pdf)
if self.is_pdf():
self.file_object.seek(0)
pdf = pdfium.PdfDocument(self.file_object)
return len(pdf)
return 1

def process_pdf(
self,
Expand Down
6 changes: 5 additions & 1 deletion mindee/parsing/standard/locale.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,11 @@ def __init__(
:param reconstructed: Bool for reconstructed object (not extracted in the API)
:param page_id: Page number for multi-page document
"""
value_key = "value" if "value" in raw_prediction else "language"
value_key = (
"value"
if ("value" in raw_prediction and raw_prediction["value"])
else "language"
)

super().__init__(
raw_prediction,
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ safe_licenses = [
"MIT License",
"Mozilla Public License 2.0 (MPL 2.0)",
"BSD License",
"(Apache-2.0 OR BSD-3-Clause) AND LicenseRef-PdfiumThirdParty"
"(Apache-2.0 OR BSD-3-Clause) AND LicenseRef-PdfiumThirdParty",
"Historical Permission Notice and Disclaimer (HPND)"
]

[tool.pytest.ini_options]
Expand Down
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ include_package_data = True
python_requires = >=3.7
install_requires =
pypdfium2>=4.0,<5
Pillow>=9.5.0
pytz>=2023.3
requests~=2.31

Expand Down
Empty file.
40 changes: 40 additions & 0 deletions tests/image_extraction/test_image_extractor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import json

import pytest
from PIL import Image

from mindee.image_extraction.common import extract_multiple_images_from_source
from mindee.input import PathInput
from mindee.product import BarcodeReaderV1
from tests.test_inputs import PRODUCT_DATA_DIR


@pytest.fixture
def barcode_path():
    """Path to the sample barcode image used as extraction input."""
    barcode_dir = PRODUCT_DATA_DIR / "barcode_reader"
    return barcode_dir / "default_sample.jpg"


@pytest.fixture
def barcode_json_path():
    """Path to the stored barcode reader response matching the sample image."""
    response_dir = PRODUCT_DATA_DIR / "barcode_reader" / "response_v1"
    return response_dir / "complete.json"


def test_barcode_image_extraction(barcode_path, barcode_json_path):
    """Extracted barcodes must match the expected count and pixel sizes."""
    with open(barcode_json_path, "rb") as json_file:
        payload = json.load(json_file)
    inference = BarcodeReaderV1(payload["document"]["inference"])

    polygons_1d = [entry.polygon for entry in inference.prediction.codes_1d]
    polygons_2d = [entry.polygon for entry in inference.prediction.codes_2d]

    source = PathInput(barcode_path)
    extracted_1d = extract_multiple_images_from_source(source, 0, polygons_1d)
    extracted_2d = extract_multiple_images_from_source(source, 0, polygons_2d)

    assert len(extracted_1d) == 1
    assert len(extracted_2d) == 2

    # Each extracted element paired with its expected (width, height)
    expected_sizes = (
        (extracted_1d[0], (353, 200)),
        (extracted_2d[0], (214, 216)),
        (extracted_2d[1], (193, 201)),
    )
    for extracted, size in expected_sizes:
        assert Image.open(extracted.buffer).size == size
Loading
Loading