Skip to content

Commit 08c3c76

Browse files
🐛 fix full text ocr extra not using the proper syntax; add integration tests (#260)
1 parent 1afa1bb commit 08c3c76

File tree

7 files changed

+95
-10
lines changed

7 files changed

+95
-10
lines changed
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
#
# Run integration tests.
#
# Triggered on every pull request, and again whenever a run of the
# "Test Code Samples" workflow completes on any branch.
#
name: Integration Test

on:
  pull_request:
  workflow_run:
    workflows: ["Test Code Samples"]
    branches:
      - '*'
    types:
      - completed

jobs:
  pytest:
    name: Run Integration Tests
    timeout-minutes: 30
    strategy:
      # At most two matrix jobs run at the same time.
      max-parallel: 2
      matrix:
        os:
          - "ubuntu-latest"
          - "windows-latest"
        python-version:
          - "3.7"
          - "3.12"
    runs-on: ${{ matrix.os }}
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}

      - name: Cache dependencies
        uses: actions/cache@v3
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-dev-${{ hashFiles('setup.cfg') }}
          restore-keys: |
            ${{ runner.os }}-dev-

      - name: Install dependencies
        run: |
          # "pip install pip" does nothing when pip is already present;
          # --upgrade is required to actually refresh it before installing.
          python -m pip install --upgrade pip
          pip install -e .[dev]

      - name: Run Integration Testing
        env:
          # The API key is read from the repository secrets.
          MINDEE_API_KEY: ${{ secrets.MINDEE_API_KEY_SE_TESTS }}
        run: |
          # Select only the tests carrying the "integration" marker.
          pytest -m integration

mindee/mindee_http/endpoint.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -85,10 +85,9 @@ def _custom_request(
8585
if include_words:
8686
data["include_mvision"] = "true"
8787

88-
if full_text:
89-
data["full_text_ocr"] = "true"
90-
9188
params = {}
89+
if full_text:
90+
params["full_text_ocr"] = "true"
9291
if cropper:
9392
params["cropper"] = "true"
9493

mindee/parsing/common/document.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ class Document(Generic[TypePrediction, TypePage]):
2929
"""Result of the base inference"""
3030
id: str
3131
"""Id of the document as sent back by the server"""
32-
extras: Extras
32+
extras: Optional[Extras]
3333
"""Potential Extras fields sent back along the prediction"""
3434
ocr: Optional[Ocr]
3535
"""Potential raw text results read by the OCR (limited feature)"""
@@ -45,7 +45,7 @@ def __init__(
4545
self.filename = raw_response.get("name", "")
4646
if "ocr" in raw_response and raw_response["ocr"]:
4747
self.ocr = Ocr(raw_response["ocr"])
48-
if "extras" in raw_response and raw_response["extras"]:
48+
if "extras" in raw_response and raw_response["inference"]["extras"]:
4949
self.extras = Extras(raw_response["extras"])
5050
self._inject_full_text_ocr(raw_response)
5151
self.inference = inference_type(raw_response["inference"])
@@ -77,7 +77,7 @@ def _inject_full_text_ocr(self, raw_prediction: StringDict) -> None:
7777

7878
artificial_text_obj = {"content": full_text_content}
7979

80-
if not hasattr(self, "extras"):
80+
if not hasattr(self, "extras") or not self.extras:
8181
self.extras = Extras({"full_text_ocr": artificial_text_obj})
8282
else:
8383
self.extras.add_artificial_extra({"full_text_ocr": artificial_text_obj})

pyproject.toml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,12 +37,13 @@ safe_licenses = [
3737
]
3838

3939
[tool.pytest.ini_options]
40-
addopts = "--pyargs --cov mindee --cov-report term:skip-covered --cov-report term-missing -m 'not regression'"
40+
addopts = "--pyargs --cov mindee --cov-report term:skip-covered --cov-report term-missing -m 'not regression and not integration'"
4141
python_files = "test*.py"
4242
junit_family = "xunit2"
4343
markers = [
4444
"regression: marks tests as regression tests - select with '-m regression'",
45-
"lineitems: debug line items"
45+
"lineitems: debug line items",
46+
"integration: integration tests that send calls to the API - select with '-m integration'"
4647
]
4748
testpaths = [
4849
"tests",

tests/extraction/test_invoice_splitter_auto_extraction.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ def prepare_invoice_return(rst_file_path: Path, invoice_prediction: Document):
2626
return rst_content
2727

2828

29-
@pytest.mark.regression
29+
@pytest.mark.integration
3030
def test_pdf_should_extract_invoices_strict():
3131
client = Client()
3232
invoice_splitter_input = PathInput(
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
import json
2+
3+
import pytest
4+
5+
from mindee import AsyncPredictResponse, Client
6+
from mindee.product import InternationalIdV2, InvoiceV4
7+
from tests.product import PRODUCT_DATA_DIR
8+
9+
10+
@pytest.fixture
def client():
    """Provide a ``Client`` built with no arguments to the requesting test."""
    return Client()
14+
15+
16+
@pytest.mark.integration
def test_send_cropper_extra(client):
    """Parse the sample invoice with ``cropper=True``.

    The first page of the returned inference must expose a truthy
    ``extras.cropper`` value.
    """
    invoice_path = PRODUCT_DATA_DIR / "invoices" / "default_sample.jpg"
    invoice_source = client.source_from_path(invoice_path)
    parsed = client.parse(InvoiceV4, invoice_source, cropper=True)
    first_page = parsed.document.inference.pages[0]
    assert first_page.extras.cropper
23+
24+
25+
@pytest.mark.integration
def test_send_full_text_ocr_extra(client):
    """Enqueue and parse the sample international ID with ``full_text=True``.

    The returned document's extras must expose a truthy ``full_text_ocr``
    value.
    """
    id_path = PRODUCT_DATA_DIR / "international_id" / "default_sample.jpg"
    id_source = client.source_from_path(id_path)
    parsed = client.enqueue_and_parse(InternationalIdV2, id_source, full_text=True)
    document_extras = parsed.document.extras
    assert document_extras.full_text_ocr

tests/extras/test_full_text_ocr.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
# def load_pages():
1515
# with open(EXTRAS_DIR / "full_text_ocr/complete.json", "r") as file:
1616
# prediction_data = json.load(file)
17-
# print("PData", AsyncPredictResponse(InternationalIdV2, prediction_data).document.inference.pages)
1817
# return AsyncPredictResponse(InternationalIdV2, prediction_data).document.inference.pages
1918

2019

0 commit comments

Comments
 (0)