Skip to content

Commit a69cef6

Browse files
author
Nathanaël Renaud
committed
Add test for forms and standard onlyoffice pages
1 parent 9f61209 commit a69cef6

File tree

2 files changed

+56
-37
lines changed

2 files changed

+56
-37
lines changed

tests/example_files.yaml

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -112,5 +112,7 @@
112 112
url: https://github.com/py-pdf/pypdf/files/12050253/tt.pdf
113 113
- local_filename: Pesquisa-de-Precos-Combustiveis-novembro-2023.pdf
114 114
url: https://www.joinville.sc.gov.br/wp-content/uploads/2023/11/Pesquisa-de-Precos-Combustiveis-novembro-2023.pdf
115-
- local_filename: iss2138.pdf
116-
url: https://github.com/py-pdf/pypdf/files/12483807/AEO.1172.pdf
115+
- local_filename: iss2613-onlyoffice-standardImages.pdf
116+
url: https://github.com/py-pdf/pypdf/files/15355445/iss2613-onlyoffice-standardImages.pdf
117+
- local_filename: iss2613-onlyoffice-form.pdf
118+
url: https://github.com/py-pdf/pypdf/files/15355444/iss2613-onlyoffice-form.pdf

tests/test_images.py

Lines changed: 52 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -214,47 +214,64 @@ def test_image_extraction(src, page_index, image_key, expected):
214 214
assert image_similarity(BytesIO(actual_image.data), expected) >= 0.99
215 215

216 216

217-
@pytest.mark.parametrize(
218-
("src", "page_index", "image_key", "expected"),
219-
[
220-
(
221-
SAMPLE_ROOT / "027-onlyoffice-image/Patterns.pdf",
222-
0,
223-
"/Pattern/P1/X1",
224-
SAMPLE_ROOT / "027-onlyoffice-image/P1_X1.jpg",
225-
),
226-
(
227-
SAMPLE_ROOT / "027-onlyoffice-image/Patterns.pdf",
228-
0,
229-
"/Pattern/P2/X1",
230-
SAMPLE_ROOT / "027-onlyoffice-image/P2_X1.jpg",
231-
),
232-
(
233-
SAMPLE_ROOT / "027-onlyoffice-image/Patterns.pdf",
234-
0,
235-
"/Pattern/P3/X1",
236-
SAMPLE_ROOT / "027-onlyoffice-image/P3_X1.jpg",
237-
),
238-
],
239-
ids=[
240-
"027-onlyoffice-image/P1_X1.jpg",
241-
"027-onlyoffice-image/P2_X1.jpg",
242-
"027-onlyoffice-image/P3_X1.jpg",
243-
],
244-
)
245-
@pytest.mark.samples()
246-
def test_patterns_image_extraction(src, page_index, image_key, expected):
247-
reader = PdfReader(src)
248-
extractedIDs = reader.pages[page_index].images
217+
@pytest.mark.enable_socket()
218+
def test_onlyoffice_standard_images_extraction():
219+
reader = PdfReader(
220+
BytesIO(get_data_from_url(name="iss2613-onlyoffice-standardImages.pdf"))
221+
)
249 222

250 223
assert (
251-
str(extractedIDs)
224+
str(reader.pages[0].images)
252 225
== "[Image_0=/Pattern/P1/X1, Image_1=/Pattern/P2/X1, Image_2=/Pattern/P3/X1]"
253 226
)
254 227

255-
actual_image = reader.pages[page_index].images[image_key]
228+
url = "https://github.com/py-pdf/pypdf/assets/67143274/cc28b39b-2e96-4bd3-b33c-c545c5cec2d9"
229+
name = "iss2613-P1_X1.jpg"
230+
P1_X1 = Image.open(BytesIO(get_data_from_url(url, name=name)))
256 231

257-
assert image_similarity(BytesIO(actual_image.data), expected) >= 0.99
232+
assert image_similarity(reader.pages[0].images[0].image, P1_X1) >= 0.99
233+
234+
url = "https://github.com/py-pdf/pypdf/assets/67143274/827c9066-546a-4502-a613-579ec25c598e"
235+
name = "iss2613-P2_X1.jpg"
236+
P2_X1 = Image.open(BytesIO(get_data_from_url(url, name=name)))
237+
238+
assert image_similarity(reader.pages[0].images[1].image, P2_X1) >= 0.99
239+
240+
url = "https://github.com/py-pdf/pypdf/assets/67143274/df9cb9e9-e589-4d2e-a537-ae0fe3240bbd"
241+
name = "iss2613-P3_X1.jpg"
242+
P3_X1 = Image.open(BytesIO(get_data_from_url(url, name=name)))
243+
244+
assert image_similarity(reader.pages[0].images[2].image, P3_X1) >= 0.99
245+
246+
247+
@pytest.mark.samples()
248+
def test_onlyoffice_form_images_extraction():
249+
reader = PdfReader(BytesIO(get_data_from_url(name="iss2613-onlyoffice-form.pdf")))
250+
251+
assert (
252+
str(reader.pages[0].images)
253+
== "[Image_0=/Pattern/P1/X1, Image_1=/Pattern/P2/X1]"
254+
)
255+
256+
assert str(reader.pages[1].images) == "[Image_0=/Pattern/P1/X1]"
257+
258+
url = "https://github.com/py-pdf/pypdf/assets/67143274/cc28b39b-2e96-4bd3-b33c-c545c5cec2d9"
259+
name = "iss2613-P1_X1.jpg"
260+
P1_X1 = Image.open(BytesIO(get_data_from_url(url, name=name)))
261+
262+
assert image_similarity(reader.pages[0].images[0].image, P1_X1) >= 0.99
263+
264+
url = "https://github.com/py-pdf/pypdf/assets/67143274/827c9066-546a-4502-a613-579ec25c598e"
265+
name = "iss2613-P2_X1.jpg"
266+
P2_X1 = Image.open(BytesIO(get_data_from_url(url, name=name)))
267+
268+
assert image_similarity(reader.pages[0].images[1].image, P2_X1) >= 0.99
269+
270+
url = "https://github.com/py-pdf/pypdf/assets/67143274/df9cb9e9-e589-4d2e-a537-ae0fe3240bbd"
271+
name = "iss2613-P3_X1.jpg"
272+
P3_X1 = Image.open(BytesIO(get_data_from_url(url, name=name)))
273+
274+
assert image_similarity(reader.pages[1].images[0].image, P3_X1) >= 0.99
258 275

259 276

260 277
@pytest.mark.enable_socket()

0 commit comments

Comments
 (0)