@@ -214,47 +214,64 @@ def test_image_extraction(src, page_index, image_key, expected):
214
214
assert image_similarity (BytesIO (actual_image .data ), expected ) >= 0.99
215
215
216
216
217
@pytest.mark.enable_socket()
def test_onlyoffice_standard_images_extraction():
    """
    Check image extraction on ``iss2613-onlyoffice-standardImages.pdf``.

    The first page must list exactly three images under ``/Pattern/P?/X1``,
    and each extracted image must be at least 99 % similar to its
    reference JPEG.
    """
    pdf_bytes = get_data_from_url(name="iss2613-onlyoffice-standardImages.pdf")
    reader = PdfReader(BytesIO(pdf_bytes))

    listing = str(reader.pages[0].images)
    assert (
        listing
        == "[Image_0=/Pattern/P1/X1, Image_1=/Pattern/P2/X1, Image_2=/Pattern/P3/X1]"
    )

    # (url, cache name) of the reference picture for image 0, 1 and 2 of page 0.
    references = (
        (
            "https://github.com/py-pdf/pypdf/assets/67143274/cc28b39b-2e96-4bd3-b33c-c545c5cec2d9",
            "iss2613-P1_X1.jpg",
        ),
        (
            "https://github.com/py-pdf/pypdf/assets/67143274/827c9066-546a-4502-a613-579ec25c598e",
            "iss2613-P2_X1.jpg",
        ),
        (
            "https://github.com/py-pdf/pypdf/assets/67143274/df9cb9e9-e589-4d2e-a537-ae0fe3240bbd",
            "iss2613-P3_X1.jpg",
        ),
    )
    for position, (url, name) in enumerate(references):
        # Fetch the reference first, then extract, as the comparisons did before.
        reference = Image.open(BytesIO(get_data_from_url(url, name=name)))
        extracted = reader.pages[0].images[position].image
        assert image_similarity(extracted, reference) >= 0.99
245
+
246
+
247
# This test obtains the PDF and every reference image via get_data_from_url,
# so it needs socket access rather than the local sample files.
@pytest.mark.enable_socket()
def test_onlyoffice_form_images_extraction():
    """
    Check image extraction on ``iss2613-onlyoffice-form.pdf``.

    Page 0 must list two images and page 1 one image, all under
    ``/Pattern/P?/X1``. Each extracted image must be at least 99 % similar
    to its reference JPEG.
    """
    reader = PdfReader(BytesIO(get_data_from_url(name="iss2613-onlyoffice-form.pdf")))

    assert (
        str(reader.pages[0].images)
        == "[Image_0=/Pattern/P1/X1, Image_1=/Pattern/P2/X1]"
    )

    assert str(reader.pages[1].images) == "[Image_0=/Pattern/P1/X1]"

    # (page index, image index on that page, reference url, cache name)
    expectations = (
        (
            0,
            0,
            "https://github.com/py-pdf/pypdf/assets/67143274/cc28b39b-2e96-4bd3-b33c-c545c5cec2d9",
            "iss2613-P1_X1.jpg",
        ),
        (
            0,
            1,
            "https://github.com/py-pdf/pypdf/assets/67143274/827c9066-546a-4502-a613-579ec25c598e",
            "iss2613-P2_X1.jpg",
        ),
        (
            1,
            0,
            "https://github.com/py-pdf/pypdf/assets/67143274/df9cb9e9-e589-4d2e-a537-ae0fe3240bbd",
            "iss2613-P3_X1.jpg",
        ),
    )
    for page_index, image_index, url, name in expectations:
        reference = Image.open(BytesIO(get_data_from_url(url, name=name)))
        extracted = reader.pages[page_index].images[image_index].image
        assert image_similarity(extracted, reference) >= 0.99
258
275
259
276
260
277
@pytest .mark .enable_socket ()
0 commit comments