From 10f3b85e5c847322b6e77a1dd50ce0d1cd7facb7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nathana=C3=ABl=20Renaud?= <perso@renaudna.fr>
Date: Sat, 11 May 2024 23:47:13 +0200
Subject: [PATCH 01/15] STY: consider images inside page patterns.

Added code to detect patterns in "_get_ids_image".
To avoid conflicts with images located directly in a page, or with images using the same ID in different patterns, image IDs under patterns are returned in this form:
"/Pattern/patternNameHere/imageNameHere"

Added code to deal with Pattern images in "_get_image".
---
 pypdf/_page.py       | 96 ++++++++++++++++++++++++++++++++------------
 tests/test_images.py | 43 ++++++++++++++++++++
 2 files changed, 113 insertions(+), 26 deletions(-)

diff --git a/pypdf/_page.py b/pypdf/_page.py
index ea8e6f5a9..a009e0dce 100644
--- a/pypdf/_page.py
+++ b/pypdf/_page.py
@@ -457,19 +457,35 @@ def _get_ids_image(
         if ancest is None:
             ancest = []
         lst: List[Union[str, List[str]]] = []
-        if PG.RESOURCES not in obj or RES.XOBJECT not in cast(
-            DictionaryObject, obj[PG.RESOURCES]
-        ):
-            return self.inline_images_keys
-
-        x_object = obj[PG.RESOURCES][RES.XOBJECT].get_object()  # type: ignore
-        for o in x_object:
-            if not isinstance(x_object[o], StreamObject):
-                continue
-            if x_object[o][IA.SUBTYPE] == "/Image":
-                lst.append(o if len(ancest) == 0 else ancest + [o])
-            else:  # is a form with possible images inside
-                lst.extend(self._get_ids_image(x_object[o], ancest + [o], call_stack))
+
+        if PG.RESOURCES in obj:
+            if RES.PATTERN in cast(DictionaryObject, obj[PG.RESOURCES]):
+                for patternName, pattern in obj[PG.RESOURCES][RES.PATTERN].items():
+                    if PG.RESOURCES in pattern.get_object():
+                        if RES.XOBJECT in cast(DictionaryObject, pattern[PG.RESOURCES]):
+                            x_object = pattern[PG.RESOURCES][RES.XOBJECT].get_object()  # type: ignore
+                            for o in x_object:
+                                if not isinstance(x_object[o], StreamObject):
+                                    continue
+                                if x_object[o][IA.SUBTYPE] == "/Image":
+                                    lst.append(
+                                        f"{RES.PATTERN}{patternName}{o}"
+                                        if len(ancest) == 0
+                                        else ancest + [o]
+                                    )
+
+            if RES.XOBJECT in cast(DictionaryObject, obj[PG.RESOURCES]):
+                x_object = obj[PG.RESOURCES][RES.XOBJECT].get_object()  # type: ignore
+                for o in x_object:
+                    if not isinstance(x_object[o], StreamObject):
+                        continue
+                    if x_object[o][IA.SUBTYPE] == "/Image":
+                        lst.append(o if len(ancest) == 0 else ancest + [o])
+                    else:  # is a form with possible images inside
+                        lst.extend(
+                            self._get_ids_image(x_object[o], ancest + [o], call_stack)
+                        )
+
         return lst + self.inline_images_keys
 
     def _get_image(
@@ -484,9 +500,27 @@ def _get_image(
         if isinstance(id, List) and len(id) == 1:
             id = id[0]
         try:
-            xobjs = cast(
-                DictionaryObject, cast(DictionaryObject, obj[PG.RESOURCES])[RES.XOBJECT]
-            )
+            if isinstance(id, str) and id.find(RES.PATTERN) == 0:
+                pattern_name = id[len(RES.PATTERN) : id.find("/", len(RES.PATTERN) + 1)]
+                image_name = id[id.rfind("/") :]
+
+                patterns = cast(
+                    DictionaryObject,
+                    cast(DictionaryObject, obj[PG.RESOURCES])[RES.PATTERN],
+                )
+
+                xobjs = cast(
+                    DictionaryObject,
+                    cast(DictionaryObject, patterns[pattern_name][PG.RESOURCES])[
+                        RES.XOBJECT
+                    ],
+                )
+
+            else:
+                xobjs = cast(
+                    DictionaryObject,
+                    cast(DictionaryObject, obj[PG.RESOURCES])[RES.XOBJECT],
+                )
         except KeyError:
             if not (id[0] == "~" and id[-1] == "~"):
                 raise
@@ -497,16 +531,26 @@ def _get_image(
                 if self.inline_images is None:  # pragma: no cover
                     raise KeyError("no inline image can be found")
                 return self.inline_images[id]
-
-            imgd = _xobj_to_image(cast(DictionaryObject, xobjs[id]))
-            extension, byte_stream = imgd[:2]
-            f = ImageFile(
-                name=f"{id[1:]}{extension}",
-                data=byte_stream,
-                image=imgd[2],
-                indirect_reference=xobjs[id].indirect_reference,
-            )
-            return f
+            elif id.find("/Pattern") == 0:
+                imgd = _xobj_to_image(cast(DictionaryObject, xobjs[image_name]))
+                extension, byte_stream = imgd[:2]
+                f = ImageFile(
+                    name=f"{pattern_name[1:]}_{image_name[1:]}{extension}",
+                    data=byte_stream,
+                    image=imgd[2],
+                    indirect_reference=xobjs[image_name].indirect_reference,
+                )
+                return f
+            else:
+                imgd = _xobj_to_image(cast(DictionaryObject, xobjs[id]))
+                extension, byte_stream = imgd[:2]
+                f = ImageFile(
+                    name=f"{id[1:]}{extension}",
+                    data=byte_stream,
+                    image=imgd[2],
+                    indirect_reference=xobjs[id].indirect_reference,
+                )
+                return f
         else:  # in a sub object
             ids = id[1:]
             return self._get_image(ids, cast(DictionaryObject, xobjs[id[0]]))
diff --git a/tests/test_images.py b/tests/test_images.py
index e77090171..08d525957 100644
--- a/tests/test_images.py
+++ b/tests/test_images.py
@@ -214,6 +214,49 @@ def test_image_extraction(src, page_index, image_key, expected):
     assert image_similarity(BytesIO(actual_image.data), expected) >= 0.99
 
 
+@pytest.mark.parametrize(
+    ("src", "page_index", "image_key", "expected"),
+    [
+        (
+            SAMPLE_ROOT / "027-onlyoffice-image/Patterns.pdf",
+            0,
+            "/Pattern/P1/X1",
+            SAMPLE_ROOT / "027-onlyoffice-image/P1_X1.jpg",
+        ),
+        (
+            SAMPLE_ROOT / "027-onlyoffice-image/Patterns.pdf",
+            0,
+            "/Pattern/P2/X1",
+            SAMPLE_ROOT / "027-onlyoffice-image/P2_X1.jpg",
+        ),
+        (
+            SAMPLE_ROOT / "027-onlyoffice-image/Patterns.pdf",
+            0,
+            "/Pattern/P3/X1",
+            SAMPLE_ROOT / "027-onlyoffice-image/P3_X1.jpg",
+        ),
+    ],
+    ids=[
+        "027-onlyoffice-image/P1_X1.jpg",
+        "027-onlyoffice-image/P2_X1.jpg",
+        "027-onlyoffice-image/P3_X1.jpg",
+    ],
+)
+@pytest.mark.samples()
+def test_patterns_image_extraction(src, page_index, image_key, expected):
+    reader = PdfReader(src)
+    extreactedIDs = reader.pages[page_index].images
+
+    assert (
+        str(extreactedIDs)
+        == "[Image_0=/Pattern/P1/X1, Image_1=/Pattern/P2/X1, Image_2=/Pattern/P3/X1]"
+    )
+
+    actual_image = reader.pages[page_index].images[image_key]
+
+    assert image_similarity(BytesIO(actual_image.data), expected) >= 0.99
+
+
 @pytest.mark.enable_socket()
 @pytest.mark.timeout(30)
 def test_loop_in_image_keys():

From 557d77da071ad898482ed18f8806705d3828095e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nathana=C3=ABl=20Renaud?=
 <67143274+0xNath@users.noreply.github.com>
Date: Sun, 12 May 2024 10:13:42 +0200
Subject: [PATCH 02/15] Update _page.py

fixed code style
---
 pypdf/_page.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pypdf/_page.py b/pypdf/_page.py
index a009e0dce..15dd62ab2 100644
--- a/pypdf/_page.py
+++ b/pypdf/_page.py
@@ -461,8 +461,7 @@ def _get_ids_image(
         if PG.RESOURCES in obj:
             if RES.PATTERN in cast(DictionaryObject, obj[PG.RESOURCES]):
                 for patternName, pattern in obj[PG.RESOURCES][RES.PATTERN].items():
-                    if PG.RESOURCES in pattern.get_object():
-                        if RES.XOBJECT in cast(DictionaryObject, pattern[PG.RESOURCES]):
+                    if PG.RESOURCES in pattern.get_object() and RES.XOBJECT in cast(DictionaryObject, pattern[PG.RESOURCES]):
                             x_object = pattern[PG.RESOURCES][RES.XOBJECT].get_object()  # type: ignore
                             for o in x_object:
                                 if not isinstance(x_object[o], StreamObject):

From 524ffc36a6c1a88cfc59c61c2986e2bd19dc61de Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nathana=C3=ABl=20Renaud?=
 <67143274+0xNath@users.noreply.github.com>
Date: Sun, 12 May 2024 10:14:37 +0200
Subject: [PATCH 03/15] Update _page.py

fixed code style
---
 pypdf/_page.py | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/pypdf/_page.py b/pypdf/_page.py
index 15dd62ab2..19a324344 100644
--- a/pypdf/_page.py
+++ b/pypdf/_page.py
@@ -462,16 +462,16 @@ def _get_ids_image(
             if RES.PATTERN in cast(DictionaryObject, obj[PG.RESOURCES]):
                 for patternName, pattern in obj[PG.RESOURCES][RES.PATTERN].items():
                     if PG.RESOURCES in pattern.get_object() and RES.XOBJECT in cast(DictionaryObject, pattern[PG.RESOURCES]):
-                            x_object = pattern[PG.RESOURCES][RES.XOBJECT].get_object()  # type: ignore
-                            for o in x_object:
-                                if not isinstance(x_object[o], StreamObject):
-                                    continue
-                                if x_object[o][IA.SUBTYPE] == "/Image":
-                                    lst.append(
-                                        f"{RES.PATTERN}{patternName}{o}"
-                                        if len(ancest) == 0
-                                        else ancest + [o]
-                                    )
+                        x_object = pattern[PG.RESOURCES][RES.XOBJECT].get_object()  # type: ignore
+                        for o in x_object:
+                            if not isinstance(x_object[o], StreamObject):
+                                continue
+                            if x_object[o][IA.SUBTYPE] == "/Image":
+                                lst.append(
+                                    f"{RES.PATTERN}{patternName}{o}"
+                                    if len(ancest) == 0
+                                    else ancest + [o]
+                                )
 
             if RES.XOBJECT in cast(DictionaryObject, obj[PG.RESOURCES]):
                 x_object = obj[PG.RESOURCES][RES.XOBJECT].get_object()  # type: ignore

From 46adbe18b103d3e662e95806607769b7b03422cb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nathana=C3=ABl=20Renaud?= <perso@renaudna.fr>
Date: Sun, 12 May 2024 10:18:47 +0200
Subject: [PATCH 04/15] fix code style...

---
 pypdf/_page.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pypdf/_page.py b/pypdf/_page.py
index 19a324344..f4d1357d2 100644
--- a/pypdf/_page.py
+++ b/pypdf/_page.py
@@ -461,7 +461,9 @@ def _get_ids_image(
         if PG.RESOURCES in obj:
             if RES.PATTERN in cast(DictionaryObject, obj[PG.RESOURCES]):
                 for patternName, pattern in obj[PG.RESOURCES][RES.PATTERN].items():
-                    if PG.RESOURCES in pattern.get_object() and RES.XOBJECT in cast(DictionaryObject, pattern[PG.RESOURCES]):
+                    if PG.RESOURCES in pattern.get_object() and RES.XOBJECT in cast(
+                        DictionaryObject, pattern[PG.RESOURCES]
+                    ):
                         x_object = pattern[PG.RESOURCES][RES.XOBJECT].get_object()  # type: ignore
                         for o in x_object:
                             if not isinstance(x_object[o], StreamObject):

From 76b545fd56cefc843acea3be50e4b507eed91762 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nathana=C3=ABl=20Renaud?= <perso@renaudna.fr>
Date: Sun, 12 May 2024 10:43:51 +0200
Subject: [PATCH 05/15] fix typing

---
 pypdf/_page.py | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/pypdf/_page.py b/pypdf/_page.py
index f4d1357d2..cd121bccf 100644
--- a/pypdf/_page.py
+++ b/pypdf/_page.py
@@ -460,11 +460,14 @@ def _get_ids_image(
 
         if PG.RESOURCES in obj:
             if RES.PATTERN in cast(DictionaryObject, obj[PG.RESOURCES]):
-                for patternName, pattern in obj[PG.RESOURCES][RES.PATTERN].items():
+                for patternName, pattern in cast(
+                    DictionaryObject,
+                    cast(DictionaryObject, obj[PG.RESOURCES])[RES.PATTERN],
+                ).items():
                     if PG.RESOURCES in pattern.get_object() and RES.XOBJECT in cast(
                         DictionaryObject, pattern[PG.RESOURCES]
                     ):
-                        x_object = pattern[PG.RESOURCES][RES.XOBJECT].get_object()  # type: ignore
+                        x_object = pattern[PG.RESOURCES][RES.XOBJECT].get_object()
                         for o in x_object:
                             if not isinstance(x_object[o], StreamObject):
                                 continue
@@ -476,7 +479,10 @@ def _get_ids_image(
                                 )
 
             if RES.XOBJECT in cast(DictionaryObject, obj[PG.RESOURCES]):
-                x_object = obj[PG.RESOURCES][RES.XOBJECT].get_object()  # type: ignore
+                x_object = cast(
+                    DictionaryObject,
+                    cast(DictionaryObject, obj[PG.RESOURCES])[RES.XOBJECT],
+                ).get_object()
                 for o in x_object:
                     if not isinstance(x_object[o], StreamObject):
                         continue
@@ -512,11 +518,11 @@ def _get_image(
 
                 xobjs = cast(
                     DictionaryObject,
-                    cast(DictionaryObject, patterns[pattern_name][PG.RESOURCES])[
-                        RES.XOBJECT
-                    ],
+                    cast(
+                        DictionaryObject,
+                        cast(DictionaryObject, patterns[pattern_name])[PG.RESOURCES],
+                    )[RES.XOBJECT],
                 )
-
             else:
                 xobjs = cast(
                     DictionaryObject,

From 6edfa2d7c7eecb62dfb4e0144f9472a15fcce564 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nathana=C3=ABl=20Renaud?= <perso@renaudna.fr>
Date: Fri, 17 May 2024 18:09:57 +0200
Subject: [PATCH 06/15] fixed typo

---
 tests/test_images.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_images.py b/tests/test_images.py
index 08d525957..893eda33b 100644
--- a/tests/test_images.py
+++ b/tests/test_images.py
@@ -245,10 +245,10 @@ def test_image_extraction(src, page_index, image_key, expected):
 @pytest.mark.samples()
 def test_patterns_image_extraction(src, page_index, image_key, expected):
     reader = PdfReader(src)
-    extreactedIDs = reader.pages[page_index].images
+    extractedIDs = reader.pages[page_index].images
 
     assert (
-        str(extreactedIDs)
+        str(extractedIDs)
         == "[Image_0=/Pattern/P1/X1, Image_1=/Pattern/P2/X1, Image_2=/Pattern/P3/X1]"
     )
 

From 0e81eeeb0b9bcd4120030d2623f9fe667fbab59e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nathana=C3=ABl=20Renaud?= <perso@renaudna.fr>
Date: Fri, 17 May 2024 18:10:32 +0200
Subject: [PATCH 07/15] refactored code

---
 pypdf/_page.py | 37 +++++++++++++++++--------------------
 1 file changed, 17 insertions(+), 20 deletions(-)

diff --git a/pypdf/_page.py b/pypdf/_page.py
index cd121bccf..6de845727 100644
--- a/pypdf/_page.py
+++ b/pypdf/_page.py
@@ -509,7 +509,6 @@ def _get_image(
         try:
             if isinstance(id, str) and id.find(RES.PATTERN) == 0:
                 pattern_name = id[len(RES.PATTERN) : id.find("/", len(RES.PATTERN) + 1)]
-                image_name = id[id.rfind("/") :]
 
                 patterns = cast(
                     DictionaryObject,
@@ -538,26 +537,24 @@ def _get_image(
                 if self.inline_images is None:  # pragma: no cover
                     raise KeyError("no inline image can be found")
                 return self.inline_images[id]
-            elif id.find("/Pattern") == 0:
-                imgd = _xobj_to_image(cast(DictionaryObject, xobjs[image_name]))
-                extension, byte_stream = imgd[:2]
-                f = ImageFile(
-                    name=f"{pattern_name[1:]}_{image_name[1:]}{extension}",
-                    data=byte_stream,
-                    image=imgd[2],
-                    indirect_reference=xobjs[image_name].indirect_reference,
-                )
-                return f
+
+            if id.find("/Pattern") == 0:
+                image_identifier = id[id.rfind("/") :]
+
+                image_name = pattern_name[1:] + "_" + image_identifier[1:]
             else:
-                imgd = _xobj_to_image(cast(DictionaryObject, xobjs[id]))
-                extension, byte_stream = imgd[:2]
-                f = ImageFile(
-                    name=f"{id[1:]}{extension}",
-                    data=byte_stream,
-                    image=imgd[2],
-                    indirect_reference=xobjs[id].indirect_reference,
-                )
-                return f
+                image_identifier = str(id)
+                image_name = id[1:]
+
+            imgd = _xobj_to_image(cast(DictionaryObject, xobjs[image_identifier]))
+            image_extension, byte_stream = imgd[:2]
+
+            return ImageFile(
+                name=image_name + str(image_extension),
+                data=byte_stream,
+                image=imgd[2],
+                indirect_reference=xobjs[image_identifier].indirect_reference,
+            )
         else:  # in a sub object
             ids = id[1:]
             return self._get_image(ids, cast(DictionaryObject, xobjs[id[0]]))

From b6ba82027c18660ad9e34d7f4a25d1176c6719a8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nathana=C3=ABl=20Renaud?= <perso@renaudna.fr>
Date: Fri, 17 May 2024 18:13:43 +0200
Subject: [PATCH 08/15] fixed typo

---
 pypdf/_page.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pypdf/_page.py b/pypdf/_page.py
index 6de845727..7a92041ec 100644
--- a/pypdf/_page.py
+++ b/pypdf/_page.py
@@ -540,7 +540,6 @@ def _get_image(
 
             if id.find("/Pattern") == 0:
                 image_identifier = id[id.rfind("/") :]
-
                 image_name = pattern_name[1:] + "_" + image_identifier[1:]
             else:
                 image_identifier = str(id)

From 07752a1ae83ed7c5284fade0a59596a9cb94b58b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nathana=C3=ABl=20Renaud?= <perso@renaudna.fr>
Date: Fri, 17 May 2024 20:53:43 +0200
Subject: [PATCH 09/15] Add test for forms and standard onlyoffice pages

---
 tests/example_files.yaml |  6 ++-
 tests/test_images.py     | 87 ++++++++++++++++++++++++----------------
 2 files changed, 56 insertions(+), 37 deletions(-)

diff --git a/tests/example_files.yaml b/tests/example_files.yaml
index 049855d2f..475159ffb 100644
--- a/tests/example_files.yaml
+++ b/tests/example_files.yaml
@@ -112,5 +112,7 @@
   url: https://github.com/py-pdf/pypdf/files/12050253/tt.pdf
 - local_filename: Pesquisa-de-Precos-Combustiveis-novembro-2023.pdf
   url: https://www.joinville.sc.gov.br/wp-content/uploads/2023/11/Pesquisa-de-Precos-Combustiveis-novembro-2023.pdf
-- local_filename: iss2138.pdf
-  url: https://github.com/py-pdf/pypdf/files/12483807/AEO.1172.pdf
+- local_filename: iss2613-onlyoffice-standardImages.pdf
+  url: https://github.com/py-pdf/pypdf/files/15355445/iss2613-onlyoffice-standardImages.pdf
+- local_filename: iss2613-onlyoffice-form.pdf
+  url: https://github.com/py-pdf/pypdf/files/15355444/iss2613-onlyoffice-form.pdf
diff --git a/tests/test_images.py b/tests/test_images.py
index 893eda33b..85e0e605d 100644
--- a/tests/test_images.py
+++ b/tests/test_images.py
@@ -214,47 +214,64 @@ def test_image_extraction(src, page_index, image_key, expected):
     assert image_similarity(BytesIO(actual_image.data), expected) >= 0.99
 
 
-@pytest.mark.parametrize(
-    ("src", "page_index", "image_key", "expected"),
-    [
-        (
-            SAMPLE_ROOT / "027-onlyoffice-image/Patterns.pdf",
-            0,
-            "/Pattern/P1/X1",
-            SAMPLE_ROOT / "027-onlyoffice-image/P1_X1.jpg",
-        ),
-        (
-            SAMPLE_ROOT / "027-onlyoffice-image/Patterns.pdf",
-            0,
-            "/Pattern/P2/X1",
-            SAMPLE_ROOT / "027-onlyoffice-image/P2_X1.jpg",
-        ),
-        (
-            SAMPLE_ROOT / "027-onlyoffice-image/Patterns.pdf",
-            0,
-            "/Pattern/P3/X1",
-            SAMPLE_ROOT / "027-onlyoffice-image/P3_X1.jpg",
-        ),
-    ],
-    ids=[
-        "027-onlyoffice-image/P1_X1.jpg",
-        "027-onlyoffice-image/P2_X1.jpg",
-        "027-onlyoffice-image/P3_X1.jpg",
-    ],
-)
-@pytest.mark.samples()
-def test_patterns_image_extraction(src, page_index, image_key, expected):
-    reader = PdfReader(src)
-    extractedIDs = reader.pages[page_index].images
+@pytest.mark.enable_socket()
+def test_onlyoffice_standard_images_extraction():
+    reader = PdfReader(
+        BytesIO(get_data_from_url(name="iss2613-onlyoffice-standardImages.pdf"))
+    )
 
     assert (
-        str(extractedIDs)
+        str(reader.pages[0].images)
         == "[Image_0=/Pattern/P1/X1, Image_1=/Pattern/P2/X1, Image_2=/Pattern/P3/X1]"
     )
 
-    actual_image = reader.pages[page_index].images[image_key]
+    url = "https://github.com/py-pdf/pypdf/assets/67143274/cc28b39b-2e96-4bd3-b33c-c545c5cec2d9"
+    name = "iss2613-P1_X1.jpg"
+    P1_X1 = Image.open(BytesIO(get_data_from_url(url, name=name)))
 
-    assert image_similarity(BytesIO(actual_image.data), expected) >= 0.99
+    assert image_similarity(reader.pages[0].images[0].image, P1_X1) >= 0.99
+
+    url = "https://github.com/py-pdf/pypdf/assets/67143274/827c9066-546a-4502-a613-579ec25c598e"
+    name = "iss2613-P2_X1.jpg"
+    P2_X1 = Image.open(BytesIO(get_data_from_url(url, name=name)))
+
+    assert image_similarity(reader.pages[0].images[1].image, P2_X1) >= 0.99
+
+    url = "https://github.com/py-pdf/pypdf/assets/67143274/df9cb9e9-e589-4d2e-a537-ae0fe3240bbd"
+    name = "iss2613-P3_X1.jpg"
+    P3_X1 = Image.open(BytesIO(get_data_from_url(url, name=name)))
+
+    assert image_similarity(reader.pages[0].images[2].image, P3_X1) >= 0.99
+
+
+@pytest.mark.samples()
+def test_onlyoffice_form_images_extraction():
+    reader = PdfReader(BytesIO(get_data_from_url(name="iss2613-onlyoffice-form.pdf")))
+
+    assert (
+        str(reader.pages[0].images)
+        == "[Image_0=/Pattern/P1/X1, Image_1=/Pattern/P2/X1]"
+    )
+
+    assert str(reader.pages[1].images) == "[Image_0=/Pattern/P1/X1]"
+
+    url = "https://github.com/py-pdf/pypdf/assets/67143274/cc28b39b-2e96-4bd3-b33c-c545c5cec2d9"
+    name = "iss2613-P1_X1.jpg"
+    P1_X1 = Image.open(BytesIO(get_data_from_url(url, name=name)))
+
+    assert image_similarity(reader.pages[0].images[0].image, P1_X1) >= 0.99
+
+    url = "https://github.com/py-pdf/pypdf/assets/67143274/827c9066-546a-4502-a613-579ec25c598e"
+    name = "iss2613-P2_X1.jpg"
+    P2_X1 = Image.open(BytesIO(get_data_from_url(url, name=name)))
+
+    assert image_similarity(reader.pages[0].images[1].image, P2_X1) >= 0.99
+
+    url = "https://github.com/py-pdf/pypdf/assets/67143274/df9cb9e9-e589-4d2e-a537-ae0fe3240bbd"
+    name = "iss2613-P3_X1.jpg"
+    P3_X1 = Image.open(BytesIO(get_data_from_url(url, name=name)))
+
+    assert image_similarity(reader.pages[1].images[0].image, P3_X1) >= 0.99
 
 
 @pytest.mark.enable_socket()

From a5b980b65e50d999a2852ac3f39fa7969eddcddd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nathana=C3=ABl=20Renaud?= <perso@renaudna.fr>
Date: Fri, 17 May 2024 20:56:11 +0200
Subject: [PATCH 10/15] Fix decorator for form test

---
 tests/test_images.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_images.py b/tests/test_images.py
index 85e0e605d..7890cfa42 100644
--- a/tests/test_images.py
+++ b/tests/test_images.py
@@ -244,7 +244,7 @@ def test_onlyoffice_standard_images_extraction():
     assert image_similarity(reader.pages[0].images[2].image, P3_X1) >= 0.99
 
 
-@pytest.mark.samples()
+@pytest.mark.enable_socket()
 def test_onlyoffice_form_images_extraction():
     reader = PdfReader(BytesIO(get_data_from_url(name="iss2613-onlyoffice-form.pdf")))
 

From a23b27290f9031a0fcb69ba21e22585346442a6a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nathana=C3=ABl=20Renaud?= <perso@renaudna.fr>
Date: Fri, 17 May 2024 22:35:08 +0200
Subject: [PATCH 11/15] Fix test for form

---
 tests/test_images.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_images.py b/tests/test_images.py
index 7890cfa42..723a81a24 100644
--- a/tests/test_images.py
+++ b/tests/test_images.py
@@ -250,10 +250,10 @@ def test_onlyoffice_form_images_extraction():
 
     assert (
         str(reader.pages[0].images)
-        == "[Image_0=/Pattern/P1/X1, Image_1=/Pattern/P2/X1]"
+        == "[Image_0=/Annots/Image2_af_image/Img, Image_1=/Annots/Image3_af_image/Img]"
     )
 
-    assert str(reader.pages[1].images) == "[Image_0=/Pattern/P1/X1]"
+    assert str(reader.pages[1].images) == "[Image_0=/Annots/Image4_af_image/Img]"
 
     url = "https://github.com/py-pdf/pypdf/assets/67143274/cc28b39b-2e96-4bd3-b33c-c545c5cec2d9"
     name = "iss2613-P1_X1.jpg"

From d524672c2bfeac0056c8156fabe6aafdf9fee596 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nathana=C3=ABl=20Renaud?= <perso@renaudna.fr>
Date: Fri, 17 May 2024 22:49:14 +0200
Subject: [PATCH 12/15] Add Annot parsing for onlyoffice forms; fix typo

---
 pypdf/_page.py | 60 ++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 58 insertions(+), 2 deletions(-)

diff --git a/pypdf/_page.py b/pypdf/_page.py
index 7a92041ec..a3146bc4a 100644
--- a/pypdf/_page.py
+++ b/pypdf/_page.py
@@ -458,9 +458,38 @@ def _get_ids_image(
             ancest = []
         lst: List[Union[str, List[str]]] = []
 
+        if PG.ANNOTS in obj:
+            for annot in cast(DictionaryObject, obj[PG.ANNOTS]):
+                if (
+                    "/AP" in cast(DictionaryObject, annot.keys())
+                    and "/N" in cast(DictionaryObject, annot["/AP"].keys())
+                    and PG.RESOURCES in annot["/AP"]["/N"].get_object()
+                    and RES.XOBJECT
+                    in cast(DictionaryObject, annot["/AP"]["/N"][PG.RESOURCES])
+                    and "/FRM"
+                    in cast(
+                        DictionaryObject, annot["/AP"]["/N"][PG.RESOURCES][RES.XOBJECT]
+                    )
+                ):
+                    frame = annot["/AP"]["/N"][PG.RESOURCES][RES.XOBJECT]["/FRM"]
+
+                    if PG.RESOURCES in frame.get_object() and RES.XOBJECT in cast(
+                        DictionaryObject, frame[PG.RESOURCES]
+                    ):
+                        x_object = frame[PG.RESOURCES][RES.XOBJECT]
+                        for o in x_object:
+                            if x_object[o][IA.SUBTYPE] == "/Image":
+                                if not isinstance(x_object[o], StreamObject):
+                                    continue
+                                lst.append(
+                                    f"{PG.ANNOTS}/{annot['/T']}{o}"
+                                    if len(ancest) == 0
+                                    else ancest + [o]
+                                )
+
         if PG.RESOURCES in obj:
             if RES.PATTERN in cast(DictionaryObject, obj[PG.RESOURCES]):
-                for patternName, pattern in cast(
+                for pattern_name, pattern in cast(
                     DictionaryObject,
                     cast(DictionaryObject, obj[PG.RESOURCES])[RES.PATTERN],
                 ).items():
@@ -473,7 +502,7 @@ def _get_ids_image(
                                 continue
                             if x_object[o][IA.SUBTYPE] == "/Image":
                                 lst.append(
-                                    f"{RES.PATTERN}{patternName}{o}"
+                                    f"{RES.PATTERN}{pattern_name}{o}"
                                     if len(ancest) == 0
                                     else ancest + [o]
                                 )
@@ -522,6 +551,30 @@ def _get_image(
                         cast(DictionaryObject, patterns[pattern_name])[PG.RESOURCES],
                     )[RES.XOBJECT],
                 )
+            elif isinstance(id, str) and id.find(PG.ANNOTS) == 0:
+                annot_name = id[len(RES.PATTERN) : id.find("/", len(RES.PATTERN) + 1)]
+                annots = cast(DictionaryObject, obj[PG.ANNOTS])
+
+                for temp_annot in annots:
+                    if temp_annot["/T"] == annot_name:
+                        annot = temp_annot
+                        break
+
+                frame_xobjs = cast(
+                    DictionaryObject,
+                    cast(
+                        DictionaryObject,
+                        cast(DictionaryObject, annot["/AP"]["/N"])[PG.RESOURCES],
+                    )[RES.XOBJECT],
+                )
+
+                xobjs = cast(
+                    DictionaryObject,
+                    cast(
+                        DictionaryObject,
+                        cast(DictionaryObject, frame_xobjs["/FRM"])[PG.RESOURCES],
+                    )[RES.XOBJECT],
+                )
             else:
                 xobjs = cast(
                     DictionaryObject,
@@ -541,6 +594,9 @@ def _get_image(
             if id.find("/Pattern") == 0:
                 image_identifier = id[id.rfind("/") :]
                 image_name = pattern_name[1:] + "_" + image_identifier[1:]
+            elif id.find("/Annot") == 0:
+                image_identifier = id[id.rfind("/") :]
+                image_name = annot_name + "_" + image_identifier[1:]
             else:
                 image_identifier = str(id)
                 image_name = id[1:]

From 24218c81bd46cca1dc058b2abd2a4b0dddab8b01 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nathana=C3=ABl=20Renaud?= <perso@renaudna.fr>
Date: Fri, 17 May 2024 23:29:29 +0200
Subject: [PATCH 13/15] Squashed commit of the following:
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit d0493ae20bfb243e6fddc85d03d97611e040c38e
Author: Nathanaël Renaud <perso@renaudna.fr>
Date:   Fri May 17 23:25:13 2024 +0200

    Modified _get_ids_image and _get_image so they work with onlyoffice images

commit 53a3781dd60a5b3f30af73fb422c195a34401225
Author: Nathanaël Renaud <perso@renaudna.fr>
Date:   Fri May 17 23:22:27 2024 +0200

    Added unit tests for image extraction from PDFs generated using onlyoffice
---
 sample-files | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sample-files b/sample-files
index 8c405ece5..91311ce97 160000
--- a/sample-files
+++ b/sample-files
@@ -1 +1 @@
-Subproject commit 8c405ece5eff12396a34a1fae3276132002e1753
+Subproject commit 91311ce97033ea9669dba2a7b6c591c05ed74c76

From e5585d74e3177a362813ad6ed37f8710ce730882 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nathana=C3=ABl=20Renaud?= <perso@renaudna.fr>
Date: Sat, 3 Aug 2024 15:43:17 +0200
Subject: [PATCH 14/15] Try fix issue with codecov and PERF401 ruff warning

---
 pypdf/_page.py | 38 ++++++++++++++++++++++++--------------
 1 file changed, 24 insertions(+), 14 deletions(-)

diff --git a/pypdf/_page.py b/pypdf/_page.py
index a3146bc4a..515d615bc 100644
--- a/pypdf/_page.py
+++ b/pypdf/_page.py
@@ -478,13 +478,18 @@ def _get_ids_image(
                     ):
                         x_object = frame[PG.RESOURCES][RES.XOBJECT]
                         for o in x_object:
-                            if x_object[o][IA.SUBTYPE] == "/Image":
-                                if not isinstance(x_object[o], StreamObject):
-                                    continue
-                                lst.append(
-                                    f"{PG.ANNOTS}/{annot['/T']}{o}"
-                                    if len(ancest) == 0
-                                    else ancest + [o]
+                            if (
+                                isinstance(x_object[o], StreamObject)
+                                and x_object[o][IA.SUBTYPE] == "/Image"
+                            ):
+                                lst.extend(
+                                    [
+                                        (
+                                            f"{PG.ANNOTS}/{annot['/T']}{o}"
+                                            if len(ancest) == 0
+                                            else ancest + [o]
+                                        )
+                                    ]
                                 )
 
         if PG.RESOURCES in obj:
@@ -498,13 +503,18 @@ def _get_ids_image(
                     ):
                         x_object = pattern[PG.RESOURCES][RES.XOBJECT].get_object()
                         for o in x_object:
-                            if not isinstance(x_object[o], StreamObject):
-                                continue
-                            if x_object[o][IA.SUBTYPE] == "/Image":
-                                lst.append(
-                                    f"{RES.PATTERN}{pattern_name}{o}"
-                                    if len(ancest) == 0
-                                    else ancest + [o]
+                            if (
+                                isinstance(x_object[o], StreamObject)
+                                and x_object[o][IA.SUBTYPE] == "/Image"
+                            ):
+                                lst.extend(
+                                    [
+                                        (
+                                            f"{RES.PATTERN}{pattern_name}{o}"
+                                            if len(ancest) == 0
+                                            else ancest + [o]
+                                        )
+                                    ]
                                 )
 
             if RES.XOBJECT in cast(DictionaryObject, obj[PG.RESOURCES]):

From cc1600d06f1970f157eb223eab6adebaacd44fb9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nathana=C3=ABl=20Renaud?= <perso@renaudna.fr>
Date: Sat, 3 Aug 2024 16:19:19 +0200
Subject: [PATCH 15/15] Ruff not happy about whitespaces

---
 pypdf/_page.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pypdf/_page.py b/pypdf/_page.py
index 7f590f252..fe91acb82 100644
--- a/pypdf/_page.py
+++ b/pypdf/_page.py
@@ -515,7 +515,7 @@ def _get_ids_image(
                         lst.extend(
                             self._get_ids_image(x_object[o], ancest + [o], call_stack)
                         )
-        
+
         assert self.inline_images is not None
         lst.extend(list(self.inline_images.keys()))
         return lst