From 90e9d8741259993174a620a3c641a4ee3bfedaff Mon Sep 17 00:00:00 2001 From: j-t-1 <120829237+j-t-1@users.noreply.github.com> Date: Wed, 21 May 2025 14:54:16 +0100 Subject: [PATCH 1/4] MAINT: Increase readability of _merge_page Mainly moving declarations nearer their use. --- pypdf/_page.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/pypdf/_page.py b/pypdf/_page.py index 45690a592..eb8f32477 100644 --- a/pypdf/_page.py +++ b/pypdf/_page.py @@ -1122,9 +1122,6 @@ def _merge_page( over: bool = True, expand: bool = False, ) -> None: - # First we work on merging the resource dictionaries. This allows us - # to find out what symbols in the content streams we might need to - # rename. try: assert isinstance(self.indirect_reference, IndirectObject) if hasattr( @@ -1136,8 +1133,9 @@ def _merge_page( except (AssertionError, AttributeError): pass - new_resources = DictionaryObject() - rename = {} + # First we work on merging the resource dictionaries. This allows us + # to find out what symbols in the content streams we might need to + # rename. try: original_resources = cast(DictionaryObject, self[PG.RESOURCES].get_object()) except KeyError: @@ -1146,14 +1144,8 @@ def _merge_page( page2resources = cast(DictionaryObject, page2[PG.RESOURCES].get_object()) except KeyError: page2resources = DictionaryObject() - new_annots = ArrayObject() - - for page in (self, page2): - if PG.ANNOTS in page: - annots = page[PG.ANNOTS] - if isinstance(annots, ArrayObject): - new_annots.extend(annots) + rename = {} for res in ( RES.EXT_G_STATE, RES.FONT, @@ -1170,7 +1162,8 @@ def _merge_page( new_resources[NameObject(res)] = new rename.update(newrename) - # Combine /ProcSet sets, making sure there's a consistent order + # Combine /ProcSet sets, making sure there is a consistent order + new_resources = DictionaryObject() new_resources[NameObject(RES.PROC_SET)] = ArrayObject( sorted( set( @@ -1222,8 +1215,15 @@ def _merge_page( if expand: self._expand_mediabox(page2, ctm) - self.replace_contents(ContentStream(new_content_array, self.pdf)) self[NameObject(PG.RESOURCES)] = new_resources + self.replace_contents(ContentStream(new_content_array, self.pdf)) + + new_annots = ArrayObject() + for page in (self, page2): + if PG.ANNOTS in page: + annots = page[PG.ANNOTS] + if isinstance(annots, ArrayObject): + new_annots.extend(annots) self[NameObject(PG.ANNOTS)] = new_annots def _merge_page_writer( @@ -1240,7 +1240,6 @@ def _merge_page_writer( assert isinstance(self.indirect_reference, IndirectObject) pdf = self.indirect_reference.pdf - rename = {} if PG.RESOURCES not in self: self[NameObject(PG.RESOURCES)] = DictionaryObject() original_resources = cast(DictionaryObject, self[PG.RESOURCES].get_object()) @@ -1249,6 +1248,7 @@ def _merge_page_writer( else: page2resources = cast(DictionaryObject, page2[PG.RESOURCES].get_object()) + rename = {} for res in ( RES.EXT_G_STATE, RES.FONT, @@ -1265,7 +1265,7 @@ def _merge_page_writer( original_resources, page2resources, res, False ) rename.update(newrename) - # Combine /ProcSet sets. + # Combine /ProcSet sets if RES.PROC_SET in page2resources: if RES.PROC_SET not in original_resources: original_resources[NameObject(RES.PROC_SET)] = ArrayObject() From ad6e02ad8107e5a08a6125884666581636d9835a Mon Sep 17 00:00:00 2001 From: j-t-1 <120829237+j-t-1@users.noreply.github.com> Date: Wed, 21 May 2025 14:59:05 +0100 Subject: [PATCH 2/4] MAINT: Increase readability of _merge_page Mainly moving declarations nearer their use. --- pypdf/_page.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pypdf/_page.py b/pypdf/_page.py index eb8f32477..5fc64a79e 100644 --- a/pypdf/_page.py +++ b/pypdf/_page.py @@ -1145,6 +1145,7 @@ def _merge_page( except KeyError: page2resources = DictionaryObject() + new_resources = DictionaryObject() rename = {} for res in ( RES.EXT_G_STATE, @@ -1163,7 +1164,6 @@ def _merge_page( rename.update(newrename) # Combine /ProcSet sets, making sure there is a consistent order - new_resources = DictionaryObject() new_resources[NameObject(RES.PROC_SET)] = ArrayObject( sorted( set( From 7387f0bd6bcd71ce6902668b1cfb4919f992da3e Mon Sep 17 00:00:00 2001 From: j-t-1 <120829237+j-t-1@users.noreply.github.com> Date: Wed, 21 May 2025 20:59:45 +0100 Subject: [PATCH 3/4] MAINT: Increase readability of _merge_page --- pypdf/_page.py | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/pypdf/_page.py b/pypdf/_page.py index 5fc64a79e..c84798d86 100644 --- a/pypdf/_page.py +++ b/pypdf/_page.py @@ -1141,9 +1141,9 @@ def _merge_page( except KeyError: original_resources = DictionaryObject() try: - page2resources = cast(DictionaryObject, page2[PG.RESOURCES].get_object()) + page2_resources = cast(DictionaryObject, page2[PG.RESOURCES].get_object()) except KeyError: - page2resources = DictionaryObject() + page2_resources = DictionaryObject() new_resources = DictionaryObject() rename = {} @@ -1156,12 +1156,12 @@ def _merge_page( RES.SHADING, RES.PROPERTIES, ): - new, newrename = self._merge_resources( - original_resources, page2resources, res + new, new_rename = self._merge_resources( + original_resources, page2_resources, res ) if new: new_resources[NameObject(res)] = new - rename.update(newrename) + rename.update(new_rename) # Combine /ProcSet sets, making sure there is a consistent order new_resources[NameObject(RES.PROC_SET)] = ArrayObject( @@ -1169,7 +1169,7 @@ def _merge_page( set( original_resources.get(RES.PROC_SET, ArrayObject()).get_object() ).union( - set(page2resources.get(RES.PROC_SET, ArrayObject()).get_object()) + set(page2_resources.get(RES.PROC_SET, ArrayObject()).get_object()) ) ) ) @@ -1180,10 +1180,10 @@ def _merge_page( original_content.isolate_graphics_state() new_content_array.append(original_content) - page2content = page2.get_contents() - if page2content is not None: + page2_content = page2.get_contents() + if page2_content is not None: rect = getattr(page2, MERGE_CROP_BOX) - page2content.operations.insert( + page2_content.operations.insert( 0, ( map( @@ -1198,18 +1198,18 @@ def _merge_page( b"re", ), ) - page2content.operations.insert(1, ([], b"W")) - page2content.operations.insert(2, ([], b"n")) + page2_content.operations.insert(1, ([], b"W")) + page2_content.operations.insert(2, ([], b"n")) if page2transformation is not None: - page2content = page2transformation(page2content) - page2content = PageObject._content_stream_rename( - page2content, rename, self.pdf + page2_content = page2transformation(page2_content) + page2_content = PageObject._content_stream_rename( + page2_content, rename, self.pdf ) - page2content.isolate_graphics_state() + page2_content.isolate_graphics_state() if over: - new_content_array.append(page2content) + new_content_array.append(page2_content) else: - new_content_array.insert(0, page2content) + new_content_array.insert(0, page2_content) # if expanding the page to fit a new page, calculate the new media box size if expand: From 17afa32571b7a50db188854843db8532efb9288b Mon Sep 17 00:00:00 2001 From: j-t-1 <120829237+j-t-1@users.noreply.github.com> Date: Thu, 22 May 2025 08:52:42 +0100 Subject: [PATCH 4/4] MAINT: Increase readability of _merge_page --- pypdf/_page.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pypdf/_page.py b/pypdf/_page.py index c84798d86..2d0a1dd6a 100644 --- a/pypdf/_page.py +++ b/pypdf/_page.py @@ -1149,11 +1149,11 @@ def _merge_page( rename = {} for res in ( RES.EXT_G_STATE, - RES.FONT, - RES.XOBJECT, RES.COLOR_SPACE, RES.PATTERN, RES.SHADING, + RES.XOBJECT, + RES.FONT, RES.PROPERTIES, ): new, new_rename = self._merge_resources( @@ -1251,11 +1251,11 @@ def _merge_page_writer( rename = {} for res in ( RES.EXT_G_STATE, - RES.FONT, - RES.XOBJECT, RES.COLOR_SPACE, RES.PATTERN, RES.SHADING, + RES.XOBJECT, + RES.FONT, RES.PROPERTIES, ): if res in page2resources: