Skip to content

Commit abd8342

Browse files
authored
BUG: Prevent updating page contents after merging page (stamping/watermarking) (#1952)
ENH: Add the `over` parameter to `merge_page` closes #1951 closes #1953
1 parent 654be9d commit abd8342

File tree

4 files changed

+60
-54
lines changed

4 files changed

+60
-54
lines changed

docs/user/add-watermark.md

Lines changed: 23 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -4,80 +4,54 @@ Adding stamps or watermarks are two common ways to manipulate PDF files.
44
A stamp is adding something on top of the document, a watermark is in the
55
background of the document.
66

7-
## Stamp (Overlay)
7+
## Stamp (Overlay) / Watermark (Underlay)
88

9-
Using the ``Transformation()`` class, one can translate, rotate, scale, etc. the stamp before merging it to the content page.
9+
The process of stamping and watermarking is the same; you just need to set the `over` parameter to `True` for stamping and `False` for watermarking.
1010

11+
You can use `merge_page()` if you don't need to transform the stamp:
1112
```python
12-
from pathlib import Path
13-
from typing import Union, Literal, List
14-
1513
from pypdf import PdfWriter, PdfReader
1614

15+
stamp = PdfReader("bg.pdf").pages[0]
16+
writer = PdfWriter(clone_from="source.pdf")
17+
for page in writer.pages:
18+
page.merge_page(stamp, over=False) # here set to False for watermarking
1719

18-
def stamp(
19-
content_pdf: Path,
20-
stamp_pdf: Path,
21-
pdf_result: Path,
22-
page_indices: Union[Literal["ALL"], List[int]] = "ALL",
23-
):
24-
stamp_page = PdfReader(stamp_pdf).pages[0]
25-
26-
writer = PdfWriter()
27-
28-
reader = PdfReader(content_pdf)
29-
if page_indices == "ALL":
30-
page_indices = list(range(0, len(reader.pages)))
31-
for index in page_indices:
32-
content_page = reader.pages[index]
33-
content_page.merge_transformed_page(
34-
stamp_page,
35-
Transformation(),
36-
)
37-
writer.add_page(content_page)
38-
39-
with open(pdf_result, "wb") as fp:
40-
writer.write(fp)
20+
writer.write("out.pdf")
4121
```
4222

43-
![stamp.png](stamp.png)
44-
45-
## Watermark (Underlay)
46-
47-
To merge the watermark *under* the content, use the argument ``over=False`` of the method ``merge_transformed_page()``.
48-
49-
Once again, watermark size and position (and more) can be customized using the ``Transformation()`` class.
23+
Otherwise, use `merge_transformed_page()` with `Transformation()` if you need to translate, rotate, scale, etc. the stamp before merging it into the content page.
5024

5125
```python
5226
from pathlib import Path
5327
from typing import Union, Literal, List
5428

55-
from pypdf import PdfWriter, PdfReader, Transformation
29+
from pypdf import PdfWriter, PdfReader
5630

5731

58-
def watermark(
32+
def stamp(
5933
content_pdf: Path,
6034
stamp_pdf: Path,
6135
pdf_result: Path,
6236
page_indices: Union[Literal["ALL"], List[int]] = "ALL",
6337
):
64-
reader = PdfReader(content_pdf)
65-
if page_indices == "ALL":
66-
page_indices = range(len(reader.pages))
38+
stamp_page = PdfReader(stamp_pdf).pages[0]
6739

6840
writer = PdfWriter()
69-
watermark_page = PdfReader(stamp_pdf).pages[0]
70-
for index in page_indices:
71-
content_page = reader.pages[index]
41+
# page_indices can be a list of page indices; a tuple defines a range
42+
writer.append(content, pages=None if page_indices == "ALL" else page_indices)
43+
44+
for content_page in writer.pages:
7245
content_page.merge_transformed_page(
73-
watermark_page,
74-
Transformation(),
75-
over=False,
46+
stamp_page,
47+
Transformation().scale(0.5),
7648
)
77-
writer.add_page(content_page)
7849

79-
with open(pdf_result, "wb") as fp:
80-
writer.write(fp)
50+
writer.write(pdf_result)
8151
```
8252

53+
Example of a stamp:
54+
![stamp.png](stamp.png)
55+
56+
Example of a watermark:
8357
![watermark.png](watermark.png)

pypdf/_page.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -938,6 +938,11 @@ def replace_contents(
938938
self._objects[o.indirect_reference.idnum - 1] = NullObject() # type: ignore
939939
except AttributeError:
940940
pass
941+
942+
if isinstance(content, ArrayObject):
943+
for i in range(len(content)):
944+
content[i] = self.indirect_reference.pdf._add_object(content[i])
945+
941946
if content is None:
942947
if PG.CONTENTS not in self:
943948
return
@@ -972,7 +977,9 @@ def replace_contents(
972977
# this will be fixed with the _add_object
973978
self[NameObject(PG.CONTENTS)] = content
974979

975-
def merge_page(self, page2: "PageObject", expand: bool = False) -> None:
980+
def merge_page(
981+
self, page2: "PageObject", expand: bool = False, over: bool = True
982+
) -> None:
976983
"""
977984
Merge the content streams of two pages into one.
978985
@@ -985,10 +992,11 @@ def merge_page(self, page2: "PageObject", expand: bool = False) -> None:
985992
Args:
986993
page2: The page to be merged into this one. Should be
987994
an instance of :class:`PageObject<PageObject>`.
995+
over: If True (default), the content of page2 is placed over the content of this page; otherwise it is placed under it.
988996
expand: If true, the current page dimensions will be
989997
expanded to accommodate the dimensions of the page to be merged.
990998
"""
991-
self._merge_page(page2, expand=expand)
999+
self._merge_page(page2, over=over, expand=expand)
9921000

9931001
def mergePage(self, page2: "PageObject") -> None: # deprecated
9941002
"""

tests/test_page.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1237,12 +1237,15 @@ def create_stamp_pdf() -> BytesIO:
12371237
writer.append(SAMPLE_ROOT / "009-pdflatex-geotopo/GeoTopo.pdf", [1])
12381238
nb1 = len(writer._objects)
12391239

1240+
# 1 page only is modified
12401241
for page in writer.pages:
12411242
page.merge_page(template_page)
1242-
assert len(writer._objects) == nb1 + 1 # font is added that's all
1243+
# font is added; +1 streamobjects + 1 ArrayObject
1244+
assert len(writer._objects) == nb1 + 1 + 2
12431245
for page in writer.pages:
12441246
page.compress_content_streams()
1245-
assert len(writer._objects) == nb1 + 1
1247+
# objects are recycled
1248+
assert len(writer._objects) == nb1 + 1 + 2
12461249

12471250
contents = writer.pages[0]["/Contents"]
12481251
writer.pages[0].replace_contents(None)

tests/test_writer.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -624,7 +624,9 @@ def test_add_named_destination_sort_order(pdf_file_path):
624624
root = writer.get_named_dest_root()
625625

626626
assert len(root) == 4
627-
assert root[0] == "a", '"a" was not inserted before "b" in the named destination root'
627+
assert (
628+
root[0] == "a"
629+
), '"a" was not inserted before "b" in the named destination root'
628630
assert root[2] == "b"
629631

630632
# write "output" to pypdf-output.pdf
@@ -1478,3 +1480,22 @@ def test_empty_objects_before_cloning():
14781480
{x: 1 for x, y in reader.xref_objStm.values()}
14791481
) # to remove object streams
14801482
assert len(writer._objects) == nb_obj_reader
1483+
1484+
1485+
@pytest.mark.enable_socket()
1486+
def test_watermark():
1487+
url = "https://github.com/py-pdf/pypdf/files/11985889/bg.pdf"
1488+
name = "bgwatermark.pdf"
1489+
reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
1490+
url = "https://github.com/py-pdf/pypdf/files/11985888/source.pdf"
1491+
name = "srcwatermark.pdf"
1492+
writer = PdfWriter(clone_from=BytesIO(get_pdf_from_url(url, name=name)))
1493+
for p in writer.pages:
1494+
p.merge_page(reader.pages[0], over=False)
1495+
1496+
assert isinstance(p["/Contents"], ArrayObject)
1497+
assert isinstance(p["/Contents"][0], IndirectObject)
1498+
1499+
b = BytesIO()
1500+
writer.write(b)
1501+
assert len(b.getvalue()) < 2.1 * 1024 * 1024

0 commit comments

Comments
 (0)