From e64fd47a65f693b3fbf8a6effd2393bed09d039c Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Tue, 12 Sep 2023 23:10:23 +0200
Subject: [PATCH 01/13] BUG : Accepts Kids in EmbeddedFiles

closes #2087
closes #2090
---
 pypdf/_protocols.py               |    3 +
 pypdf/_reader.py                  |  141 +-
 pypdf/_writer.py                  |    2 +
 pypdf/generic/__init__.py         |    4 +
 pypdf/generic/_base.py            |   11 +
 pypdf/generic/_data_structures.py |  220 ++
 tests/test_writer.py              | 3617 +++++++++++++++--------------
 7 files changed, 2124 insertions(+), 1874 deletions(-)

diff --git a/pypdf/_protocols.py b/pypdf/_protocols.py
index c6f2bbebd..69d21ba8a 100644
--- a/pypdf/_protocols.py
+++ b/pypdf/_protocols.py
@@ -76,6 +76,9 @@ def write(self, stream: Union[Path, StrByteType]) -> Tuple[bool, IO]:
     def _add_object(self, obj: Any) -> Any:
         ...
 
+    def _replace_object(self, indirect_reference: Any, obj: Any) -> Any:
+        ...
+
     @property
     def pages(self) -> List[Any]:
         ...
diff --git a/pypdf/_reader.py b/pypdf/_reader.py
index 75738fd17..01529ec47 100644
--- a/pypdf/_reader.py
+++ b/pypdf/_reader.py
@@ -39,7 +39,6 @@
     Callable,
     Dict,
     Iterable,
-    Iterator,
     List,
     Mapping,
     Optional,
@@ -98,6 +97,7 @@
     FloatObject,
     IndirectObject,
     NameObject,
+    NameTree,
     NullObject,
     NumberObject,
     PdfObject,
@@ -2206,14 +2206,56 @@ def rename_form_topname(self, name: str) -> Optional[DictionaryObject]:
         interim[NameObject("/T")] = TextStringObject(name)
         return interim
 
+    def _get_embedded_files_root(self) -> Optional[NameTree]:
+        """
+        Returns the EmbeddedFiles root as a NameTree Object
+        if the root does not exist or is not a valid name tree, return None
+        """
+        catalog = cast(DictionaryObject, self.trailer["/Root"])
+        if "/Names" not in catalog:
+            return None
+        ef = cast(DictionaryObject, catalog["/Names"]).get("/EmbeddedFiles", None)
+        if ef is None:
+            return None
+        efo = ef.get_object()
+        # The tree is wrapped in a new NameTree on each call and is not stored
+        # back in the catalog: a reader must not modify the document
+        # (IndirectObject.replace_object only works within a PdfWriter).
+        try:
+            return NameTree(efo)
+        except ValueError:
+            # NameTree rejects objects that are not dictionaries holding
+            # "/Names" or "/Kids"; such a malformed root is handled like a
+            # missing one, so that listing attachments returns nothing
+            return None
+
+    @property
+    def detailed_embedded_files(self) -> Optional[Mapping[str, PdfObject]]:
+        """Return list_items() of the EmbeddedFiles root, or None without one."""
+        root = self._get_embedded_files_root()
+        if not root:
+            return None
+        return root.list_items()
+
+    @property
+    def embedded_files(self) -> Optional[Mapping[str, List[bytes]]]:
+        """Map each name to the data of its entries (a list: names may repeat)."""
+        ef = self._get_embedded_files_root()
+        if not ef:
+            return None
+        return {k: [e["/EF"]["/F"].get_data() for e in v] for k, v in ef.list_items().items()}  # type: ignore
+
     @property
     def attachments(self) -> Mapping[str, List[bytes]]:
-        return LazyDict(
-            {
-                name: (self._get_attachment_list, name)
-                for name in self._list_attachments()
-            }
-        )
+        # every attachment is read as soon as the property is accessed
+        root = self._get_embedded_files_root()
+        if not root:
+            return {}
+        return {
+            name: [spec["/EF"]["/F"].get_data() for spec in specs]
+            for name, specs in root.list_items().items()
+            if isinstance(specs, list)
+        }
 
     def _list_attachments(self) -> List[str]:
         """
@@ -2222,20 +2264,20 @@ def _list_attachments(self) -> List[str]:
         Returns:
             list of filenames
         """
-        catalog = cast(DictionaryObject, self.trailer["/Root"])
-        # From the catalog get the embedded file names
-        try:
-            filenames = cast(
-                ArrayObject,
-                cast(
-                    DictionaryObject,
-                    cast(DictionaryObject, catalog["/Names"])["/EmbeddedFiles"],
-                )["/Names"],
-            )
-        except KeyError:
-            return []
-        attachments_names = [f for f in filenames if isinstance(f, str)]
-        return attachments_names
+        ef = self._get_embedded_files_root()
+        if ef:
+            lst = ef.list_keys()
+        else:
+            lst = []
+        """
+        for ip, p in enumerate(self.pages):
+            for a in [_a.get_object()
+                      for _a in p.get("/Annots",[])]:
+                if _a.get_object().get("/Subtype","") != "/FileAttachements":
+                    continue
+                lst.append(f"$page_{ip}.{get_name_from_file_specification(_a)}")
+        """
+        return lst
 
     def _get_attachment_list(self, name: str) -> List[bytes]:
         out = self._get_attachments(name)[name]
@@ -2260,53 +2302,18 @@ def _get_attachments(
             dictionary of filename -> Union[bytestring or List[ByteString]]
             if the filename exists multiple times a List of the different version will be provided
         """
-        catalog = cast(DictionaryObject, self.trailer["/Root"])
-        # From the catalog get the embedded file names
-        try:
-            filenames = cast(
-                ArrayObject,
-                cast(
-                    DictionaryObject,
-                    cast(DictionaryObject, catalog["/Names"])["/EmbeddedFiles"],
-                )["/Names"],
-            )
-        except KeyError:
+        ef = self._get_embedded_files_root()
+        if ef is None:
             return {}
-        attachments: Dict[str, Union[bytes, List[bytes]]] = {}
-        # Loop through attachments
-        for i in range(len(filenames)):
-            f = filenames[i]
-            if isinstance(f, str):
-                if filename is not None and f != filename:
-                    continue
-                name = f
-                f_dict = filenames[i + 1].get_object()
-                f_data = f_dict["/EF"]["/F"].get_data()
-                if name in attachments:
-                    if not isinstance(attachments[name], list):
-                        attachments[name] = [attachments[name]]  # type:ignore
-                    attachments[name].append(f_data)  # type:ignore
-                else:
-                    attachments[name] = f_data
-        return attachments
-
-
-class LazyDict(Mapping):
-    def __init__(self, *args: Any, **kw: Any) -> None:
-        self._raw_dict = dict(*args, **kw)
-
-    def __getitem__(self, key: str) -> Any:
-        func, arg = self._raw_dict.__getitem__(key)
-        return func(arg)
-
-    def __iter__(self) -> Iterator[Any]:
-        return iter(self._raw_dict)
-
-    def __len__(self) -> int:
-        return len(self._raw_dict)
-
-    def __str__(self) -> str:
-        return f"LazyDict(keys={list(self.keys())})"
+        if filename is None:
+            return {k: v if len(v) > 1 else v[0] for k, v in self.attachments.items()}
+        else:
+            lst = ef.list_get(filename)
+            return {
+                filename: [x["/EF"]["/F"].get_data() for x in lst]  # type: ignore
+                if isinstance(lst, list)
+                else lst["/EF"]["/F"].get_data()  # type: ignore
+            }
 
 
 class PdfFileReader(PdfReader):  # deprecated
diff --git a/pypdf/_writer.py b/pypdf/_writer.py
index 25a6444d3..1d70bba50 100644
--- a/pypdf/_writer.py
+++ b/pypdf/_writer.py
@@ -297,10 +297,12 @@ def _replace_object(
         if isinstance(indirect_reference, IndirectObject):
             assert indirect_reference.pdf == self
             indirect_reference = indirect_reference.idnum
+        gen = self._objects[indirect_reference - 1].indirect_reference.generation  # type: ignore
         self._objects[indirect_reference - 1] = obj
         return self._objects[indirect_reference - 1]
         if indirect_reference.pdf != self:
             raise ValueError("pdf must be self")
+        obj.indirect_reference = IndirectObject(indirect_reference, gen, self)
         return self._objects[indirect_reference.idnum - 1]  # type: ignore
 
     def _add_page(
diff --git a/pypdf/generic/__init__.py b/pypdf/generic/__init__.py
index 778a9339e..bed5eb601 100644
--- a/pypdf/generic/__init__.py
+++ b/pypdf/generic/__init__.py
@@ -53,8 +53,10 @@
     DictionaryObject,
     EncodedStreamObject,
     Field,
+    NameTree,
     StreamObject,
     TreeObject,
+    get_name_from_file_specification,
     read_object,
 )
 from ._fit import Fit
@@ -444,6 +446,8 @@ def link(
     "RectangleObject",
     "Field",
     "Destination",
+    "NameTree",
+    "get_name_from_file_specification",
     "ViewerPreferences",
     # --- More specific stuff
     # Outline
diff --git a/pypdf/generic/_base.py b/pypdf/generic/_base.py
index 6c3e41647..bd1d15f05 100644
--- a/pypdf/generic/_base.py
+++ b/pypdf/generic/_base.py
@@ -314,6 +314,17 @@ def get_object(self) -> Optional["PdfObject"]:
             return None
         return obj.get_object()
 
+    def replace_object(self, obj: "PdfObject") -> None:
+        """
+        Substitute obj for the object this reference points to.
+        Raises TypeError when self.pdf has no _replace_object method.
+        """
+        if not hasattr(self.pdf, "_replace_object"):
+            raise TypeError("Trying to replace Object in a non PdfWriter")
+        self.pdf._replace_object(self.idnum, obj)
+        # make the new object point back at this reference
+        obj.indirect_reference = self
+
     def __repr__(self) -> str:
         return f"IndirectObject({self.idnum!r}, {self.generation!r}, {id(self.pdf)})"
 
diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py
index 9ad98c240..59e28250a 100644
--- a/pypdf/generic/_data_structures.py
+++ b/pypdf/generic/_data_structures.py
@@ -1442,6 +1442,226 @@ def additionalActions(self) -> Optional[DictionaryObject]:  # deprecated
         return self.additional_actions
 
 
+class NameTree(DictionaryObject):
+    """
+    Name Tree Structure
+    Allows listing, getting and setting objects in a Name Tree
+    """
+
+    def __init__(self, obj: Optional[PdfObject] = None) -> None:
+        """Copy an existing name tree node, or start an empty tree if obj is None."""
+        DictionaryObject.__init__(self)
+        if obj is None:  # building a new Name Tree
+            self[NameObject("/Names")] = ArrayObject()
+            return
+        if not isinstance(obj, DictionaryObject) or all(
+            x not in obj for x in ("/Names", "/Kids")
+        ):
+            raise ValueError("source object is not a valid source object")
+        self.update(obj)
+        if hasattr(obj, "indirect_reference"):
+            self.indirect_reference = obj.indirect_reference
+
+    def list_keys(self) -> List[str]:
+        """
+        List the keys of every entry of the Name Tree.
+
+        Returns:
+            Sorted list of the str items found in "/Names", walking "/Kids"
+        """
+
+        def _collect(node: Optional[PdfObject]) -> List[str]:
+            if node is None:
+                return []
+            node = cast(DictionaryObject, node)
+            names = node.get("/Names", None)
+            kids = node.get("/Kids", None)
+            names = names.get_object() if names else []
+            kids = kids.get_object() if kids else []
+            # only the str items of "/Names" are kept
+            found = [item for item in names if isinstance(item, str)]
+            for kid in kids:
+                # duplicated keys are kept, one per occurrence
+                found.extend(_collect(kid.get_object()))
+            return found
+
+        keys = _collect(self)
+        # sorted in place before being returned
+        keys.sort()
+        return keys
+
+    def list_items(self) -> "Dict[str, PdfObject]":
+        """
+        Provides the Name Tree Entries as a dictionary
+
+        Entries sharing the same key are grouped: every value of the
+        dictionary is the list of the objects found for its key, in the order
+        they are met. A key that is followed by another str, or that is the
+        last item of a "/Names" array, contributes None.
+
+        Returns:
+            dictionary of key -> list of objects (a list even for a single
+            entry)
+        """
+        # a dict keeps the order of first appearance and groups duplicated
+        # keys without rescanning the keys already collected
+        found: "Dict[str, Any]" = {}
+
+        def _list(o: Optional[PdfObject]) -> None:
+            if o is None:
+                return
+            o = cast(DictionaryObject, o)
+            _l = o.get("/Names", None)
+            a = o.get("/Kids", None)
+            _l = _l.get_object() if _l else []
+            a = a.get_object() if a else []
+            # names of this node first, then its kids in order, depth first
+            for i, k in enumerate(_l):
+                if not isinstance(k, str):
+                    continue
+                # the value follows its key; stop at the end of the array
+                # instead of raising IndexError on a truncated "/Names"
+                v = _l[i + 1] if i + 1 < len(_l) else None
+                if isinstance(v, str):
+                    # next item is already another key: no value
+                    v = None
+                found.setdefault(k, []).append(v)
+            for x in a:
+                # kids are dereferenced before being walked
+                _list(x.get_object())
+
+        # walk the whole tree starting from this node
+        _list(self)
+        return found
+
+    def list_get(self, key: str) -> List[PdfObject]:
+        """
+        Get the entries stored under a key in the Name Tree
+
+        Args:
+            key: searched entry
+
+        Returns:
+            list of the objects stored under key (a duplicated key gives
+            several objects); empty list if the key is not found
+        """
+
+        def _get(key: str, o: Optional[PdfObject]) -> List[PdfObject]:
+            if o is None:
+                return []
+            rst = []
+            o = cast(DictionaryObject, o)
+            _l = o.get("/Names", None)
+            a = o.get("/Kids", None)
+            _l = _l.get_object() if _l else []
+            a = a.get_object() if a else []
+            for i, x in enumerate(_l[:-1]):  # a trailing item has no value
+                if x == key:
+                    rst.append(_l[i + 1])
+            for x in a:
+                rst.extend(_get(key, x.get_object()))  # dereference the kid
+            return rst
+
+        return _get(key, self)
+
+    def list_set(
+        self, key: str, data: PdfObject, overwrite: bool = False
+    ) -> Optional[IndirectObject]:
+        """
+        Add the data entry to the Name Tree
+
+        Args:
+            key: entry
+            data: PdfObject (it will be added to the list of objects)
+            overwrite: replace the object of an existing key instead of
+                adding a duplicated entry after it
+
+        Returns:
+            reference to the stored object; None if nothing was stored
+
+        Raises:
+            TypeError: the Name Tree does not belong to a PdfWriter
+        """
+        try:
+            if self.indirect_reference is None:
+                raise TypeError
+            writer = self.indirect_reference.pdf
+            if not hasattr(writer, "_add_object"):
+                raise TypeError
+        except (TypeError, AttributeError):
+            raise TypeError("Object does not belong to a PdfWriter")
+
+        def _update_limits(
+            obj: DictionaryObject, lo: Optional[str], hi: Optional[str]
+        ) -> bool:
+            if "/Limits" not in obj:
+                return False
+            a = cast("ArrayObject", obj["/Limits"])
+            if lo is not None and lo < a[0]:
+                a[0] = TextStringObject(lo)
+                return True
+            if hi is not None and hi > a[1]:  # a[1] is the upper limit
+                a[1] = TextStringObject(hi)
+                return True
+            return False
+
+        def _set_in(o: Optional[PdfObject], app: bool = True) -> Optional[PdfObject]:
+            # app: no later node can take the key, it has to be stored here
+            if o is None:
+                return None
+            o = cast(DictionaryObject, o.get_object())
+            if "/Names" in o:
+                _l = cast(ArrayObject, o["/Names"])
+                # highest key of the node; a key below the lowest one is accepted
+                hi = o["/Limits"][1] if "/Limits" in o else (_l[-2] if _l else key)
+                if not app and key > hi:
+                    return None
+                i = 0
+                while i < len(_l):  # _l alternates keys and values
+                    if _l[i] == key and overwrite:
+                        d = _l[i + 1]
+                        if isinstance(d, IndirectObject):
+                            d.replace_object(data)
+                        else:  # pragma: no cover
+                            # should not occur iaw pdf spec
+                            _l[i + 1] = data
+                        return _l[i + 1]
+                    elif key < _l[i]:
+                        _l.insert(i, TextStringObject(key))
+                        _l.insert(i + 1, writer._add_object(data))
+                        _update_limits(o, key, None)
+                        return _l[i + 1]
+                    i += 2  # next key; an equal key is skipped if not overwrite
+                _l.append(TextStringObject(key))
+                _l.append(writer._add_object(data))
+                _update_limits(o, None, key)
+                return _l[-1]
+            else:  # kids
+                ar = cast(ArrayObject, o["/Kids"])
+                for n, x in enumerate(ar):
+                    r = _set_in(x, app and n == len(ar) - 1)
+                    if r is not None:
+                        _update_limits(o, key, key)
+                        return r
+                return None
+
+        o = _set_in(self, True)
+        if isinstance(o, IndirectObject):  # the stored value is its reference
+            return o
+        return getattr(o, "indirect_reference", None)
+
+
+def get_name_from_file_specification(_a: DictionaryObject) -> str:
+    """Return the first truthy name among /UF, /F, /DOS, /Unix and /Mac."""
+    name = None
+    for entry in ("/UF", "/F", "/DOS", "/Unix", "/Mac"):
+        name = _a.get(entry)
+        if name:
+            break
+    # when no entry is truthy, the value looked up for "/Mac" is returned as is
+    return cast(str, name)
+
+
 class Destination(TreeObject):
     """
     A class representing a destination within a PDF file.
diff --git a/tests/test_writer.py b/tests/test_writer.py
index c9766f979..81ed09440 100644
--- a/tests/test_writer.py
+++ b/tests/test_writer.py
@@ -1,1608 +1,1611 @@
-"""Test the pypdf._writer module."""
-import re
-import shutil
-import subprocess
-from io import BytesIO
-from pathlib import Path
-
-import pytest
-
-from pypdf import (
-    ObjectDeletionFlag,
-    PageObject,
-    PdfMerger,
-    PdfReader,
-    PdfWriter,
-    Transformation,
-)
-from pypdf.errors import DeprecationError, PageSizeNotDefinedError, PyPdfError
-from pypdf.generic import (
-    ArrayObject,
-    ContentStream,
-    DictionaryObject,
-    Fit,
-    IndirectObject,
-    NameObject,
-    NullObject,
-    NumberObject,
-    RectangleObject,
-    StreamObject,
-    TextStringObject,
-)
-
-from . import get_data_from_url, is_sublist
-from .test_images import image_similarity
-
-TESTS_ROOT = Path(__file__).parent.resolve()
-PROJECT_ROOT = TESTS_ROOT.parent
-RESOURCE_ROOT = PROJECT_ROOT / "resources"
-SAMPLE_ROOT = Path(PROJECT_ROOT) / "sample-files"
-GHOSTSCRIPT_BINARY = shutil.which("gs")
-
-
-def test_writer_exception_non_binary(tmp_path, caplog):
-    src = RESOURCE_ROOT / "pdflatex-outline.pdf"
-
-    reader = PdfReader(src)
-    writer = PdfWriter()
-    writer.add_page(reader.pages[0])
-
-    with open(tmp_path / "out.txt", "w") as fp, pytest.raises(TypeError):
-        writer.write_stream(fp)
-    ending = "to write to is not in binary mode. It may not be written to correctly.\n"
-    assert caplog.text.endswith(ending)
-
-
-def test_writer_clone():
-    src = RESOURCE_ROOT / "pdflatex-outline.pdf"
-
-    reader = PdfReader(src)
-    writer = PdfWriter(clone_from=reader)
-    assert len(writer.pages) == 4
-    assert "PageObject" in str(type(writer.pages[0]))
-
-    writer = PdfWriter(clone_from=src)
-    assert len(writer.pages) == 4
-    assert "PageObject" in str(type(writer.pages[0]))
-
-
-def test_writer_clone_bookmarks():
-    # Arrange
-    src = RESOURCE_ROOT / "Seige_of_Vicksburg_Sample_OCR-crazyones-merged.pdf"
-    reader = PdfReader(src)
-    writer = PdfWriter()
-
-    # Act + test cat
-    cat = ""
-
-    def cat1(p) -> None:
-        nonlocal cat
-        cat += p.__repr__()
-
-    writer.clone_document_from_reader(reader, cat1)
-    assert "/Page" in cat
-    assert writer.pages[0].raw_get("/Parent") == writer._pages
-    writer.add_outline_item("Page 1", 0)
-    writer.add_outline_item("Page 2", 1)
-
-    # Assert
-    bytes_stream = BytesIO()
-    writer.write(bytes_stream)
-    bytes_stream.seek(0)
-    reader2 = PdfReader(bytes_stream)
-    assert len(reader2.pages) == len(reader.pages)
-    assert len(reader2.outline) == 2
-
-    # test with append
-    writer = PdfWriter()
-    writer.append(reader)
-    writer.add_outline_item("Page 1", 0)
-    writer.add_outline_item("Page 2", 1)
-
-    # Assert
-    bytes_stream = BytesIO()
-    writer.write(bytes_stream)
-    bytes_stream.seek(0)
-    reader2 = PdfReader(bytes_stream)
-    assert len(reader2.pages) == len(reader.pages)
-    assert len(reader2.outline) == 2
-
-
-def writer_operate(writer: PdfWriter) -> None:
-    """
-    To test the writer that initialized by each of the four usages.
-
-    Args:
-        writer: A PdfWriter object
-    """
-    pdf_path = RESOURCE_ROOT / "crazyones.pdf"
-    pdf_outline_path = RESOURCE_ROOT / "pdflatex-outline.pdf"
-
-    reader = PdfReader(pdf_path)
-    reader_outline = PdfReader(pdf_outline_path)
-
-    page = reader.pages[0]
-    with pytest.raises(PageSizeNotDefinedError) as exc:
-        writer.add_blank_page()
-    assert exc.value.args == ()
-    writer.insert_page(page, 1)
-    writer.insert_page(reader_outline.pages[0], 0)
-    writer.add_outline_item_destination(page)
-    writer.remove_links()
-    writer.add_outline_item_destination(page)
-    oi = writer.add_outline_item(
-        "An outline item", 0, None, (255, 0, 15), True, True, Fit.fit_box_vertically(10)
-    )
-    writer.add_outline_item(
-        "The XYZ fit", 0, oi, (255, 0, 15), True, True, Fit.xyz(left=10, top=20, zoom=3)
-    )
-    writer.add_outline_item(
-        "The XYZ fit no args", 0, oi, (255, 0, 15), True, True, Fit.xyz()
-    )
-    writer.add_outline_item(
-        "The FitH fit", 0, oi, (255, 0, 15), True, True, Fit.fit_horizontally(top=10)
-    )
-    writer.add_outline_item(
-        "The FitV fit", 0, oi, (255, 0, 15), True, True, Fit.fit_vertically(left=10)
-    )
-    writer.add_outline_item(
-        "The FitR fit",
-        0,
-        oi,
-        (255, 0, 15),
-        True,
-        True,
-        Fit.fit_rectangle(left=10, bottom=20, right=30, top=40),
-    )
-    writer.add_outline_item(
-        "The FitB fit", 0, oi, (255, 0, 15), True, True, Fit.fit_box()
-    )
-    writer.add_outline_item(
-        "The FitBH fit",
-        0,
-        oi,
-        (255, 0, 15),
-        True,
-        True,
-        Fit.fit_box_horizontally(top=10),
-    )
-    writer.add_outline_item(
-        "The FitBV fit",
-        0,
-        oi,
-        (255, 0, 15),
-        True,
-        True,
-        Fit.fit_box_vertically(left=10),
-    )
-    writer.add_blank_page()
-    writer.add_uri(2, "https://example.com", RectangleObject([0, 0, 100, 100]))
-    with pytest.warns(
-        DeprecationWarning, match="'pagenum' argument of add_uri is deprecated"
-    ):
-        writer.add_uri(
-            2, "https://example.com", RectangleObject([0, 0, 100, 100]), pagenum=2
-        )
-    with pytest.raises(DeprecationError):
-        writer.add_link(2, 1, RectangleObject([0, 0, 100, 100]))
-    assert writer._get_page_layout() is None
-    writer.page_layout = "broken"
-    assert writer.page_layout == "broken"
-    writer.page_layout = NameObject("/SinglePage")
-    assert writer._get_page_layout() == "/SinglePage"
-    assert writer._get_page_mode() is None
-    writer.set_page_mode("/UseNone")
-    assert writer._get_page_mode() == "/UseNone"
-    writer.set_page_mode(NameObject("/UseOC"))
-    assert writer._get_page_mode() == "/UseOC"
-    writer.insert_blank_page(width=100, height=100)
-    writer.insert_blank_page()  # without parameters
-
-    writer.remove_images()
-
-    writer.add_metadata(reader.metadata)
-    writer.add_metadata({"/Author": "Martin Thoma"})
-    writer.add_metadata({"/MyCustom": 1234})
-
-    writer.add_attachment("foobar.gif", b"foobarcontent")
-
-    # Check that every key in _idnum_hash is correct
-    objects_hash = [o.hash_value() for o in writer._objects]
-    for k, v in writer._idnum_hash.items():
-        assert v.pdf == writer
-        assert k in objects_hash, f"Missing {v}"
-
-
-tmp_path = "dont_commit_writer.pdf"
-
-
-@pytest.mark.parametrize(
-    ("write_data_here", "needs_cleanup"),
-    [
-        ("dont_commit_writer.pdf", True),
-        (Path("dont_commit_writer.pdf"), True),
-        (BytesIO(), False),
-    ],
-)
-def test_writer_operations_by_traditional_usage(write_data_here, needs_cleanup):
-    writer = PdfWriter()
-
-    writer_operate(writer)
-
-    # finally, write "output" to pypdf-output.pdf
-    if needs_cleanup:
-        with open(write_data_here, "wb") as output_stream:
-            writer.write(output_stream)
-    else:
-        output_stream = write_data_here
-        writer.write(output_stream)
-
-    if needs_cleanup:
-        Path(write_data_here).unlink()
-
-
-@pytest.mark.parametrize(
-    ("write_data_here", "needs_cleanup"),
-    [
-        ("dont_commit_writer.pdf", True),
-        (Path("dont_commit_writer.pdf"), True),
-        (BytesIO(), False),
-    ],
-)
-def test_writer_operations_by_semi_traditional_usage(write_data_here, needs_cleanup):
-    with PdfWriter() as writer:
-        writer_operate(writer)
-
-        # finally, write "output" to pypdf-output.pdf
-        if needs_cleanup:
-            with open(write_data_here, "wb") as output_stream:
-                writer.write(output_stream)
-        else:
-            output_stream = write_data_here
-            writer.write(output_stream)
-
-    if needs_cleanup:
-        Path(write_data_here).unlink()
-
-
-@pytest.mark.parametrize(
-    ("write_data_here", "needs_cleanup"),
-    [
-        ("dont_commit_writer.pdf", True),
-        (Path("dont_commit_writer.pdf"), True),
-        (BytesIO(), False),
-    ],
-)
-def test_writer_operations_by_semi_new_traditional_usage(
-    write_data_here, needs_cleanup
-):
-    with PdfWriter() as writer:
-        writer_operate(writer)
-
-        # finally, write "output" to pypdf-output.pdf
-        writer.write(write_data_here)
-
-    if needs_cleanup:
-        Path(write_data_here).unlink()
-
-
-@pytest.mark.parametrize(
-    ("write_data_here", "needs_cleanup"),
-    [
-        ("dont_commit_writer.pdf", True),
-        (Path("dont_commit_writer.pdf"), True),
-        (BytesIO(), False),
-    ],
-)
-def test_writer_operation_by_new_usage(write_data_here, needs_cleanup):
-    # This includes write "output" to pypdf-output.pdf
-    with PdfWriter(write_data_here) as writer:
-        writer_operate(writer)
-
-    if needs_cleanup:
-        Path(write_data_here).unlink()
-
-
-@pytest.mark.parametrize(
-    "input_path",
-    [
-        "side-by-side-subfig.pdf",
-        "reportlab-inline-image.pdf",
-    ],
-)
-def test_remove_images(pdf_file_path, input_path):
-    pdf_path = RESOURCE_ROOT / input_path
-
-    reader = PdfReader(pdf_path)
-    writer = PdfWriter()
-
-    page = reader.pages[0]
-    writer.insert_page(page, 0)
-    writer.remove_images()
-    page_contents_stream = writer.pages[0]["/Contents"]._data
-    assert len(page_contents_stream.strip())
-
-    # finally, write "output" to pypdf-output.pdf
-    with open(pdf_file_path, "wb") as output_stream:
-        writer.write(output_stream)
-
-    with open(pdf_file_path, "rb") as input_stream:
-        reader = PdfReader(input_stream)
-        if input_path == "side-by-side-subfig.pdf":
-            extracted_text = reader.pages[0].extract_text()
-            assert extracted_text
-            assert "Lorem ipsum dolor sit amet" in extracted_text
-
-
-@pytest.mark.enable_socket()
-def test_remove_images_sub_level():
-    """Cf #2035"""
-    url = "https://github.com/py-pdf/pypdf/files/12394781/2210.03142-1.pdf"
-    name = "iss2103.pdf"
-    writer = PdfWriter(clone_from=BytesIO(get_data_from_url(url, name=name)))
-    writer.remove_images()
-    assert (
-        len(
-            [
-                o.get_object()
-                for o in writer.pages[0]["/Resources"]["/XObject"]["/Fm1"][
-                    "/Resources"
-                ]["/XObject"]["/Im1"]["/Resources"]["/XObject"].values()
-                if not isinstance(o.get_object(), NullObject)
-            ]
-        )
-        == 0
-    )
-
-
-@pytest.mark.parametrize(
-    "input_path",
-    [
-        "side-by-side-subfig.pdf",
-        "reportlab-inline-image.pdf",
-    ],
-)
-def test_remove_text(input_path, pdf_file_path):
-    pdf_path = RESOURCE_ROOT / input_path
-
-    reader = PdfReader(pdf_path)
-    writer = PdfWriter()
-
-    page = reader.pages[0]
-    writer.insert_page(page, 0)
-    writer.remove_text()
-
-    # finally, write "output" to pypdf-output.pdf
-    with open(pdf_file_path, "wb") as output_stream:
-        writer.write(output_stream)
-
-
-def test_remove_text_all_operators(pdf_file_path):
-    stream = (
-        b"BT "
-        b"/F0 36 Tf "
-        b"50 706 Td "
-        b"36 TL "
-        b"(The Tj operator) Tj "
-        b'1 2 (The double quote operator) " '
-        b"(The single quote operator) ' "
-        b"ET"
-    )
-    pdf_data = (
-        b"%%PDF-1.7\n"
-        b"1 0 obj << /Count 1 /Kids [5 0 R] /Type /Pages >> endobj\n"
-        b"2 0 obj << >> endobj\n"
-        b"3 0 obj << >> endobj\n"
-        b"4 0 obj << /Length %d >>\n"
-        b"stream\n" + (b"%s\n" % stream) + b"endstream\n"
-        b"endobj\n"
-        b"5 0 obj << /Contents 4 0 R /CropBox [0.0 0.0 2550.0 3508.0]\n"
-        b" /MediaBox [0.0 0.0 2550.0 3508.0] /Parent 1 0 R"
-        b" /Resources << /Font << >> >>"
-        b" /Rotate 0 /Type /Page >> endobj\n"
-        b"6 0 obj << /Pages 1 0 R /Type /Catalog >> endobj\n"
-        b"xref 1 6\n"
-        b"%010d 00000 n\n"
-        b"%010d 00000 n\n"
-        b"%010d 00000 n\n"
-        b"%010d 00000 n\n"
-        b"%010d 00000 n\n"
-        b"%010d 00000 n\n"
-        b"trailer << /Root 6 0 R /Size 6 >>\n"
-        b"startxref\n%d\n"
-        b"%%%%EOF"
-    )
-    startx_correction = -1
-    pdf_data = pdf_data % (
-        len(stream),
-        pdf_data.find(b"1 0 obj") + startx_correction,
-        pdf_data.find(b"2 0 obj") + startx_correction,
-        pdf_data.find(b"3 0 obj") + startx_correction,
-        pdf_data.find(b"4 0 obj") + startx_correction,
-        pdf_data.find(b"5 0 obj") + startx_correction,
-        pdf_data.find(b"6 0 obj") + startx_correction,
-        # startx_correction should be -1 due to double % at the beginning
-        # inducing an error on startxref computation
-        pdf_data.find(b"xref"),
-    )
-    pdf_stream = BytesIO(pdf_data)
-
-    reader = PdfReader(pdf_stream, strict=False)
-    writer = PdfWriter()
-
-    page = reader.pages[0]
-    writer.insert_page(page, 0)
-    writer.remove_text()
-
-    # finally, write "output" to pypdf-output.pdf
-    with open(pdf_file_path, "wb") as output_stream:
-        writer.write(output_stream)
-
-
-def test_write_metadata(pdf_file_path):
-    pdf_path = RESOURCE_ROOT / "crazyones.pdf"
-
-    reader = PdfReader(pdf_path)
-    writer = PdfWriter()
-
-    writer.add_page(reader.pages[0])
-    for page in reader.pages:
-        writer.add_page(page)
-
-    metadata = reader.metadata
-    writer.add_metadata(metadata)
-
-    writer.add_metadata({"/Title": "The Crazy Ones"})
-
-    # finally, write data to pypdf-output.pdf
-    with open(pdf_file_path, "wb") as output_stream:
-        writer.write(output_stream)
-
-    # Check if the title was set
-    reader = PdfReader(pdf_file_path)
-    metadata = reader.metadata
-    assert metadata.get("/Title") == "The Crazy Ones"
-
-
-def test_fill_form(pdf_file_path):
-    reader = PdfReader(RESOURCE_ROOT / "form.pdf")
-    writer = PdfWriter()
-
-    writer.append(reader, [0])
-    writer.append(RESOURCE_ROOT / "crazyones.pdf", [0])
-
-    writer.update_page_form_field_values(
-        writer.pages[0], {"foo": "some filled in text"}, flags=1
-    )
-
-    # check if no fields to fill in the page
-    writer.update_page_form_field_values(
-        writer.pages[1], {"foo": "some filled in text"}, flags=1
-    )
-
-    writer.update_page_form_field_values(
-        writer.pages[0], {"foo": "some filled in text"}
-    )
-
-    # write "output" to pypdf-output.pdf
-    with open(pdf_file_path, "wb") as output_stream:
-        writer.write(output_stream)
-
-
-def test_fill_form_with_qualified():
-    reader = PdfReader(RESOURCE_ROOT / "form.pdf")
-    reader.add_form_topname("top")
-
-    writer = PdfWriter()
-    writer.clone_document_from_reader(reader)
-    writer.add_page(reader.pages[0])
-    writer.update_page_form_field_values(
-        writer.pages[0], {"top.foo": "filling"}, flags=1
-    )
-    b = BytesIO()
-    writer.write(b)
-
-    reader2 = PdfReader(b)
-    fields = reader2.get_fields()
-    assert fields["top.foo"]["/V"] == "filling"
-
-
-@pytest.mark.parametrize(
-    ("use_128bit", "user_password", "owner_password"),
-    [(True, "userpwd", "ownerpwd"), (False, "userpwd", "ownerpwd")],
-)
-def test_encrypt(use_128bit, user_password, owner_password, pdf_file_path):
-    reader = PdfReader(RESOURCE_ROOT / "form.pdf")
-    writer = PdfWriter()
-
-    page = reader.pages[0]
-    orig_text = page.extract_text()
-
-    writer.add_page(page)
-
-    with pytest.raises(ValueError, match="owner_pwd of encrypt is deprecated."):
-        writer.encrypt(
-            owner_pwd=user_password,
-            owner_password=owner_password,
-            user_password=user_password,
-            use_128bit=use_128bit,
-        )
-    with pytest.raises(ValueError, match="'user_pwd' argument is deprecated"):
-        writer.encrypt(
-            owner_password=owner_password,
-            user_password=user_password,
-            user_pwd=user_password,
-            use_128bit=use_128bit,
-        )
-    writer.encrypt(
-        user_password=user_password,
-        owner_password=owner_password,
-        use_128bit=use_128bit,
-    )
-
-    # write "output" to pypdf-output.pdf
-    with open(pdf_file_path, "wb") as output_stream:
-        writer.write(output_stream)
-
-    # Test that the data is not there in clear text
-    with open(pdf_file_path, "rb") as input_stream:
-        data = input_stream.read()
-    assert b"foo" not in data
-
-    # Test the user password (str):
-    reader = PdfReader(pdf_file_path, password="userpwd")
-    new_text = reader.pages[0].extract_text()
-    assert reader.metadata.get("/Producer") == "pypdf"
-    assert new_text == orig_text
-
-    # Test the owner password (str):
-    reader = PdfReader(pdf_file_path, password="ownerpwd")
-    new_text = reader.pages[0].extract_text()
-    assert reader.metadata.get("/Producer") == "pypdf"
-    assert new_text == orig_text
-
-    # Test the user password (bytes):
-    reader = PdfReader(pdf_file_path, password=b"userpwd")
-    new_text = reader.pages[0].extract_text()
-    assert reader.metadata.get("/Producer") == "pypdf"
-    assert new_text == orig_text
-
-    # Test the owner password (stbytesr):
-    reader = PdfReader(pdf_file_path, password=b"ownerpwd")
-    new_text = reader.pages[0].extract_text()
-    assert reader.metadata.get("/Producer") == "pypdf"
-    assert new_text == orig_text
-
-
-def test_add_outline_item(pdf_file_path):
-    reader = PdfReader(RESOURCE_ROOT / "pdflatex-outline.pdf")
-    writer = PdfWriter()
-
-    for page in reader.pages:
-        writer.add_page(page)
-
-    outline_item = writer.add_outline_item(
-        "An outline item",
-        1,
-        None,
-        (255, 0, 15),
-        True,
-        True,
-        Fit.fit(),
-        is_open=False,
-    )
-    _o2a = writer.add_outline_item(
-        "Another", 2, outline_item, None, False, False, Fit.fit()
-    )
-    _o2b = writer.add_outline_item(
-        "Another bis", 2, outline_item, None, False, False, Fit.fit()
-    )
-    outline_item2 = writer.add_outline_item(
-        "An outline item 2",
-        1,
-        None,
-        (255, 0, 15),
-        True,
-        True,
-        Fit.fit(),
-        is_open=True,
-    )
-    _o3a = writer.add_outline_item(
-        "Another 2", 2, outline_item2, None, False, False, Fit.fit()
-    )
-    _o3b = writer.add_outline_item(
-        "Another 2bis", 2, outline_item2, None, False, False, Fit.fit()
-    )
-
-    # write "output" to pypdf-output.pdf
-    with open(pdf_file_path, "w+b") as output_stream:
-        writer.write(output_stream)
-        output_stream.seek(0)
-        reader = PdfReader(output_stream)
-        assert reader.trailer["/Root"]["/Outlines"]["/Count"] == 3
-        assert reader.outline[0]["/Count"] == -2
-        assert reader.outline[0]["/%is_open%"] == False  # noqa
-        assert reader.outline[2]["/Count"] == 2
-        assert reader.outline[2]["/%is_open%"] == True  # noqa
-        assert reader.outline[1][0]["/Count"] == 0
-
-
-def test_add_named_destination(pdf_file_path):
-    reader = PdfReader(RESOURCE_ROOT / "pdflatex-outline.pdf")
-    writer = PdfWriter()
-    assert writer.get_named_dest_root() == []
-
-    for page in reader.pages:
-        writer.add_page(page)
-
-    assert writer.get_named_dest_root() == []
-
-    writer.add_named_destination(TextStringObject("A named dest"), 2)
-    writer.add_named_destination(TextStringObject("A named dest2"), 2)
-
-    with pytest.warns(DeprecationWarning, match="pagenum is deprecated as an argument"):
-        writer.add_named_destination(TextStringObject("A named dest3"), pagenum=2)
-
-    with pytest.raises(ValueError):
-        writer.add_named_destination(
-            TextStringObject("A named dest3"), pagenum=2, page_number=2
-        )
-
-    root = writer.get_named_dest_root()
-    assert root[0] == "A named dest"
-    assert root[1].pdf == writer
-    assert root[1].get_object()["/S"] == NameObject("/GoTo")
-    assert root[1].get_object()["/D"][0] == writer.pages[2].indirect_reference
-    assert root[2] == "A named dest2"
-    assert root[3].pdf == writer
-    assert root[3].get_object()["/S"] == NameObject("/GoTo")
-    assert root[3].get_object()["/D"][0] == writer.pages[2].indirect_reference
-    assert root[4] == "A named dest3"
-
-    # test get_object
-
-    assert writer.get_object(root[1].idnum) == writer.get_object(root[1])
-    with pytest.raises(ValueError) as exc:
-        writer.get_object(reader.pages[0].indirect_reference)
-    assert exc.value.args[0] == "pdf must be self"
-
-    # write "output" to pypdf-output.pdf
-    with open(pdf_file_path, "wb") as output_stream:
-        writer.write(output_stream)
-
-
-def test_add_named_destination_sort_order(pdf_file_path):
-    """
-    Issue #1927 does not appear.
-
-    add_named_destination() maintains the named destination list sort order
-    """
-    writer = PdfWriter()
-
-    assert writer.get_named_dest_root() == []
-
-    writer.add_blank_page(200, 200)
-    writer.add_named_destination("b", 0)
-    # "a" should be moved before "b" on insert
-    writer.add_named_destination("a", 0)
-
-    root = writer.get_named_dest_root()
-
-    assert len(root) == 4
-    assert (
-        root[0] == "a"
-    ), '"a" was not inserted before "b" in the named destination root'
-    assert root[2] == "b"
-
-    # write "output" to pypdf-output.pdf
-    with open(pdf_file_path, "wb") as output_stream:
-        writer.write(output_stream)
-
-
-def test_add_uri(pdf_file_path):
-    reader = PdfReader(RESOURCE_ROOT / "pdflatex-outline.pdf")
-    writer = PdfWriter()
-
-    for page in reader.pages:
-        writer.add_page(page)
-
-    writer.add_uri(
-        1,
-        "http://www.example.com",
-        RectangleObject([0, 0, 100, 100]),
-        border=[1, 2, 3, [4]],
-    )
-    writer.add_uri(
-        2,
-        "https://pypdf.readthedocs.io/en/latest/",
-        RectangleObject([20, 30, 50, 80]),
-        border=[1, 2, 3],
-    )
-    writer.add_uri(
-        3,
-        "https://pypdf.readthedocs.io/en/latest/user/adding-pdf-annotations.html",
-        "[ 200 300 250 350 ]",
-        border=[0, 0, 0],
-    )
-    writer.add_uri(
-        3,
-        "https://pypdf.readthedocs.io/en/latest/user/adding-pdf-annotations.html",
-        [100, 200, 150, 250],
-        border=[0, 0, 0],
-    )
-
-    # write "output" to pypdf-output.pdf
-    with open(pdf_file_path, "wb") as output_stream:
-        writer.write(output_stream)
-
-
-def test_add_link(pdf_file_path):
-    reader = PdfReader(RESOURCE_ROOT / "pdflatex-outline.pdf")
-    writer = PdfWriter()
-
-    for page in reader.pages:
-        writer.add_page(page)
-
-    with pytest.raises(
-        DeprecationError,
-        match=(
-            re.escape(
-                "add_link is deprecated and was removed in pypdf 3.0.0. "
-                "Use add_annotation(pypdf.annotations.Link(...)) instead."
-            )
-        ),
-    ):
-        writer.add_link(
-            1,
-            2,
-            RectangleObject([0, 0, 100, 100]),
-            border=[1, 2, 3, [4]],
-            fit="/Fit",
-        )
-        writer.add_link(
-            2, 3, RectangleObject([20, 30, 50, 80]), [1, 2, 3], "/FitH", None
-        )
-        writer.add_link(
-            3,
-            0,
-            "[ 200 300 250 350 ]",
-            [0, 0, 0],
-            "/XYZ",
-            0,
-            0,
-            2,
-        )
-        writer.add_link(
-            3,
-            0,
-            [100, 200, 150, 250],
-            border=[0, 0, 0],
-        )
-
-    # write "output" to pypdf-output.pdf
-    with open(pdf_file_path, "wb") as output_stream:
-        writer.write(output_stream)
-
-
-def test_io_streams():
-    """This is the example from the docs ("Streaming data")."""
-    filepath = RESOURCE_ROOT / "pdflatex-outline.pdf"
-    with open(filepath, "rb") as fh:
-        bytes_stream = BytesIO(fh.read())
-
-    # Read from bytes stream
-    reader = PdfReader(bytes_stream)
-    assert len(reader.pages) == 4
-
-    # Write to bytes stream
-    writer = PdfWriter()
-    with BytesIO() as output_stream:
-        writer.write(output_stream)
-
-
-def test_regression_issue670(pdf_file_path):
-    filepath = RESOURCE_ROOT / "crazyones.pdf"
-    reader = PdfReader(filepath, strict=False)
-    for _ in range(2):
-        writer = PdfWriter()
-        writer.add_page(reader.pages[0])
-        with open(pdf_file_path, "wb") as f_pdf:
-            writer.write(f_pdf)
-
-
-def test_issue301():
-    """Test with invalid stream length object."""
-    with open(RESOURCE_ROOT / "issue-301.pdf", "rb") as f:
-        reader = PdfReader(f)
-        writer = PdfWriter()
-        writer.append_pages_from_reader(reader)
-        b = BytesIO()
-        writer.write(b)
-
-
-def test_append_pages_from_reader_append():
-    """Use append_pages_from_reader with a callable."""
-    with open(RESOURCE_ROOT / "issue-301.pdf", "rb") as f:
-        reader = PdfReader(f)
-        writer = PdfWriter()
-        writer.append_pages_from_reader(reader, callable)
-        b = BytesIO()
-        writer.write(b)
-
-
-@pytest.mark.enable_socket()
-@pytest.mark.slow()
-@pytest.mark.filterwarnings("ignore::DeprecationWarning")
-def test_sweep_indirect_references_nullobject_exception(pdf_file_path):
-    # TODO: Check this more closely... this looks weird
-    url = "https://corpora.tika.apache.org/base/docs/govdocs1/924/924666.pdf"
-    name = "tika-924666.pdf"
-    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
-    merger = PdfMerger()
-    merger.append(reader)
-    merger.write(pdf_file_path)
-
-
-@pytest.mark.enable_socket()
-@pytest.mark.slow()
-@pytest.mark.parametrize(
-    ("url", "name"),
-    [
-        (
-            "https://corpora.tika.apache.org/base/docs/govdocs1/924/924666.pdf",
-            "test_sweep_indirect_references_nullobject_exception.pdf",
-        ),
-        (
-            "https://corpora.tika.apache.org/base/docs/govdocs1/922/922840.pdf",
-            "test_write_outline_item_on_page_fitv.pdf",
-        ),
-        ("https://github.com/py-pdf/pypdf/files/10715624/test.pdf", "iss1627.pdf"),
-    ],
-)
-@pytest.mark.filterwarnings("ignore::DeprecationWarning")
-def test_some_appends(pdf_file_path, url, name):
-    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
-    # PdfMerger
-    merger = PdfMerger()
-    merger.append(reader)
-    merger.write(pdf_file_path)
-    # PdfWriter
-    merger = PdfWriter()
-    merger.append(reader)
-    merger.write(pdf_file_path)
-
-
-def test_pdf_header():
-    writer = PdfWriter()
-    assert writer.pdf_header == b"%PDF-1.3"
-
-    reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf")
-    writer.add_page(reader.pages[0])
-    assert writer.pdf_header == b"%PDF-1.5"
-
-    writer.pdf_header = b"%PDF-1.6"
-    assert writer.pdf_header == b"%PDF-1.6"
-
-
-def test_write_dict_stream_object(pdf_file_path):
-    stream = (
-        b"BT "
-        b"/F0 36 Tf "
-        b"50 706 Td "
-        b"36 TL "
-        b"(The Tj operator) Tj "
-        b'1 2 (The double quote operator) " '
-        b"(The single quote operator) ' "
-        b"ET"
-    )
-
-    stream_object = StreamObject()
-    stream_object[NameObject("/Type")] = NameObject("/Text")
-    stream_object._data = stream
-
-    writer = PdfWriter()
-
-    page_object = PageObject.create_blank_page(writer, 1000, 1000)
-    # Construct dictionary object (PageObject) with stream object
-    # Writer will replace this stream object with indirect object
-    page_object[NameObject("/Test")] = stream_object
-
-    page_object = writer.add_page(page_object)
-    with open(pdf_file_path, "wb") as fp:
-        writer.write(fp)
-
-    for k, v in page_object.items():
-        if k == "/Test":
-            assert str(v) != str(stream_object)
-            assert isinstance(v, IndirectObject)
-            assert str(v.get_object()) == str(stream_object)
-            break
-    else:
-        pytest.fail("/Test not found")
-
-    # Check that every key in _idnum_hash is correct
-    objects_hash = [o.hash_value() for o in writer._objects]
-    for k, v in writer._idnum_hash.items():
-        assert v.pdf == writer
-        assert k in objects_hash, "Missing %s" % v
-
-
-def test_add_single_annotation(pdf_file_path):
-    pdf_path = RESOURCE_ROOT / "crazyones.pdf"
-    reader = PdfReader(pdf_path)
-    page = reader.pages[0]
-    writer = PdfWriter()
-    writer.add_page(page)
-
-    annot_dict = {
-        "/Type": "/Annot",
-        "/Subtype": "/Text",
-        "/Rect": [270.75, 596.25, 294.75, 620.25],
-        "/Contents": "Note in second paragraph",
-        "/C": [1, 1, 0],
-        "/M": "D:20220406191858+02'00",
-        "/Popup": {
-            "/Type": "/Annot",
-            "/Subtype": "/Popup",
-            "/Rect": [294.75, 446.25, 494.75, 596.25],
-            "/M": "D:20220406191847+02'00",
-        },
-        "/T": "moose",
-    }
-    writer.add_annotation(0, annot_dict)
-
-    # Inspect manually by adding 'assert False' and viewing the PDF
-    with open(pdf_file_path, "wb") as fp:
-        writer.write(fp)
-
-
-def test_deprecation_bookmark_decorator():
-    reader = PdfReader(RESOURCE_ROOT / "outlines-with-invalid-destinations.pdf")
-    page = reader.pages[0]
-    outline_item = reader.outline[0]
-    writer = PdfWriter()
-    writer.add_page(page)
-    with pytest.raises(
-        DeprecationError,
-        match="bookmark is deprecated as an argument. Use outline_item instead",
-    ):
-        writer.add_outline_item_dict(bookmark=outline_item)
-
-
-@pytest.mark.samples()
-def test_colors_in_outline_item(pdf_file_path):
-    reader = PdfReader(SAMPLE_ROOT / "004-pdflatex-4-pages/pdflatex-4-pages.pdf")
-    writer = PdfWriter()
-    writer.clone_document_from_reader(reader)
-    purple_rgb = (0.5019607843137255, 0.0, 0.5019607843137255)
-    writer.add_outline_item("First Outline Item", page_number=2, color="800080")
-    writer.add_outline_item("Second Outline Item", page_number=3, color="#800080")
-    writer.add_outline_item("Third Outline Item", page_number=4, color=purple_rgb)
-
-    with open(pdf_file_path, "wb") as f:
-        writer.write(f)
-
-    reader2 = PdfReader(pdf_file_path)
-    for outline_item in reader2.outline:
-        # convert float to string because of mutability
-        assert [str(c) for c in outline_item.color] == [str(p) for p in purple_rgb]
-
-
-@pytest.mark.samples()
-def test_write_empty_stream():
-    reader = PdfReader(SAMPLE_ROOT / "004-pdflatex-4-pages/pdflatex-4-pages.pdf")
-    writer = PdfWriter()
-    writer.clone_document_from_reader(reader)
-
-    with pytest.raises(ValueError) as exc:
-        writer.write("")
-    assert exc.value.args[0] == "Output(stream=) is empty."
-
-
-def test_startup_dest():
-    pdf_file_writer = PdfWriter()
-    pdf_file_writer.append_pages_from_reader(PdfReader(RESOURCE_ROOT / "issue-604.pdf"))
-
-    assert pdf_file_writer.open_destination is None
-    pdf_file_writer.open_destination = pdf_file_writer.pages[9]
-    # checked also using Acrobrat to verify the good page is opened
-    op = pdf_file_writer._root_object["/OpenAction"]
-    assert op[0] == pdf_file_writer.pages[9].indirect_reference
-    assert op[1] == "/Fit"
-    op = pdf_file_writer.open_destination
-    assert op.raw_get("/Page") == pdf_file_writer.pages[9].indirect_reference
-    assert op["/Type"] == "/Fit"
-    pdf_file_writer.open_destination = op
-    assert pdf_file_writer.open_destination == op
-
-    # irrelevant, just for coverage
-    pdf_file_writer._root_object[NameObject("/OpenAction")][0] = NumberObject(0)
-    pdf_file_writer.open_destination
-    with pytest.raises(Exception) as exc:
-        del pdf_file_writer._root_object[NameObject("/OpenAction")][0]
-        pdf_file_writer.open_destination
-    assert "Invalid Destination" in str(exc.value)
-
-    pdf_file_writer.open_destination = "Test"
-    # checked also using Acrobrat to verify open_destination
-    op = pdf_file_writer._root_object["/OpenAction"]
-    assert isinstance(op, TextStringObject)
-    assert op == "Test"
-    op = pdf_file_writer.open_destination
-    assert isinstance(op, TextStringObject)
-    assert op == "Test"
-
-    # irrelevant, this is just for coverage
-    pdf_file_writer._root_object[NameObject("/OpenAction")] = NumberObject(0)
-    assert pdf_file_writer.open_destination is None
-    pdf_file_writer.open_destination = None
-    assert "/OpenAction" not in pdf_file_writer._root_object
-    pdf_file_writer.open_destination = None
-
-
-@pytest.mark.enable_socket()
-def test_iss471():
-    url = "https://github.com/py-pdf/pypdf/files/9139245/book.pdf"
-    name = "book_471.pdf"
-    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
-
-    writer = PdfWriter()
-    writer.append(reader, excluded_fields=[])
-    assert isinstance(
-        writer.pages[0]["/Annots"][0].get_object()["/Dest"], TextStringObject
-    )
-
-
-@pytest.mark.enable_socket()
-def test_reset_translation():
-    url = "https://corpora.tika.apache.org/base/docs/govdocs1/924/924666.pdf"
-    name = "tika-924666.pdf"
-    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
-    writer = PdfWriter()
-    writer.append(reader, (0, 10))
-    nb = len(writer._objects)
-    writer.append(reader, (0, 10))
-    assert (
-        len(writer._objects) == nb + 11
-    )  # +10 (pages) +1 because of the added outline
-    nb += 1
-    writer.reset_translation(reader)
-    writer.append(reader, (0, 10))
-    assert len(writer._objects) >= nb + 200
-    nb = len(writer._objects)
-    writer.reset_translation(reader.pages[0].indirect_reference)
-    writer.append(reader, (0, 10))
-    assert len(writer._objects) >= nb + 200
-    nb = len(writer._objects)
-    writer.reset_translation()
-    writer.append(reader, (0, 10))
-    assert len(writer._objects) >= nb + 200
-    nb = len(writer.pages)
-    writer.append(reader, [reader.pages[0], reader.pages[0]])
-    assert len(writer.pages) == nb + 2
-
-
-def test_threads_empty():
-    writer = PdfWriter()
-    thr = writer.threads
-    assert isinstance(thr, ArrayObject)
-    assert len(thr) == 0
-    thr2 = writer.threads
-    assert thr == thr2
-
-
-@pytest.mark.enable_socket()
-def test_append_without_annots_and_articles():
-    url = "https://corpora.tika.apache.org/base/docs/govdocs1/924/924666.pdf"
-    name = "tika-924666.pdf"
-    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
-    writer = PdfWriter()
-    writer.append(reader, None, (0, 10), True, ["/B"])
-    writer.reset_translation()
-    writer.append(reader, (0, 10), True, ["/B"])
-    assert writer.threads == []
-    writer = PdfWriter()
-    writer.append(reader, None, (0, 10), True, ["/Annots"])
-    assert "/Annots" not in writer.pages[5]
-    writer = PdfWriter()
-    writer.append(reader, None, (0, 10), True, [])
-    assert "/Annots" in writer.pages[5]
-    assert len(writer.threads) >= 1
-
-
-@pytest.mark.enable_socket()
-def test_append_multiple():
-    url = "https://corpora.tika.apache.org/base/docs/govdocs1/924/924666.pdf"
-    name = "tika-924666.pdf"
-    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
-    writer = PdfWriter()
-    writer.append(
-        reader, [0, 0, 0]
-    )  # to demonstre multiple insertion of same page at once
-    writer.append(reader, [0, 0, 0])  # second pack
-    pages = writer._root_object["/Pages"]["/Kids"]
-    assert pages[0] not in pages[1:]  # page not repeated
-    assert pages[-1] not in pages[0:-1]  # page not repeated
-
-
-@pytest.mark.samples()
-def test_set_page_label(pdf_file_path):
-    src = RESOURCE_ROOT / "GeoBase_NHNC1_Data_Model_UML_EN.pdf"  # File without labels
-    reader = PdfReader(src)
-
-    expected = [
-        "i",
-        "ii",
-        "1",
-        "2",
-        "A",
-        "B",
-        "1",
-        "2",
-        "3",
-        "4",
-        "A",
-        "i",
-        "I",
-        "II",
-        "1",
-        "2",
-        "3",
-        "I",
-        "II",
-    ]
-
-    # Tests full lenght with labels assigned at first and last elements
-    # Tests different labels assigned to consecutive ranges
-    writer = PdfWriter()
-    writer.clone_document_from_reader(reader)
-    writer.set_page_label(0, 1, "/r")
-    writer.set_page_label(4, 5, "/A")
-    writer.set_page_label(10, 10, "/A")
-    writer.set_page_label(11, 11, "/r")
-    writer.set_page_label(12, 13, "/R")
-    writer.set_page_label(17, 18, "/R")
-    writer.write(pdf_file_path)
-    assert PdfReader(pdf_file_path).page_labels == expected
-
-    writer = PdfWriter()  # Same labels, different set order
-    writer.clone_document_from_reader(reader)
-    writer.set_page_label(17, 18, "/R")
-    writer.set_page_label(4, 5, "/A")
-    writer.set_page_label(10, 10, "/A")
-    writer.set_page_label(0, 1, "/r")
-    writer.set_page_label(12, 13, "/R")
-    writer.set_page_label(11, 11, "/r")
-    writer.write(pdf_file_path)
-    assert PdfReader(pdf_file_path).page_labels == expected
-
-    # Tests labels assigned only in the middle
-    # Tests label assigned to a range already containing labled ranges
-    expected = ["1", "2", "i", "ii", "iii", "iv", "v", "1"]
-    writer = PdfWriter()
-    writer.clone_document_from_reader(reader)
-    writer.set_page_label(3, 4, "/a")
-    writer.set_page_label(5, 5, "/A")
-    writer.set_page_label(2, 6, "/r")
-    writer.write(pdf_file_path)
-    assert PdfReader(pdf_file_path).page_labels[: len(expected)] == expected
-
-    # Tests labels assigned inside a previously existing range
-    expected = ["1", "2", "i", "a", "b", "A", "1", "1", "2"]
-    # Ones repeat because user didnt cover the entire original range
-    writer = PdfWriter()
-    writer.clone_document_from_reader(reader)
-    writer.set_page_label(2, 6, "/r")
-    writer.set_page_label(3, 4, "/a")
-    writer.set_page_label(5, 5, "/A")
-    writer.write(pdf_file_path)
-    assert PdfReader(pdf_file_path).page_labels[: len(expected)] == expected
-
-    # Tests invalid user input
-    writer = PdfWriter()
-    writer.clone_document_from_reader(reader)
-    with pytest.raises(
-        ValueError, match="at least one between style and prefix must be given"
-    ):
-        writer.set_page_label(0, 5, start=2)
-    with pytest.raises(
-        ValueError, match="page_index_from must be equal or greater then 0"
-    ):
-        writer.set_page_label(-1, 5, "/r")
-    with pytest.raises(
-        ValueError, match="page_index_to must be equal or greater then page_index_from"
-    ):
-        writer.set_page_label(5, 0, "/r")
-    with pytest.raises(ValueError, match="page_index_to exceeds number of pages"):
-        writer.set_page_label(0, 19, "/r")
-    with pytest.raises(
-        ValueError, match="if given, start must be equal or greater than one"
-    ):
-        writer.set_page_label(0, 5, "/r", start=-1)
-
-    pdf_file_path.unlink()
-
-    src = (
-        SAMPLE_ROOT / "009-pdflatex-geotopo/GeoTopo.pdf"
-    )  # File with pre existing labels
-    reader = PdfReader(src)
-
-    # Tests adding labels to existing ones
-    expected = ["i", "ii", "A", "B", "1"]
-    writer = PdfWriter()
-    writer.clone_document_from_reader(reader)
-    writer.set_page_label(2, 3, "/A")
-    writer.write(pdf_file_path)
-    assert PdfReader(pdf_file_path).page_labels[: len(expected)] == expected
-
-    # Tests replacing existing lables
-    expected = ["A", "B", "1", "1", "2"]
-    writer = PdfWriter()
-    writer.clone_document_from_reader(reader)
-    writer.set_page_label(0, 1, "/A")
-    writer.write(pdf_file_path)
-    assert PdfReader(pdf_file_path).page_labels[: len(expected)] == expected
-
-    pdf_file_path.unlink()
-
-    # Tests prefix and start.
-    src = RESOURCE_ROOT / "issue-604.pdf"  # File without page labels
-    reader = PdfReader(src)
-    writer = PdfWriter()
-    writer.clone_document_from_reader(reader)
-
-    writer.set_page_label(0, 0, prefix="FRONT")
-    writer.set_page_label(1, 2, "/D", start=2)
-    writer.set_page_label(3, 6, prefix="UPDATES")
-    writer.set_page_label(7, 10, "/D", prefix="THYR-")
-    writer.set_page_label(11, 21, "/D", prefix="PAP-")
-    writer.set_page_label(22, 30, "/D", prefix="FOLL-")
-    writer.set_page_label(31, 39, "/D", prefix="HURT-")
-    writer.write(pdf_file_path)
-
-
-@pytest.mark.enable_socket()
-def test_iss1601():
-    url = "https://github.com/py-pdf/pypdf/files/10579503/badges-38.pdf"
-    name = "badge-38.pdf"
-    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
-    original_cs_operations = ContentStream(
-        reader.pages[0].get_contents(), reader
-    ).operations
-    writer = PdfWriter()
-    page_1 = writer.add_blank_page(
-        reader.pages[0].mediabox[2], reader.pages[0].mediabox[3]
-    )
-    page_1.merge_transformed_page(reader.pages[0], Transformation())
-    page_1_cs_operations = page_1.get_contents().operations
-    assert is_sublist(original_cs_operations, page_1_cs_operations)
-    page_1 = writer.add_blank_page(
-        reader.pages[0].mediabox[2], reader.pages[0].mediabox[3]
-    )
-    page_1.merge_page(reader.pages[0])
-    page_1_cs_operations = page_1.get_contents().operations
-    assert is_sublist(original_cs_operations, page_1_cs_operations)
-
-
-def test_attachments():
-    writer = PdfWriter()
-    writer.add_blank_page(100, 100)
-    b = BytesIO()
-    writer.write(b)
-    b.seek(0)
-    reader = PdfReader(b)
-    b = None
-    assert reader.attachments == {}
-    assert reader._list_attachments() == []
-    assert reader._get_attachments() == {}
-    to_add = [
-        ("foobar.txt", b"foobarcontent"),
-        ("foobar2.txt", b"foobarcontent2"),
-        ("foobar2.txt", b"2nd_foobarcontent"),
-    ]
-    for name, content in to_add:
-        writer.add_attachment(name, content)
-
-    b = BytesIO()
-    writer.write(b)
-    b.seek(0)
-    reader = PdfReader(b)
-    b = None
-    assert sorted(reader.attachments.keys()) == sorted({name for name, _ in to_add})
-    assert str(reader.attachments) == "LazyDict(keys=['foobar.txt', 'foobar2.txt'])"
-    assert reader._list_attachments() == [name for name, _ in to_add]
-
-    # We've added the same key twice - hence only 2 and not 3:
-    att = reader._get_attachments()
-    assert len(att) == 2  # we have 2 keys, but 3 attachments!
-
-    # The content for foobar.txt is clear and just a single value:
-    assert att["foobar.txt"] == b"foobarcontent"
-
-    # The content for foobar2.txt is a list!
-    att = reader._get_attachments("foobar2.txt")
-    assert len(att) == 1
-    assert att["foobar2.txt"] == [b"foobarcontent2", b"2nd_foobarcontent"]
-
-    # Let's do both cases with the public interface:
-    assert reader.attachments["foobar.txt"][0] == b"foobarcontent"
-    assert reader.attachments["foobar2.txt"][0] == b"foobarcontent2"
-    assert reader.attachments["foobar2.txt"][1] == b"2nd_foobarcontent"
-
-
-@pytest.mark.enable_socket()
-def test_iss1614():
-    # test of an annotation(link) directly stored in the /Annots in the page
-    url = "https://github.com/py-pdf/pypdf/files/10669995/broke.pdf"
-    name = "iss1614.pdf"
-    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
-    writer = PdfWriter()
-    writer.append(reader)
-    # test for 2nd error case reported in #1614
-    url = "https://github.com/py-pdf/pypdf/files/10696390/broken.pdf"
-    name = "iss1614.2.pdf"
-    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
-    writer.append(reader)
-
-
-@pytest.mark.enable_socket()
-def test_new_removes():
-    # test of an annotation(link) directly stored in the /Annots in the page
-    url = "https://github.com/py-pdf/pypdf/files/10807951/tt.pdf"
-    name = "iss1650.pdf"
-    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
-
-    writer = PdfWriter()
-    writer.clone_document_from_reader(reader)
-    writer.remove_images()
-    b = BytesIO()
-    writer.write(b)
-    bb = bytes(b.getbuffer())
-    assert b"/Im0 Do" not in bb
-    assert b"/Fm0 Do" in bb
-    assert b" TJ" in bb
-
-    writer = PdfWriter()
-    writer.clone_document_from_reader(reader)
-    writer.remove_text()
-    b = BytesIO()
-    writer.write(b)
-    bb = bytes(b.getbuffer())
-    assert b"/Im0" in bb
-    assert b"Chap" not in bb
-    assert b" TJ" not in bb
-
-    url = "https://github.com/py-pdf/pypdf/files/10832029/tt2.pdf"
-    name = "GeoBaseWithComments.pdf"
-    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
-    writer.append(reader)
-    writer.remove_objects_from_page(writer.pages[0], [ObjectDeletionFlag.LINKS])
-    assert "/Links" not in [
-        a.get_object()["/Subtype"] for a in writer.pages[0]["/Annots"]
-    ]
-    writer.remove_objects_from_page(writer.pages[0], ObjectDeletionFlag.ATTACHMENTS)
-    assert "/FileAttachment" not in [
-        a.get_object()["/Subtype"] for a in writer.pages[0]["/Annots"]
-    ]
-
-    writer.pages[0]["/Annots"].append(
-        DictionaryObject({NameObject("/Subtype"): TextStringObject("/3D")})
-    )
-    assert "/3D" in [a.get_object()["/Subtype"] for a in writer.pages[0]["/Annots"]]
-    writer.remove_objects_from_page(writer.pages[0], ObjectDeletionFlag.OBJECTS_3D)
-    assert "/3D" not in [a.get_object()["/Subtype"] for a in writer.pages[0]["/Annots"]]
-
-    writer.remove_links()
-    assert len(writer.pages[0]["/Annots"]) == 0
-    assert len(writer.pages[3]["/Annots"]) == 0
-
-    writer.remove_annotations("/Text")
-
-
-@pytest.mark.enable_socket()
-def test_late_iss1654():
-    url = "https://github.com/py-pdf/pypdf/files/10935632/bid1.pdf"
-    name = "bid1.pdf"
-    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
-    writer = PdfWriter()
-    writer.clone_document_from_reader(reader)
-    for p in writer.pages:
-        p.compress_content_streams()
-    b = BytesIO()
-    writer.write(b)
-
-
-@pytest.mark.enable_socket()
-def test_iss1723():
-    # test of an annotation(link) directly stored in the /Annots in the page
-    url = "https://github.com/py-pdf/pypdf/files/11015242/inputFile.pdf"
-    name = "iss1723.pdf"
-    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
-    writer = PdfWriter()
-    writer.append(reader, (3, 5))
-
-
-@pytest.mark.enable_socket()
-def test_iss1767():
-    # test with a pdf which is buggy because the object 389,0 exists 3 times:
-    # twice to define catalog and one as an XObject inducing a loop when
-    # cloning
-    url = "https://github.com/py-pdf/pypdf/files/11138472/test.pdf"
-    name = "iss1723.pdf"
-    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
-    PdfWriter(clone_from=reader)
-
-
-@pytest.mark.enable_socket()
-def test_named_dest_page_number():
-    """
-    Closes iss471
-    tests appending with named destinations as integers
-    """
-    url = "https://github.com/py-pdf/pypdf/files/10704333/central.pdf"
-    name = "central.pdf"
-    writer = PdfWriter()
-    writer.add_blank_page(100, 100)
-    writer.append(BytesIO(get_data_from_url(url, name=name)), pages=[0, 1, 2])
-    assert len(writer._root_object["/Names"]["/Dests"]["/Names"]) == 2
-    assert writer._root_object["/Names"]["/Dests"]["/Names"][-1][0] == (1 + 1)
-    writer.append(BytesIO(get_data_from_url(url, name=name)))
-    assert len(writer._root_object["/Names"]["/Dests"]["/Names"]) == 6
-    writer2 = PdfWriter()
-    writer2.add_blank_page(100, 100)
-    dest = writer2.add_named_destination("toto", 0)
-    dest.get_object()[NameObject("/D")][0] = NullObject()
-    b = BytesIO()
-    writer2.write(b)
-    b.seek(0)
-    writer.append(b)
-    assert len(writer._root_object["/Names"]["/Dests"]["/Names"]) == 6
-
-
-@pytest.mark.parametrize(
-    ("write_data_here", "needs_cleanup"),
-    [
-        (
-            "dont_commit_writer.pdf",
-            True,
-        )
-    ],
-)
-def test_update_form_fields(write_data_here, needs_cleanup):
-    writer = PdfWriter(clone_from=RESOURCE_ROOT / "FormTestFromOo.pdf")
-    writer.update_page_form_field_values(
-        writer.pages[0],
-        {
-            "CheckBox1": "/Yes",
-            "Text1": "mon Text1",
-            "Text2": "ligne1\nligne2",
-            "RadioGroup1": "/2",
-            "RdoS1": "/",
-            "Combo1": "!!monCombo!!",
-            "Liste1": "Liste2",
-            "Liste2": ["Lst1", "Lst3"],
-            "DropList1": "DropListe3",
-        },
-        auto_regenerate=False,
-    )
-    del writer.pages[0]["/Annots"][1].get_object()["/AP"]["/N"]
-    writer.update_page_form_field_values(
-        writer.pages[0],
-        {"Text1": "my Text1", "Text2": "ligne1\nligne2\nligne3"},
-        auto_regenerate=False,
-    )
-
-    writer.write("dont_commit_writer.pdf")
-    reader = PdfReader("dont_commit_writer.pdf")
-    flds = reader.get_fields()
-    assert flds["CheckBox1"]["/V"] == "/Yes"
-    assert flds["CheckBox1"].indirect_reference.get_object()["/AS"] == "/Yes"
-    assert (
-        b"(my Text1)"
-        in flds["Text1"].indirect_reference.get_object()["/AP"]["/N"].get_data()
-    )
-    assert flds["Text2"]["/V"] == "ligne1\nligne2\nligne3"
-    assert (
-        b"(ligne3)"
-        in flds["Text2"].indirect_reference.get_object()["/AP"]["/N"].get_data()
-    )
-    assert flds["RadioGroup1"]["/V"] == "/2"
-    assert flds["RadioGroup1"]["/Kids"][0].get_object()["/AS"] == "/Off"
-    assert flds["RadioGroup1"]["/Kids"][1].get_object()["/AS"] == "/2"
-    assert all(x in flds["Liste2"]["/V"] for x in ["Lst1", "Lst3"])
-
-    assert all(x in flds["CheckBox1"]["/_States_"] for x in ["/Off", "/Yes"])
-    assert all(x in flds["RadioGroup1"]["/_States_"] for x in ["/1", "/2", "/3"])
-    assert all(x in flds["Liste1"]["/_States_"] for x in ["Liste1", "Liste2", "Liste3"])
-
-    if needs_cleanup:
-        Path(write_data_here).unlink()
-
-
-@pytest.mark.enable_socket()
-def test_iss1862():
-    # The file here has "/B" entry to define the font in a object below the page
-    # The excluded field shall be considered only at first level (page) and not
-    # below
-    url = "https://github.com/py-pdf/pypdf/files/11708801/intro.pdf"
-    name = "iss1862.pdf"
-    writer = PdfWriter()
-    writer.append(BytesIO(get_data_from_url(url, name=name)))
-    # check that "/B" is in the font
-    writer.pages[0]["/Resources"]["/Font"]["/F1"]["/CharProcs"]["/B"].get_data()
-
-
-def test_empty_objects_before_cloning():
-    pdf_path = RESOURCE_ROOT / "crazyones.pdf"
-    reader = PdfReader(pdf_path)
-    writer = PdfWriter(clone_from=reader)
-    nb_obj_reader = len(reader.xref_objStm) + sum(
-        len(reader.xref[i]) for i in reader.xref
-    )
-    nb_obj_reader -= 1  # for trailer
-    nb_obj_reader -= len(
-        {x: 1 for x, y in reader.xref_objStm.values()}
-    )  # to remove object streams
-    assert len(writer._objects) == nb_obj_reader
-
-
-@pytest.mark.enable_socket()
-def test_watermark():
-    url = "https://github.com/py-pdf/pypdf/files/11985889/bg.pdf"
-    name = "bgwatermark.pdf"
-    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
-    url = "https://github.com/py-pdf/pypdf/files/11985888/source.pdf"
-    name = "srcwatermark.pdf"
-    writer = PdfWriter(clone_from=BytesIO(get_data_from_url(url, name=name)))
-    for p in writer.pages:
-        p.merge_page(reader.pages[0], over=False)
-
-    assert isinstance(p["/Contents"], ArrayObject)
-    assert isinstance(p["/Contents"][0], IndirectObject)
-
-    b = BytesIO()
-    writer.write(b)
-    assert len(b.getvalue()) < 2.1 * 1024 * 1024
-
-
-@pytest.mark.enable_socket()
-@pytest.mark.timeout(4)  # this was a lot slower before PR #2086
-def test_watermarking_speed():
-    url = "https://github.com/py-pdf/pypdf/files/11985889/bg.pdf"
-    name = "bgwatermark.pdf"
-    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
-    url = "https://arxiv.org/pdf/2201.00214.pdf"
-    name = "2201.00214.pdf"
-    writer = PdfWriter(clone_from=BytesIO(get_data_from_url(url, name=name)))
-    for p in writer.pages:
-        p.merge_page(reader.pages[0], over=False)
-    out_pdf_bytesio = BytesIO()
-    writer.write(out_pdf_bytesio)
-    pdf_size_in_mib = len(out_pdf_bytesio.getvalue()) / 1024 / 1024
-    assert pdf_size_in_mib < 20
-
-
-@pytest.mark.enable_socket()
-@pytest.mark.skipif(GHOSTSCRIPT_BINARY is None, reason="Requires Ghostscript")
-def test_watermark_rendering(tmp_path):
-    """Ensure the visual appearance of watermarking stays correct."""
-    url = "https://github.com/py-pdf/pypdf/files/11985889/bg.pdf"
-    name = "bgwatermark.pdf"
-    watermark = PdfReader(BytesIO(get_data_from_url(url, name=name))).pages[0]
-    url = "https://github.com/py-pdf/pypdf/files/11985888/source.pdf"
-    name = "srcwatermark.pdf"
-    page = PdfReader(BytesIO(get_data_from_url(url, name=name))).pages[0]
-    writer = PdfWriter()
-    page.merge_page(watermark, over=False)
-    writer.add_page(page)
-
-    target_png_path = tmp_path / "target.png"
-    url = "https://github.com/py-pdf/pypdf/assets/96178532/d5c72d0e-7047-4504-bbf6-bc591c80d7c0"
-    name = "dstwatermark.png"
-    target_png_path.write_bytes(get_data_from_url(url, name=name))
-
-    pdf_path = tmp_path / "out.pdf"
-    png_path = tmp_path / "out.png"
+"""Test the pypdf._writer module."""
+import re
+import shutil
+import subprocess
+from io import BytesIO
+from pathlib import Path
+
+import pytest
+
+from pypdf import (
+    ObjectDeletionFlag,
+    PageObject,
+    PdfMerger,
+    PdfReader,
+    PdfWriter,
+    Transformation,
+)
+from pypdf.errors import DeprecationError, PageSizeNotDefinedError, PyPdfError
+from pypdf.generic import (
+    ArrayObject,
+    ContentStream,
+    DictionaryObject,
+    Fit,
+    IndirectObject,
+    NameObject,
+    NullObject,
+    NumberObject,
+    RectangleObject,
+    StreamObject,
+    TextStringObject,
+)
+
+from . import get_data_from_url, is_sublist
+from .test_images import image_similarity
+
+TESTS_ROOT = Path(__file__).parent.resolve()
+PROJECT_ROOT = TESTS_ROOT.parent
+RESOURCE_ROOT = PROJECT_ROOT / "resources"
+SAMPLE_ROOT = Path(PROJECT_ROOT) / "sample-files"
+GHOSTSCRIPT_BINARY = shutil.which("gs")
+
+
+def test_writer_exception_non_binary(tmp_path, caplog):
+    src = RESOURCE_ROOT / "pdflatex-outline.pdf"
+
+    reader = PdfReader(src)
+    writer = PdfWriter()
+    writer.add_page(reader.pages[0])
+
+    with open(tmp_path / "out.txt", "w") as fp, pytest.raises(TypeError):
+        writer.write_stream(fp)
+    ending = "to write to is not in binary mode. It may not be written to correctly.\n"
+    assert caplog.text.endswith(ending)
+
+
+def test_writer_clone():
+    src = RESOURCE_ROOT / "pdflatex-outline.pdf"
+
+    reader = PdfReader(src)
+    writer = PdfWriter(clone_from=reader)
+    assert len(writer.pages) == 4
+    assert "PageObject" in str(type(writer.pages[0]))
+
+    writer = PdfWriter(clone_from=src)
+    assert len(writer.pages) == 4
+    assert "PageObject" in str(type(writer.pages[0]))
+
+
+def test_writer_clone_bookmarks():
+    # Arrange
+    src = RESOURCE_ROOT / "Seige_of_Vicksburg_Sample_OCR-crazyones-merged.pdf"
+    reader = PdfReader(src)
+    writer = PdfWriter()
+
+    # Act + test cat
+    cat = ""
+
+    def cat1(p) -> None:
+        nonlocal cat
+        cat += p.__repr__()
+
+    writer.clone_document_from_reader(reader, cat1)
+    assert "/Page" in cat
+    assert writer.pages[0].raw_get("/Parent") == writer._pages
+    writer.add_outline_item("Page 1", 0)
+    writer.add_outline_item("Page 2", 1)
+
+    # Assert
+    bytes_stream = BytesIO()
+    writer.write(bytes_stream)
+    bytes_stream.seek(0)
+    reader2 = PdfReader(bytes_stream)
+    assert len(reader2.pages) == len(reader.pages)
+    assert len(reader2.outline) == 2
+
+    # test with append
+    writer = PdfWriter()
+    writer.append(reader)
+    writer.add_outline_item("Page 1", 0)
+    writer.add_outline_item("Page 2", 1)
+
+    # Assert
+    bytes_stream = BytesIO()
+    writer.write(bytes_stream)
+    bytes_stream.seek(0)
+    reader2 = PdfReader(bytes_stream)
+    assert len(reader2.pages) == len(reader.pages)
+    assert len(reader2.outline) == 2
+
+
+def writer_operate(writer: PdfWriter) -> None:
+    """
+    Test a writer that was initialized by each of the four usages.
+
+    Args:
+        writer: A PdfWriter object
+    """
+    pdf_path = RESOURCE_ROOT / "crazyones.pdf"
+    pdf_outline_path = RESOURCE_ROOT / "pdflatex-outline.pdf"
+
+    reader = PdfReader(pdf_path)
+    reader_outline = PdfReader(pdf_outline_path)
+
+    page = reader.pages[0]
+    with pytest.raises(PageSizeNotDefinedError) as exc:
+        writer.add_blank_page()
+    assert exc.value.args == ()
+    writer.insert_page(page, 1)
+    writer.insert_page(reader_outline.pages[0], 0)
+    writer.add_outline_item_destination(page)
+    writer.remove_links()
+    writer.add_outline_item_destination(page)
+    oi = writer.add_outline_item(
+        "An outline item", 0, None, (255, 0, 15), True, True, Fit.fit_box_vertically(10)
+    )
+    writer.add_outline_item(
+        "The XYZ fit", 0, oi, (255, 0, 15), True, True, Fit.xyz(left=10, top=20, zoom=3)
+    )
+    writer.add_outline_item(
+        "The XYZ fit no args", 0, oi, (255, 0, 15), True, True, Fit.xyz()
+    )
+    writer.add_outline_item(
+        "The FitH fit", 0, oi, (255, 0, 15), True, True, Fit.fit_horizontally(top=10)
+    )
+    writer.add_outline_item(
+        "The FitV fit", 0, oi, (255, 0, 15), True, True, Fit.fit_vertically(left=10)
+    )
+    writer.add_outline_item(
+        "The FitR fit",
+        0,
+        oi,
+        (255, 0, 15),
+        True,
+        True,
+        Fit.fit_rectangle(left=10, bottom=20, right=30, top=40),
+    )
+    writer.add_outline_item(
+        "The FitB fit", 0, oi, (255, 0, 15), True, True, Fit.fit_box()
+    )
+    writer.add_outline_item(
+        "The FitBH fit",
+        0,
+        oi,
+        (255, 0, 15),
+        True,
+        True,
+        Fit.fit_box_horizontally(top=10),
+    )
+    writer.add_outline_item(
+        "The FitBV fit",
+        0,
+        oi,
+        (255, 0, 15),
+        True,
+        True,
+        Fit.fit_box_vertically(left=10),
+    )
+    writer.add_blank_page()
+    writer.add_uri(2, "https://example.com", RectangleObject([0, 0, 100, 100]))
+    with pytest.warns(
+        DeprecationWarning, match="'pagenum' argument of add_uri is deprecated"
+    ):
+        writer.add_uri(
+            2, "https://example.com", RectangleObject([0, 0, 100, 100]), pagenum=2
+        )
+    with pytest.raises(DeprecationError):
+        writer.add_link(2, 1, RectangleObject([0, 0, 100, 100]))
+    assert writer._get_page_layout() is None
+    writer.page_layout = "broken"
+    assert writer.page_layout == "broken"
+    writer.page_layout = NameObject("/SinglePage")
+    assert writer._get_page_layout() == "/SinglePage"
+    assert writer._get_page_mode() is None
+    writer.set_page_mode("/UseNone")
+    assert writer._get_page_mode() == "/UseNone"
+    writer.set_page_mode(NameObject("/UseOC"))
+    assert writer._get_page_mode() == "/UseOC"
+    writer.insert_blank_page(width=100, height=100)
+    writer.insert_blank_page()  # without parameters
+
+    writer.remove_images()
+
+    writer.add_metadata(reader.metadata)
+    writer.add_metadata({"/Author": "Martin Thoma"})
+    writer.add_metadata({"/MyCustom": 1234})
+
+    writer.add_attachment("foobar.gif", b"foobarcontent")
+
+    # Check that every key in _idnum_hash is correct
+    objects_hash = [o.hash_value() for o in writer._objects]
+    for k, v in writer._idnum_hash.items():
+        assert v.pdf == writer
+        assert k in objects_hash, f"Missing {v}"
+
+
+tmp_path = "dont_commit_writer.pdf"
+
+
+@pytest.mark.parametrize(
+    ("write_data_here", "needs_cleanup"),
+    [
+        ("dont_commit_writer.pdf", True),
+        (Path("dont_commit_writer.pdf"), True),
+        (BytesIO(), False),
+    ],
+)
+def test_writer_operations_by_traditional_usage(write_data_here, needs_cleanup):
+    writer = PdfWriter()
+
+    writer_operate(writer)
+
+    # finally, write the output to write_data_here
+    if needs_cleanup:
+        with open(write_data_here, "wb") as output_stream:
+            writer.write(output_stream)
+    else:
+        output_stream = write_data_here
+        writer.write(output_stream)
+
+    if needs_cleanup:
+        Path(write_data_here).unlink()
+
+
+@pytest.mark.parametrize(
+    ("write_data_here", "needs_cleanup"),
+    [
+        ("dont_commit_writer.pdf", True),
+        (Path("dont_commit_writer.pdf"), True),
+        (BytesIO(), False),
+    ],
+)
+def test_writer_operations_by_semi_traditional_usage(write_data_here, needs_cleanup):
+    with PdfWriter() as writer:
+        writer_operate(writer)
+
+        # finally, write the output to write_data_here
+        if needs_cleanup:
+            with open(write_data_here, "wb") as output_stream:
+                writer.write(output_stream)
+        else:
+            output_stream = write_data_here
+            writer.write(output_stream)
+
+    if needs_cleanup:
+        Path(write_data_here).unlink()
+
+
+@pytest.mark.parametrize(
+    ("write_data_here", "needs_cleanup"),
+    [
+        ("dont_commit_writer.pdf", True),
+        (Path("dont_commit_writer.pdf"), True),
+        (BytesIO(), False),
+    ],
+)
+def test_writer_operations_by_semi_new_traditional_usage(
+    write_data_here, needs_cleanup
+):
+    with PdfWriter() as writer:
+        writer_operate(writer)
+
+        # finally, write the output to write_data_here
+        writer.write(write_data_here)
+
+    if needs_cleanup:
+        Path(write_data_here).unlink()
+
+
+@pytest.mark.parametrize(
+    ("write_data_here", "needs_cleanup"),
+    [
+        ("dont_commit_writer.pdf", True),
+        (Path("dont_commit_writer.pdf"), True),
+        (BytesIO(), False),
+    ],
+)
+def test_writer_operation_by_new_usage(write_data_here, needs_cleanup):
+    # This includes writing the output to write_data_here
+    with PdfWriter(write_data_here) as writer:
+        writer_operate(writer)
+
+    if needs_cleanup:
+        Path(write_data_here).unlink()
+
+
+@pytest.mark.parametrize(
+    "input_path",
+    [
+        "side-by-side-subfig.pdf",
+        "reportlab-inline-image.pdf",
+    ],
+)
+def test_remove_images(pdf_file_path, input_path):
+    pdf_path = RESOURCE_ROOT / input_path
+
+    reader = PdfReader(pdf_path)
+    writer = PdfWriter()
+
+    page = reader.pages[0]
+    writer.insert_page(page, 0)
+    writer.remove_images()
+    page_contents_stream = writer.pages[0]["/Contents"]._data
+    assert len(page_contents_stream.strip())
+
+    # finally, write "output" to pypdf-output.pdf
+    with open(pdf_file_path, "wb") as output_stream:
+        writer.write(output_stream)
+
+    with open(pdf_file_path, "rb") as input_stream:
+        reader = PdfReader(input_stream)
+        if input_path == "side-by-side-subfig.pdf":
+            extracted_text = reader.pages[0].extract_text()
+            assert extracted_text
+            assert "Lorem ipsum dolor sit amet" in extracted_text
+
+
+@pytest.mark.enable_socket()
+def test_remove_images_sub_level():
+    """Cf #2035"""
+    url = "https://github.com/py-pdf/pypdf/files/12394781/2210.03142-1.pdf"
+    name = "iss2103.pdf"
+    writer = PdfWriter(clone_from=BytesIO(get_data_from_url(url, name=name)))
+    writer.remove_images()
+    assert (
+        len(
+            [
+                o.get_object()
+                for o in writer.pages[0]["/Resources"]["/XObject"]["/Fm1"][
+                    "/Resources"
+                ]["/XObject"]["/Im1"]["/Resources"]["/XObject"].values()
+                if not isinstance(o.get_object(), NullObject)
+            ]
+        )
+        == 0
+    )
+
+
+@pytest.mark.parametrize(
+    "input_path",
+    [
+        "side-by-side-subfig.pdf",
+        "reportlab-inline-image.pdf",
+    ],
+)
+def test_remove_text(input_path, pdf_file_path):
+    pdf_path = RESOURCE_ROOT / input_path
+
+    reader = PdfReader(pdf_path)
+    writer = PdfWriter()
+
+    page = reader.pages[0]
+    writer.insert_page(page, 0)
+    writer.remove_text()
+
+    # finally, write "output" to pypdf-output.pdf
+    with open(pdf_file_path, "wb") as output_stream:
+        writer.write(output_stream)
+
+
+def test_remove_text_all_operators(pdf_file_path):
+    stream = (
+        b"BT "
+        b"/F0 36 Tf "
+        b"50 706 Td "
+        b"36 TL "
+        b"(The Tj operator) Tj "
+        b'1 2 (The double quote operator) " '
+        b"(The single quote operator) ' "
+        b"ET"
+    )
+    pdf_data = (
+        b"%%PDF-1.7\n"
+        b"1 0 obj << /Count 1 /Kids [5 0 R] /Type /Pages >> endobj\n"
+        b"2 0 obj << >> endobj\n"
+        b"3 0 obj << >> endobj\n"
+        b"4 0 obj << /Length %d >>\n"
+        b"stream\n" + (b"%s\n" % stream) + b"endstream\n"
+        b"endobj\n"
+        b"5 0 obj << /Contents 4 0 R /CropBox [0.0 0.0 2550.0 3508.0]\n"
+        b" /MediaBox [0.0 0.0 2550.0 3508.0] /Parent 1 0 R"
+        b" /Resources << /Font << >> >>"
+        b" /Rotate 0 /Type /Page >> endobj\n"
+        b"6 0 obj << /Pages 1 0 R /Type /Catalog >> endobj\n"
+        b"xref 1 6\n"
+        b"%010d 00000 n\n"
+        b"%010d 00000 n\n"
+        b"%010d 00000 n\n"
+        b"%010d 00000 n\n"
+        b"%010d 00000 n\n"
+        b"%010d 00000 n\n"
+        b"trailer << /Root 6 0 R /Size 6 >>\n"
+        b"startxref\n%d\n"
+        b"%%%%EOF"
+    )
+    startx_correction = -1
+    pdf_data = pdf_data % (
+        len(stream),
+        pdf_data.find(b"1 0 obj") + startx_correction,
+        pdf_data.find(b"2 0 obj") + startx_correction,
+        pdf_data.find(b"3 0 obj") + startx_correction,
+        pdf_data.find(b"4 0 obj") + startx_correction,
+        pdf_data.find(b"5 0 obj") + startx_correction,
+        pdf_data.find(b"6 0 obj") + startx_correction,
+        # startx_correction should be -1 due to double % at the beginning
+        # inducing an error on startxref computation
+        pdf_data.find(b"xref"),
+    )
+    pdf_stream = BytesIO(pdf_data)
+
+    reader = PdfReader(pdf_stream, strict=False)
+    writer = PdfWriter()
+
+    page = reader.pages[0]
+    writer.insert_page(page, 0)
+    writer.remove_text()
+
+    # finally, write "output" to pypdf-output.pdf
+    with open(pdf_file_path, "wb") as output_stream:
+        writer.write(output_stream)
+
+
+def test_write_metadata(pdf_file_path):
+    pdf_path = RESOURCE_ROOT / "crazyones.pdf"
+
+    reader = PdfReader(pdf_path)
+    writer = PdfWriter()
+
+    writer.add_page(reader.pages[0])
+    for page in reader.pages:
+        writer.add_page(page)
+
+    metadata = reader.metadata
+    writer.add_metadata(metadata)
+
+    writer.add_metadata({"/Title": "The Crazy Ones"})
+
+    # finally, write data to pypdf-output.pdf
+    with open(pdf_file_path, "wb") as output_stream:
+        writer.write(output_stream)
+
+    # Check if the title was set
+    reader = PdfReader(pdf_file_path)
+    metadata = reader.metadata
+    assert metadata.get("/Title") == "The Crazy Ones"
+
+
+def test_fill_form(pdf_file_path):
+    reader = PdfReader(RESOURCE_ROOT / "form.pdf")
+    writer = PdfWriter()
+
+    writer.append(reader, [0])
+    writer.append(RESOURCE_ROOT / "crazyones.pdf", [0])
+
+    writer.update_page_form_field_values(
+        writer.pages[0], {"foo": "some filled in text"}, flags=1
+    )
+
+    # check if no fields to fill in the page
+    writer.update_page_form_field_values(
+        writer.pages[1], {"foo": "some filled in text"}, flags=1
+    )
+
+    writer.update_page_form_field_values(
+        writer.pages[0], {"foo": "some filled in text"}
+    )
+
+    # write "output" to pypdf-output.pdf
+    with open(pdf_file_path, "wb") as output_stream:
+        writer.write(output_stream)
+
+
+def test_fill_form_with_qualified():
+    reader = PdfReader(RESOURCE_ROOT / "form.pdf")
+    reader.add_form_topname("top")
+
+    writer = PdfWriter()
+    writer.clone_document_from_reader(reader)
+    writer.add_page(reader.pages[0])
+    writer.update_page_form_field_values(
+        writer.pages[0], {"top.foo": "filling"}, flags=1
+    )
+    b = BytesIO()
+    writer.write(b)
+
+    reader2 = PdfReader(b)
+    fields = reader2.get_fields()
+    assert fields["top.foo"]["/V"] == "filling"
+
+
+@pytest.mark.parametrize(
+    ("use_128bit", "user_password", "owner_password"),
+    [(True, "userpwd", "ownerpwd"), (False, "userpwd", "ownerpwd")],
+)
+def test_encrypt(use_128bit, user_password, owner_password, pdf_file_path):
+    reader = PdfReader(RESOURCE_ROOT / "form.pdf")
+    writer = PdfWriter()
+
+    page = reader.pages[0]
+    orig_text = page.extract_text()
+
+    writer.add_page(page)
+
+    with pytest.raises(ValueError, match="owner_pwd of encrypt is deprecated."):
+        writer.encrypt(
+            owner_pwd=user_password,
+            owner_password=owner_password,
+            user_password=user_password,
+            use_128bit=use_128bit,
+        )
+    with pytest.raises(ValueError, match="'user_pwd' argument is deprecated"):
+        writer.encrypt(
+            owner_password=owner_password,
+            user_password=user_password,
+            user_pwd=user_password,
+            use_128bit=use_128bit,
+        )
+    writer.encrypt(
+        user_password=user_password,
+        owner_password=owner_password,
+        use_128bit=use_128bit,
+    )
+
+    # write "output" to pypdf-output.pdf
+    with open(pdf_file_path, "wb") as output_stream:
+        writer.write(output_stream)
+
+    # Test that the data is not there in clear text
+    with open(pdf_file_path, "rb") as input_stream:
+        data = input_stream.read()
+    assert b"foo" not in data
+
+    # Test the user password (str):
+    reader = PdfReader(pdf_file_path, password="userpwd")
+    new_text = reader.pages[0].extract_text()
+    assert reader.metadata.get("/Producer") == "pypdf"
+    assert new_text == orig_text
+
+    # Test the owner password (str):
+    reader = PdfReader(pdf_file_path, password="ownerpwd")
+    new_text = reader.pages[0].extract_text()
+    assert reader.metadata.get("/Producer") == "pypdf"
+    assert new_text == orig_text
+
+    # Test the user password (bytes):
+    reader = PdfReader(pdf_file_path, password=b"userpwd")
+    new_text = reader.pages[0].extract_text()
+    assert reader.metadata.get("/Producer") == "pypdf"
+    assert new_text == orig_text
+
+    # Test the owner password (bytes):
+    reader = PdfReader(pdf_file_path, password=b"ownerpwd")
+    new_text = reader.pages[0].extract_text()
+    assert reader.metadata.get("/Producer") == "pypdf"
+    assert new_text == orig_text
+
+
+def test_add_outline_item(pdf_file_path):
+    reader = PdfReader(RESOURCE_ROOT / "pdflatex-outline.pdf")
+    writer = PdfWriter()
+
+    for page in reader.pages:
+        writer.add_page(page)
+
+    outline_item = writer.add_outline_item(
+        "An outline item",
+        1,
+        None,
+        (255, 0, 15),
+        True,
+        True,
+        Fit.fit(),
+        is_open=False,
+    )
+    _o2a = writer.add_outline_item(
+        "Another", 2, outline_item, None, False, False, Fit.fit()
+    )
+    _o2b = writer.add_outline_item(
+        "Another bis", 2, outline_item, None, False, False, Fit.fit()
+    )
+    outline_item2 = writer.add_outline_item(
+        "An outline item 2",
+        1,
+        None,
+        (255, 0, 15),
+        True,
+        True,
+        Fit.fit(),
+        is_open=True,
+    )
+    _o3a = writer.add_outline_item(
+        "Another 2", 2, outline_item2, None, False, False, Fit.fit()
+    )
+    _o3b = writer.add_outline_item(
+        "Another 2bis", 2, outline_item2, None, False, False, Fit.fit()
+    )
+
+    # write "output" to pypdf-output.pdf
+    with open(pdf_file_path, "w+b") as output_stream:
+        writer.write(output_stream)
+        output_stream.seek(0)
+        reader = PdfReader(output_stream)
+        assert reader.trailer["/Root"]["/Outlines"]["/Count"] == 3
+        assert reader.outline[0]["/Count"] == -2
+        assert reader.outline[0]["/%is_open%"] == False  # noqa
+        assert reader.outline[2]["/Count"] == 2
+        assert reader.outline[2]["/%is_open%"] == True  # noqa
+        assert reader.outline[1][0]["/Count"] == 0
+
+
+def test_add_named_destination(pdf_file_path):
+    reader = PdfReader(RESOURCE_ROOT / "pdflatex-outline.pdf")
+    writer = PdfWriter()
+    assert writer.get_named_dest_root() == []
+
+    for page in reader.pages:
+        writer.add_page(page)
+
+    assert writer.get_named_dest_root() == []
+
+    writer.add_named_destination(TextStringObject("A named dest"), 2)
+    writer.add_named_destination(TextStringObject("A named dest2"), 2)
+
+    with pytest.warns(DeprecationWarning, match="pagenum is deprecated as an argument"):
+        writer.add_named_destination(TextStringObject("A named dest3"), pagenum=2)
+
+    with pytest.raises(ValueError):
+        writer.add_named_destination(
+            TextStringObject("A named dest3"), pagenum=2, page_number=2
+        )
+
+    root = writer.get_named_dest_root()
+    assert root[0] == "A named dest"
+    assert root[1].pdf == writer
+    assert root[1].get_object()["/S"] == NameObject("/GoTo")
+    assert root[1].get_object()["/D"][0] == writer.pages[2].indirect_reference
+    assert root[2] == "A named dest2"
+    assert root[3].pdf == writer
+    assert root[3].get_object()["/S"] == NameObject("/GoTo")
+    assert root[3].get_object()["/D"][0] == writer.pages[2].indirect_reference
+    assert root[4] == "A named dest3"
+
+    # test get_object
+
+    assert writer.get_object(root[1].idnum) == writer.get_object(root[1])
+    with pytest.raises(ValueError) as exc:
+        writer.get_object(reader.pages[0].indirect_reference)
+    assert exc.value.args[0] == "pdf must be self"
+
+    # write "output" to pypdf-output.pdf
+    with open(pdf_file_path, "wb") as output_stream:
+        writer.write(output_stream)
+
+
+def test_add_named_destination_sort_order(pdf_file_path):
+    """
+    Issue #1927 does not appear.
+
+    add_named_destination() maintains the named destination list sort order
+    """
+    writer = PdfWriter()
+
+    assert writer.get_named_dest_root() == []
+
+    writer.add_blank_page(200, 200)
+    writer.add_named_destination("b", 0)
+    # "a" should be moved before "b" on insert
+    writer.add_named_destination("a", 0)
+
+    root = writer.get_named_dest_root()
+
+    assert len(root) == 4
+    assert (
+        root[0] == "a"
+    ), '"a" was not inserted before "b" in the named destination root'
+    assert root[2] == "b"
+
+    # write "output" to pypdf-output.pdf
+    with open(pdf_file_path, "wb") as output_stream:
+        writer.write(output_stream)
+
+
+def test_add_uri(pdf_file_path):
+    reader = PdfReader(RESOURCE_ROOT / "pdflatex-outline.pdf")
+    writer = PdfWriter()
+
+    for page in reader.pages:
+        writer.add_page(page)
+
+    writer.add_uri(
+        1,
+        "http://www.example.com",
+        RectangleObject([0, 0, 100, 100]),
+        border=[1, 2, 3, [4]],
+    )
+    writer.add_uri(
+        2,
+        "https://pypdf.readthedocs.io/en/latest/",
+        RectangleObject([20, 30, 50, 80]),
+        border=[1, 2, 3],
+    )
+    writer.add_uri(
+        3,
+        "https://pypdf.readthedocs.io/en/latest/user/adding-pdf-annotations.html",
+        "[ 200 300 250 350 ]",
+        border=[0, 0, 0],
+    )
+    writer.add_uri(
+        3,
+        "https://pypdf.readthedocs.io/en/latest/user/adding-pdf-annotations.html",
+        [100, 200, 150, 250],
+        border=[0, 0, 0],
+    )
+
+    # write "output" to pypdf-output.pdf
+    with open(pdf_file_path, "wb") as output_stream:
+        writer.write(output_stream)
+
+
+def test_add_link(pdf_file_path):
+    reader = PdfReader(RESOURCE_ROOT / "pdflatex-outline.pdf")
+    writer = PdfWriter()
+
+    for page in reader.pages:
+        writer.add_page(page)
+
+    with pytest.raises(
+        DeprecationError,
+        match=(
+            re.escape(
+                "add_link is deprecated and was removed in pypdf 3.0.0. "
+                "Use add_annotation(pypdf.annotations.Link(...)) instead."
+            )
+        ),
+    ):
+        writer.add_link(
+            1,
+            2,
+            RectangleObject([0, 0, 100, 100]),
+            border=[1, 2, 3, [4]],
+            fit="/Fit",
+        )
+        writer.add_link(
+            2, 3, RectangleObject([20, 30, 50, 80]), [1, 2, 3], "/FitH", None
+        )
+        writer.add_link(
+            3,
+            0,
+            "[ 200 300 250 350 ]",
+            [0, 0, 0],
+            "/XYZ",
+            0,
+            0,
+            2,
+        )
+        writer.add_link(
+            3,
+            0,
+            [100, 200, 150, 250],
+            border=[0, 0, 0],
+        )
+
+    # write "output" to pypdf-output.pdf
+    with open(pdf_file_path, "wb") as output_stream:
+        writer.write(output_stream)
+
+
+def test_io_streams():
+    """This is the example from the docs ("Streaming data")."""
+    filepath = RESOURCE_ROOT / "pdflatex-outline.pdf"
+    with open(filepath, "rb") as fh:
+        bytes_stream = BytesIO(fh.read())
+
+    # Read from bytes stream
+    reader = PdfReader(bytes_stream)
+    assert len(reader.pages) == 4
+
+    # Write to bytes stream
+    writer = PdfWriter()
+    with BytesIO() as output_stream:
+        writer.write(output_stream)
+
+
+def test_regression_issue670(pdf_file_path):
+    filepath = RESOURCE_ROOT / "crazyones.pdf"
+    reader = PdfReader(filepath, strict=False)
+    for _ in range(2):
+        writer = PdfWriter()
+        writer.add_page(reader.pages[0])
+        with open(pdf_file_path, "wb") as f_pdf:
+            writer.write(f_pdf)
+
+
+def test_issue301():
+    """Test with invalid stream length object."""
+    with open(RESOURCE_ROOT / "issue-301.pdf", "rb") as f:
+        reader = PdfReader(f)
+        writer = PdfWriter()
+        writer.append_pages_from_reader(reader)
+        b = BytesIO()
+        writer.write(b)
+
+
+def test_append_pages_from_reader_append():
+    """Use append_pages_from_reader with a callable."""
+    with open(RESOURCE_ROOT / "issue-301.pdf", "rb") as f:
+        reader = PdfReader(f)
+        writer = PdfWriter()
+        writer.append_pages_from_reader(reader, callable)
+        b = BytesIO()
+        writer.write(b)
+
+
+@pytest.mark.enable_socket()
+@pytest.mark.slow()
+@pytest.mark.filterwarnings("ignore::DeprecationWarning")
+def test_sweep_indirect_references_nullobject_exception(pdf_file_path):
+    # TODO: Check this more closely... this looks weird
+    url = "https://corpora.tika.apache.org/base/docs/govdocs1/924/924666.pdf"
+    name = "tika-924666.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+    merger = PdfMerger()
+    merger.append(reader)
+    merger.write(pdf_file_path)
+
+
+@pytest.mark.enable_socket()
+@pytest.mark.slow()
+@pytest.mark.parametrize(
+    ("url", "name"),
+    [
+        (
+            "https://corpora.tika.apache.org/base/docs/govdocs1/924/924666.pdf",
+            "test_sweep_indirect_references_nullobject_exception.pdf",
+        ),
+        (
+            "https://corpora.tika.apache.org/base/docs/govdocs1/922/922840.pdf",
+            "test_write_outline_item_on_page_fitv.pdf",
+        ),
+        ("https://github.com/py-pdf/pypdf/files/10715624/test.pdf", "iss1627.pdf"),
+    ],
+)
+@pytest.mark.filterwarnings("ignore::DeprecationWarning")
+def test_some_appends(pdf_file_path, url, name):
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+    # PdfMerger
+    merger = PdfMerger()
+    merger.append(reader)
+    merger.write(pdf_file_path)
+    # PdfWriter
+    merger = PdfWriter()
+    merger.append(reader)
+    merger.write(pdf_file_path)
+
+
+def test_pdf_header():
+    writer = PdfWriter()
+    assert writer.pdf_header == b"%PDF-1.3"
+
+    reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf")
+    writer.add_page(reader.pages[0])
+    assert writer.pdf_header == b"%PDF-1.5"
+
+    writer.pdf_header = b"%PDF-1.6"
+    assert writer.pdf_header == b"%PDF-1.6"
+
+
+def test_write_dict_stream_object(pdf_file_path):
+    stream = (
+        b"BT "
+        b"/F0 36 Tf "
+        b"50 706 Td "
+        b"36 TL "
+        b"(The Tj operator) Tj "
+        b'1 2 (The double quote operator) " '
+        b"(The single quote operator) ' "
+        b"ET"
+    )
+
+    stream_object = StreamObject()
+    stream_object[NameObject("/Type")] = NameObject("/Text")
+    stream_object._data = stream
+
+    writer = PdfWriter()
+
+    page_object = PageObject.create_blank_page(writer, 1000, 1000)
+    # Construct dictionary object (PageObject) with stream object
+    # Writer will replace this stream object with indirect object
+    page_object[NameObject("/Test")] = stream_object
+
+    page_object = writer.add_page(page_object)
+    with open(pdf_file_path, "wb") as fp:
+        writer.write(fp)
+
+    for k, v in page_object.items():
+        if k == "/Test":
+            assert str(v) != str(stream_object)
+            assert isinstance(v, IndirectObject)
+            assert str(v.get_object()) == str(stream_object)
+            break
+    else:
+        pytest.fail("/Test not found")
+
+    # Check that every key in _idnum_hash is correct
+    objects_hash = [o.hash_value() for o in writer._objects]
+    for k, v in writer._idnum_hash.items():
+        assert v.pdf == writer
+        assert k in objects_hash, "Missing %s" % v
+
+
+def test_add_single_annotation(pdf_file_path):
+    pdf_path = RESOURCE_ROOT / "crazyones.pdf"
+    reader = PdfReader(pdf_path)
+    page = reader.pages[0]
+    writer = PdfWriter()
+    writer.add_page(page)
+
+    annot_dict = {
+        "/Type": "/Annot",
+        "/Subtype": "/Text",
+        "/Rect": [270.75, 596.25, 294.75, 620.25],
+        "/Contents": "Note in second paragraph",
+        "/C": [1, 1, 0],
+        "/M": "D:20220406191858+02'00",
+        "/Popup": {
+            "/Type": "/Annot",
+            "/Subtype": "/Popup",
+            "/Rect": [294.75, 446.25, 494.75, 596.25],
+            "/M": "D:20220406191847+02'00",
+        },
+        "/T": "moose",
+    }
+    writer.add_annotation(0, annot_dict)
+
+    # Inspect manually by adding 'assert False' and viewing the PDF
+    with open(pdf_file_path, "wb") as fp:
+        writer.write(fp)
+
+
+def test_deprecation_bookmark_decorator():
+    reader = PdfReader(RESOURCE_ROOT / "outlines-with-invalid-destinations.pdf")
+    page = reader.pages[0]
+    outline_item = reader.outline[0]
+    writer = PdfWriter()
+    writer.add_page(page)
+    with pytest.raises(
+        DeprecationError,
+        match="bookmark is deprecated as an argument. Use outline_item instead",
+    ):
+        writer.add_outline_item_dict(bookmark=outline_item)
+
+
+@pytest.mark.samples()
+def test_colors_in_outline_item(pdf_file_path):
+    reader = PdfReader(SAMPLE_ROOT / "004-pdflatex-4-pages/pdflatex-4-pages.pdf")
+    writer = PdfWriter()
+    writer.clone_document_from_reader(reader)
+    purple_rgb = (0.5019607843137255, 0.0, 0.5019607843137255)
+    writer.add_outline_item("First Outline Item", page_number=2, color="800080")
+    writer.add_outline_item("Second Outline Item", page_number=3, color="#800080")
+    writer.add_outline_item("Third Outline Item", page_number=4, color=purple_rgb)
+
+    with open(pdf_file_path, "wb") as f:
+        writer.write(f)
+
+    reader2 = PdfReader(pdf_file_path)
+    for outline_item in reader2.outline:
+        # convert float to string because of mutability
+        assert [str(c) for c in outline_item.color] == [str(p) for p in purple_rgb]
+
+
+@pytest.mark.samples()
+def test_write_empty_stream():
+    reader = PdfReader(SAMPLE_ROOT / "004-pdflatex-4-pages/pdflatex-4-pages.pdf")
+    writer = PdfWriter()
+    writer.clone_document_from_reader(reader)
+
+    with pytest.raises(ValueError) as exc:
+        writer.write("")
+    assert exc.value.args[0] == "Output(stream=) is empty."
+
+
+def test_startup_dest():
+    pdf_file_writer = PdfWriter()
+    pdf_file_writer.append_pages_from_reader(PdfReader(RESOURCE_ROOT / "issue-604.pdf"))
+
+    assert pdf_file_writer.open_destination is None
+    pdf_file_writer.open_destination = pdf_file_writer.pages[9]
+    # also checked using Acrobat to verify that the correct page is opened
+    op = pdf_file_writer._root_object["/OpenAction"]
+    assert op[0] == pdf_file_writer.pages[9].indirect_reference
+    assert op[1] == "/Fit"
+    op = pdf_file_writer.open_destination
+    assert op.raw_get("/Page") == pdf_file_writer.pages[9].indirect_reference
+    assert op["/Type"] == "/Fit"
+    pdf_file_writer.open_destination = op
+    assert pdf_file_writer.open_destination == op
+
+    # irrelevant, just for coverage
+    pdf_file_writer._root_object[NameObject("/OpenAction")][0] = NumberObject(0)
+    pdf_file_writer.open_destination
+    with pytest.raises(Exception) as exc:
+        del pdf_file_writer._root_object[NameObject("/OpenAction")][0]
+        pdf_file_writer.open_destination
+    assert "Invalid Destination" in str(exc.value)
+
+    pdf_file_writer.open_destination = "Test"
+    # also checked using Acrobat to verify open_destination
+    op = pdf_file_writer._root_object["/OpenAction"]
+    assert isinstance(op, TextStringObject)
+    assert op == "Test"
+    op = pdf_file_writer.open_destination
+    assert isinstance(op, TextStringObject)
+    assert op == "Test"
+
+    # irrelevant, this is just for coverage
+    pdf_file_writer._root_object[NameObject("/OpenAction")] = NumberObject(0)
+    assert pdf_file_writer.open_destination is None
+    pdf_file_writer.open_destination = None
+    assert "/OpenAction" not in pdf_file_writer._root_object
+    pdf_file_writer.open_destination = None
+
+
+@pytest.mark.enable_socket()
+def test_iss471():
+    url = "https://github.com/py-pdf/pypdf/files/9139245/book.pdf"
+    name = "book_471.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+
+    writer = PdfWriter()
+    writer.append(reader, excluded_fields=[])
+    assert isinstance(
+        writer.pages[0]["/Annots"][0].get_object()["/Dest"], TextStringObject
+    )
+
+
+@pytest.mark.enable_socket()
+def test_reset_translation():
+    url = "https://corpora.tika.apache.org/base/docs/govdocs1/924/924666.pdf"
+    name = "tika-924666.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+    writer = PdfWriter()
+    writer.append(reader, (0, 10))
+    nb = len(writer._objects)
+    writer.append(reader, (0, 10))
+    assert (
+        len(writer._objects) == nb + 11
+    )  # +10 (pages) +1 because of the added outline
+    nb += 1
+    writer.reset_translation(reader)
+    writer.append(reader, (0, 10))
+    assert len(writer._objects) >= nb + 200
+    nb = len(writer._objects)
+    writer.reset_translation(reader.pages[0].indirect_reference)
+    writer.append(reader, (0, 10))
+    assert len(writer._objects) >= nb + 200
+    nb = len(writer._objects)
+    writer.reset_translation()
+    writer.append(reader, (0, 10))
+    assert len(writer._objects) >= nb + 200
+    nb = len(writer.pages)
+    writer.append(reader, [reader.pages[0], reader.pages[0]])
+    assert len(writer.pages) == nb + 2
+
+
+def test_threads_empty():
+    writer = PdfWriter()
+    thr = writer.threads
+    assert isinstance(thr, ArrayObject)
+    assert len(thr) == 0
+    thr2 = writer.threads
+    assert thr == thr2
+
+
+@pytest.mark.enable_socket()
+def test_append_without_annots_and_articles():
+    url = "https://corpora.tika.apache.org/base/docs/govdocs1/924/924666.pdf"
+    name = "tika-924666.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+    writer = PdfWriter()
+    writer.append(reader, None, (0, 10), True, ["/B"])
+    writer.reset_translation()
+    writer.append(reader, (0, 10), True, ["/B"])
+    assert writer.threads == []
+    writer = PdfWriter()
+    writer.append(reader, None, (0, 10), True, ["/Annots"])
+    assert "/Annots" not in writer.pages[5]
+    writer = PdfWriter()
+    writer.append(reader, None, (0, 10), True, [])
+    assert "/Annots" in writer.pages[5]
+    assert len(writer.threads) >= 1
+
+
+@pytest.mark.enable_socket()
+def test_append_multiple():
+    url = "https://corpora.tika.apache.org/base/docs/govdocs1/924/924666.pdf"
+    name = "tika-924666.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+    writer = PdfWriter()
+    writer.append(
+        reader, [0, 0, 0]
+    )  # to demonstrate multiple insertions of the same page at once
+    writer.append(reader, [0, 0, 0])  # second pack
+    pages = writer._root_object["/Pages"]["/Kids"]
+    assert pages[0] not in pages[1:]  # page not repeated
+    assert pages[-1] not in pages[0:-1]  # page not repeated
+
+
+@pytest.mark.samples()
+def test_set_page_label(pdf_file_path):
+    src = RESOURCE_ROOT / "GeoBase_NHNC1_Data_Model_UML_EN.pdf"  # File without labels
+    reader = PdfReader(src)
+
+    expected = [
+        "i",
+        "ii",
+        "1",
+        "2",
+        "A",
+        "B",
+        "1",
+        "2",
+        "3",
+        "4",
+        "A",
+        "i",
+        "I",
+        "II",
+        "1",
+        "2",
+        "3",
+        "I",
+        "II",
+    ]
+
+    # Tests full length with labels assigned at first and last elements
+    # Tests different labels assigned to consecutive ranges
+    writer = PdfWriter()
+    writer.clone_document_from_reader(reader)
+    writer.set_page_label(0, 1, "/r")
+    writer.set_page_label(4, 5, "/A")
+    writer.set_page_label(10, 10, "/A")
+    writer.set_page_label(11, 11, "/r")
+    writer.set_page_label(12, 13, "/R")
+    writer.set_page_label(17, 18, "/R")
+    writer.write(pdf_file_path)
+    assert PdfReader(pdf_file_path).page_labels == expected
+
+    writer = PdfWriter()  # Same labels, different set order
+    writer.clone_document_from_reader(reader)
+    writer.set_page_label(17, 18, "/R")
+    writer.set_page_label(4, 5, "/A")
+    writer.set_page_label(10, 10, "/A")
+    writer.set_page_label(0, 1, "/r")
+    writer.set_page_label(12, 13, "/R")
+    writer.set_page_label(11, 11, "/r")
+    writer.write(pdf_file_path)
+    assert PdfReader(pdf_file_path).page_labels == expected
+
+    # Tests labels assigned only in the middle
+    # Tests label assigned to a range already containing labeled ranges
+    expected = ["1", "2", "i", "ii", "iii", "iv", "v", "1"]
+    writer = PdfWriter()
+    writer.clone_document_from_reader(reader)
+    writer.set_page_label(3, 4, "/a")
+    writer.set_page_label(5, 5, "/A")
+    writer.set_page_label(2, 6, "/r")
+    writer.write(pdf_file_path)
+    assert PdfReader(pdf_file_path).page_labels[: len(expected)] == expected
+
+    # Tests labels assigned inside a previously existing range
+    expected = ["1", "2", "i", "a", "b", "A", "1", "1", "2"]
+    # Ones repeat because the user didn't cover the entire original range
+    writer = PdfWriter()
+    writer.clone_document_from_reader(reader)
+    writer.set_page_label(2, 6, "/r")
+    writer.set_page_label(3, 4, "/a")
+    writer.set_page_label(5, 5, "/A")
+    writer.write(pdf_file_path)
+    assert PdfReader(pdf_file_path).page_labels[: len(expected)] == expected
+
+    # Tests invalid user input
+    writer = PdfWriter()
+    writer.clone_document_from_reader(reader)
+    with pytest.raises(
+        ValueError, match="at least one between style and prefix must be given"
+    ):
+        writer.set_page_label(0, 5, start=2)
+    with pytest.raises(
+        ValueError, match="page_index_from must be equal or greater then 0"
+    ):
+        writer.set_page_label(-1, 5, "/r")
+    with pytest.raises(
+        ValueError, match="page_index_to must be equal or greater then page_index_from"
+    ):
+        writer.set_page_label(5, 0, "/r")
+    with pytest.raises(ValueError, match="page_index_to exceeds number of pages"):
+        writer.set_page_label(0, 19, "/r")
+    with pytest.raises(
+        ValueError, match="if given, start must be equal or greater than one"
+    ):
+        writer.set_page_label(0, 5, "/r", start=-1)
+
+    pdf_file_path.unlink()
+
+    src = (
+        SAMPLE_ROOT / "009-pdflatex-geotopo/GeoTopo.pdf"
+    )  # File with pre existing labels
+    reader = PdfReader(src)
+
+    # Tests adding labels to existing ones
+    expected = ["i", "ii", "A", "B", "1"]
+    writer = PdfWriter()
+    writer.clone_document_from_reader(reader)
+    writer.set_page_label(2, 3, "/A")
+    writer.write(pdf_file_path)
+    assert PdfReader(pdf_file_path).page_labels[: len(expected)] == expected
+
+    # Tests replacing existing labels
+    expected = ["A", "B", "1", "1", "2"]
+    writer = PdfWriter()
+    writer.clone_document_from_reader(reader)
+    writer.set_page_label(0, 1, "/A")
+    writer.write(pdf_file_path)
+    assert PdfReader(pdf_file_path).page_labels[: len(expected)] == expected
+
+    pdf_file_path.unlink()
+
+    # Tests prefix and start.
+    src = RESOURCE_ROOT / "issue-604.pdf"  # File without page labels
+    reader = PdfReader(src)
+    writer = PdfWriter()
+    writer.clone_document_from_reader(reader)
+
+    writer.set_page_label(0, 0, prefix="FRONT")
+    writer.set_page_label(1, 2, "/D", start=2)
+    writer.set_page_label(3, 6, prefix="UPDATES")
+    writer.set_page_label(7, 10, "/D", prefix="THYR-")
+    writer.set_page_label(11, 21, "/D", prefix="PAP-")
+    writer.set_page_label(22, 30, "/D", prefix="FOLL-")
+    writer.set_page_label(31, 39, "/D", prefix="HURT-")
+    writer.write(pdf_file_path)
+
+
+@pytest.mark.enable_socket()
+def test_iss1601():
+    url = "https://github.com/py-pdf/pypdf/files/10579503/badges-38.pdf"
+    name = "badge-38.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+    original_cs_operations = ContentStream(
+        reader.pages[0].get_contents(), reader
+    ).operations
+    writer = PdfWriter()
+    page_1 = writer.add_blank_page(
+        reader.pages[0].mediabox[2], reader.pages[0].mediabox[3]
+    )
+    page_1.merge_transformed_page(reader.pages[0], Transformation())
+    page_1_cs_operations = page_1.get_contents().operations
+    assert is_sublist(original_cs_operations, page_1_cs_operations)
+    page_1 = writer.add_blank_page(
+        reader.pages[0].mediabox[2], reader.pages[0].mediabox[3]
+    )
+    page_1.merge_page(reader.pages[0])
+    page_1_cs_operations = page_1.get_contents().operations
+    assert is_sublist(original_cs_operations, page_1_cs_operations)
+
+
+def test_attachments():
+    writer = PdfWriter()
+    writer.add_blank_page(100, 100)
+    b = BytesIO()
+    writer.write(b)
+    b.seek(0)
+    reader = PdfReader(b)
+    b = None
+    assert reader.attachments == {}
+    assert reader._list_attachments() == []
+    assert reader._get_attachments() == {}
+    to_add = [
+        ("foobar.txt", b"foobarcontent"),
+        ("foobar2.txt", b"foobarcontent2"),
+        ("foobar2.txt", b"2nd_foobarcontent"),
+    ]
+    for name, content in to_add:
+        writer.add_attachment(name, content)
+
+    b = BytesIO()
+    writer.write(b)
+    b.seek(0)
+    reader = PdfReader(b)
+    b = None
+    assert sorted(reader.attachments.keys()) == sorted({name for name, _ in to_add})
+    assert reader.attachments == {
+        "foobar.txt": [b"foobarcontent"],
+        "foobar2.txt": [b"foobarcontent2", b"2nd_foobarcontent"],
+    }
+    assert reader._list_attachments() == [name for name, _ in to_add]
+
+    # We've added the same key twice - hence only 2 and not 3:
+    att = reader._get_attachments()
+    assert len(att) == 2  # we have 2 keys, but 3 attachments!
+
+    # The content for foobar.txt is clear and just a single value:
+    assert att["foobar.txt"] == b"foobarcontent"
+
+    # The content for foobar2.txt is a list!
+    att = reader._get_attachments("foobar2.txt")
+    assert len(att) == 1
+    assert att["foobar2.txt"] == [b"foobarcontent2", b"2nd_foobarcontent"]
+
+    # Let's do both cases with the public interface:
+    assert reader.attachments["foobar.txt"][0] == b"foobarcontent"
+    assert reader.attachments["foobar2.txt"][0] == b"foobarcontent2"
+    assert reader.attachments["foobar2.txt"][1] == b"2nd_foobarcontent"
+
+
+@pytest.mark.enable_socket()
+def test_iss1614():
+    # test of an annotation(link) directly stored in the /Annots in the page
+    url = "https://github.com/py-pdf/pypdf/files/10669995/broke.pdf"
+    name = "iss1614.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+    writer = PdfWriter()
+    writer.append(reader)
+    # test for 2nd error case reported in #1614
+    url = "https://github.com/py-pdf/pypdf/files/10696390/broken.pdf"
+    name = "iss1614.2.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+    writer.append(reader)
+
+
+@pytest.mark.enable_socket()
+def test_new_removes():
+    # test of an annotation(link) directly stored in the /Annots in the page
+    url = "https://github.com/py-pdf/pypdf/files/10807951/tt.pdf"
+    name = "iss1650.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+
+    writer = PdfWriter()
+    writer.clone_document_from_reader(reader)
+    writer.remove_images()
+    b = BytesIO()
+    writer.write(b)
+    bb = bytes(b.getbuffer())
+    assert b"/Im0 Do" not in bb
+    assert b"/Fm0 Do" in bb
+    assert b" TJ" in bb
+
+    writer = PdfWriter()
+    writer.clone_document_from_reader(reader)
+    writer.remove_text()
+    b = BytesIO()
+    writer.write(b)
+    bb = bytes(b.getbuffer())
+    assert b"/Im0" in bb
+    assert b"Chap" not in bb
+    assert b" TJ" not in bb
+
+    url = "https://github.com/py-pdf/pypdf/files/10832029/tt2.pdf"
+    name = "GeoBaseWithComments.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+    writer.append(reader)
+    writer.remove_objects_from_page(writer.pages[0], [ObjectDeletionFlag.LINKS])
+    assert "/Links" not in [
+        a.get_object()["/Subtype"] for a in writer.pages[0]["/Annots"]
+    ]
+    writer.remove_objects_from_page(writer.pages[0], ObjectDeletionFlag.ATTACHMENTS)
+    assert "/FileAttachment" not in [
+        a.get_object()["/Subtype"] for a in writer.pages[0]["/Annots"]
+    ]
+
+    writer.pages[0]["/Annots"].append(
+        DictionaryObject({NameObject("/Subtype"): TextStringObject("/3D")})
+    )
+    assert "/3D" in [a.get_object()["/Subtype"] for a in writer.pages[0]["/Annots"]]
+    writer.remove_objects_from_page(writer.pages[0], ObjectDeletionFlag.OBJECTS_3D)
+    assert "/3D" not in [a.get_object()["/Subtype"] for a in writer.pages[0]["/Annots"]]
+
+    writer.remove_links()
+    assert len(writer.pages[0]["/Annots"]) == 0
+    assert len(writer.pages[3]["/Annots"]) == 0
+
+    writer.remove_annotations("/Text")
+
+
+@pytest.mark.enable_socket()
+def test_late_iss1654():
+    url = "https://github.com/py-pdf/pypdf/files/10935632/bid1.pdf"
+    name = "bid1.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+    writer = PdfWriter()
+    writer.clone_document_from_reader(reader)
+    for p in writer.pages:
+        p.compress_content_streams()
+    b = BytesIO()
+    writer.write(b)
+
+
+@pytest.mark.enable_socket()
+def test_iss1723():
+    # test of an annotation(link) directly stored in the /Annots in the page
+    url = "https://github.com/py-pdf/pypdf/files/11015242/inputFile.pdf"
+    name = "iss1723.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+    writer = PdfWriter()
+    writer.append(reader, (3, 5))
+
+
+@pytest.mark.enable_socket()
+def test_iss1767():
+    # test with a pdf which is buggy because the object 389,0 exists 3 times:
+    # twice to define catalog and one as an XObject inducing a loop when
+    # cloning
+    url = "https://github.com/py-pdf/pypdf/files/11138472/test.pdf"
+    name = "iss1723.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+    PdfWriter(clone_from=reader)
+
+
+@pytest.mark.enable_socket()
+def test_named_dest_page_number():
+    """
+    Closes iss471
+    tests appending with named destinations as integers
+    """
+    url = "https://github.com/py-pdf/pypdf/files/10704333/central.pdf"
+    name = "central.pdf"
+    writer = PdfWriter()
+    writer.add_blank_page(100, 100)
+    writer.append(BytesIO(get_data_from_url(url, name=name)), pages=[0, 1, 2])
+    assert len(writer._root_object["/Names"]["/Dests"]["/Names"]) == 2
+    assert writer._root_object["/Names"]["/Dests"]["/Names"][-1][0] == (1 + 1)
+    writer.append(BytesIO(get_data_from_url(url, name=name)))
+    assert len(writer._root_object["/Names"]["/Dests"]["/Names"]) == 6
+    writer2 = PdfWriter()
+    writer2.add_blank_page(100, 100)
+    dest = writer2.add_named_destination("toto", 0)
+    dest.get_object()[NameObject("/D")][0] = NullObject()
+    b = BytesIO()
+    writer2.write(b)
+    b.seek(0)
+    writer.append(b)
+    assert len(writer._root_object["/Names"]["/Dests"]["/Names"]) == 6
+
+
+@pytest.mark.parametrize(
+    ("write_data_here", "needs_cleanup"),
+    [
+        (
+            "dont_commit_writer.pdf",
+            True,
+        )
+    ],
+)
+def test_update_form_fields(write_data_here, needs_cleanup):
+    writer = PdfWriter(clone_from=RESOURCE_ROOT / "FormTestFromOo.pdf")
+    writer.update_page_form_field_values(
+        writer.pages[0],
+        {
+            "CheckBox1": "/Yes",
+            "Text1": "mon Text1",
+            "Text2": "ligne1\nligne2",
+            "RadioGroup1": "/2",
+            "RdoS1": "/",
+            "Combo1": "!!monCombo!!",
+            "Liste1": "Liste2",
+            "Liste2": ["Lst1", "Lst3"],
+            "DropList1": "DropListe3",
+        },
+        auto_regenerate=False,
+    )
+    del writer.pages[0]["/Annots"][1].get_object()["/AP"]["/N"]
+    writer.update_page_form_field_values(
+        writer.pages[0],
+        {"Text1": "my Text1", "Text2": "ligne1\nligne2\nligne3"},
+        auto_regenerate=False,
+    )
+
+    writer.write("dont_commit_writer.pdf")
+    reader = PdfReader("dont_commit_writer.pdf")
+    flds = reader.get_fields()
+    assert flds["CheckBox1"]["/V"] == "/Yes"
+    assert flds["CheckBox1"].indirect_reference.get_object()["/AS"] == "/Yes"
+    assert (
+        b"(my Text1)"
+        in flds["Text1"].indirect_reference.get_object()["/AP"]["/N"].get_data()
+    )
+    assert flds["Text2"]["/V"] == "ligne1\nligne2\nligne3"
+    assert (
+        b"(ligne3)"
+        in flds["Text2"].indirect_reference.get_object()["/AP"]["/N"].get_data()
+    )
+    assert flds["RadioGroup1"]["/V"] == "/2"
+    assert flds["RadioGroup1"]["/Kids"][0].get_object()["/AS"] == "/Off"
+    assert flds["RadioGroup1"]["/Kids"][1].get_object()["/AS"] == "/2"
+    assert all(x in flds["Liste2"]["/V"] for x in ["Lst1", "Lst3"])
+
+    assert all(x in flds["CheckBox1"]["/_States_"] for x in ["/Off", "/Yes"])
+    assert all(x in flds["RadioGroup1"]["/_States_"] for x in ["/1", "/2", "/3"])
+    assert all(x in flds["Liste1"]["/_States_"] for x in ["Liste1", "Liste2", "Liste3"])
+
+    if needs_cleanup:
+        Path(write_data_here).unlink()
+
+
+@pytest.mark.enable_socket()
+def test_iss1862():
+    # The file here has "/B" entry to define the font in a object below the page
+    # The excluded field shall be considered only at first level (page) and not
+    # below
+    url = "https://github.com/py-pdf/pypdf/files/11708801/intro.pdf"
+    name = "iss1862.pdf"
+    writer = PdfWriter()
+    writer.append(BytesIO(get_data_from_url(url, name=name)))
+    # check that "/B" is in the font
+    writer.pages[0]["/Resources"]["/Font"]["/F1"]["/CharProcs"]["/B"].get_data()
+
+
+def test_empty_objects_before_cloning():
+    pdf_path = RESOURCE_ROOT / "crazyones.pdf"
+    reader = PdfReader(pdf_path)
+    writer = PdfWriter(clone_from=reader)
+    nb_obj_reader = len(reader.xref_objStm) + sum(
+        len(reader.xref[i]) for i in reader.xref
+    )
+    nb_obj_reader -= 1  # for trailer
+    nb_obj_reader -= len(
+        {x: 1 for x, y in reader.xref_objStm.values()}
+    )  # to remove object streams
+    assert len(writer._objects) == nb_obj_reader
+
+
+@pytest.mark.enable_socket()
+def test_watermark():
+    url = "https://github.com/py-pdf/pypdf/files/11985889/bg.pdf"
+    name = "bgwatermark.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+    url = "https://github.com/py-pdf/pypdf/files/11985888/source.pdf"
+    name = "srcwatermark.pdf"
+    writer = PdfWriter(clone_from=BytesIO(get_data_from_url(url, name=name)))
+    for p in writer.pages:
+        p.merge_page(reader.pages[0], over=False)
+
+    assert isinstance(p["/Contents"], ArrayObject)
+    assert isinstance(p["/Contents"][0], IndirectObject)
+
+    b = BytesIO()
+    writer.write(b)
+    assert len(b.getvalue()) < 2.1 * 1024 * 1024
+
+
+@pytest.mark.enable_socket()
+@pytest.mark.timeout(4)  # this was a lot slower before PR #2086
+def test_watermarking_speed():
+    url = "https://github.com/py-pdf/pypdf/files/11985889/bg.pdf"
+    name = "bgwatermark.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+    url = "https://arxiv.org/pdf/2201.00214.pdf"
+    name = "2201.00214.pdf"
+    writer = PdfWriter(clone_from=BytesIO(get_data_from_url(url, name=name)))
+    for p in writer.pages:
+        p.merge_page(reader.pages[0], over=False)
+    out_pdf_bytesio = BytesIO()
+    writer.write(out_pdf_bytesio)
+    pdf_size_in_mib = len(out_pdf_bytesio.getvalue()) / 1024 / 1024
+    assert pdf_size_in_mib < 20
+
+
+@pytest.mark.enable_socket()
+@pytest.mark.skipif(GHOSTSCRIPT_BINARY is None, reason="Requires Ghostscript")
+def test_watermark_rendering(tmp_path):
+    """Ensure the visual appearance of watermarking stays correct."""
+    url = "https://github.com/py-pdf/pypdf/files/11985889/bg.pdf"
+    name = "bgwatermark.pdf"
+    watermark = PdfReader(BytesIO(get_data_from_url(url, name=name))).pages[0]
+    url = "https://github.com/py-pdf/pypdf/files/11985888/source.pdf"
+    name = "srcwatermark.pdf"
+    page = PdfReader(BytesIO(get_data_from_url(url, name=name))).pages[0]
+    writer = PdfWriter()
+    page.merge_page(watermark, over=False)
+    writer.add_page(page)
+
+    target_png_path = tmp_path / "target.png"
+    url = "https://github.com/py-pdf/pypdf/assets/96178532/d5c72d0e-7047-4504-bbf6-bc591c80d7c0"
+    name = "dstwatermark.png"
+    target_png_path.write_bytes(get_data_from_url(url, name=name))
+
+    pdf_path = tmp_path / "out.pdf"
+    png_path = tmp_path / "out.png"
     writer.write(pdf_path)
 
     # False positive: https://github.com/PyCQA/bandit/issues/333
@@ -1610,205 +1613,205 @@ def test_watermark_rendering(tmp_path):
     assert png_path.is_file()
     assert image_similarity(png_path, target_png_path) >= 0.95
 
-
-@pytest.mark.enable_socket()
-def test_da_missing_in_annot():
-    url = "https://github.com/py-pdf/pypdf/files/12136285/Building.Division.Permit.Application.pdf"
-    name = "BuildingDivisionPermitApplication.pdf"
-    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
-    writer = PdfWriter(clone_from=reader)
-    writer.update_page_form_field_values(
-        writer.pages[0], {"PCN-1": "0"}, auto_regenerate=False
-    )
-    b = BytesIO()
-    writer.write(b)
-    reader = PdfReader(BytesIO(b.getvalue()))
-    ff = reader.get_fields()
-    # check for autosize processing
-    assert (
-        b"0 Tf"
-        not in ff["PCN-1"].indirect_reference.get_object()["/AP"]["/N"].get_data()
-    )
-    f2 = writer.get_object(ff["PCN-2"].indirect_reference.idnum)
-    f2[NameObject("/Parent")] = writer.get_object(
-        ff["PCN-1"].indirect_reference.idnum
-    ).indirect_reference
-    writer.update_page_form_field_values(
-        writer.pages[0], {"PCN-2": "1"}, auto_regenerate=False
-    )
-
-
-def test_missing_fields(pdf_file_path):
-    reader = PdfReader(RESOURCE_ROOT / "form.pdf")
-
-    writer = PdfWriter()
-    writer.add_page(reader.pages[0])
-
-    with pytest.raises(PyPdfError) as exc:
-        writer.update_page_form_field_values(
-            writer.pages[0], {"foo": "some filled in text"}, flags=1
-        )
-    assert exc.value.args[0] == "No /AcroForm dictionary in PdfWriter Object"
-
-    writer = PdfWriter()
-    writer.append(reader, [0])
-    del writer._root_object["/AcroForm"]["/Fields"]
-    with pytest.raises(PyPdfError) as exc:
-        writer.update_page_form_field_values(
-            writer.pages[0], {"foo": "some filled in text"}, flags=1
-        )
-    assert exc.value.args[0] == "No /Fields dictionary in Pdf in PdfWriter Object"
-
-
-def test_missing_info():
-    reader = PdfReader(RESOURCE_ROOT / "missing_info.pdf")
-
-    writer = PdfWriter(clone_from=reader)
-    assert len(writer.pages) == len(reader.pages)
-
-
-@pytest.mark.enable_socket()
-def test_germanfields():
-    """Cf #2035"""
-    url = "https://github.com/py-pdf/pypdf/files/12194195/test.pdf"
-    name = "germanfields.pdf"
-    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
-    writer = PdfWriter(clone_from=reader)
-    form_fields = {"Text Box 1": "test æ ø å"}
-    writer.update_page_form_field_values(
-        writer.pages[0], form_fields, auto_regenerate=False
-    )
-    bytes_stream = BytesIO()
-    writer.write(bytes_stream)
-    bytes_stream.seek(0)
-    reader2 = PdfReader(bytes_stream)
-    assert (
-        b"test \xe6 \xf8 \xe5"
-        in reader2.get_fields()["Text Box 1"]
-        .indirect_reference.get_object()["/AP"]["/N"]
-        .get_data()
-    )
-
-
-@pytest.mark.enable_socket()
-def test_no_t_in_articles():
-    """Cf #2078"""
-    url = "https://github.com/py-pdf/pypdf/files/12311735/bad.pdf"
-    name = "iss2078.pdf"
-    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
-    writer = PdfWriter()
-    writer.append(reader)
-
-
-@pytest.mark.enable_socket()
-def test_no_i_in_articles():
-    """Cf #2089"""
-    url = "https://github.com/py-pdf/pypdf/files/12352793/kim2002.pdf"
-    name = "iss2089.pdf"
-    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
-    writer = PdfWriter()
-    writer.append(reader)
-
-
-@pytest.mark.enable_socket()
-def test_damaged_pdf_length_returning_none():
-    """
-    Cf #140
-    https://github.com/py-pdf/pypdf/issues/140#issuecomment-1685380549
-    """
-    url = "https://github.com/py-pdf/pypdf/files/12168578/bad_pdf_example.pdf"
-    name = "iss140_bad_pdf.pdf"
-    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
-    writer = PdfWriter()
-    writer.append(reader)
-
-
-@pytest.mark.enable_socket()
-def test_viewerpreferences():
-    """
-    Add Tests for ViewerPreferences
-    https://github.com/py-pdf/pypdf/issues/140#issuecomment-1685380549
-    """
-    url = "https://github.com/py-pdf/pypdf/files/9175966/2015._pb_decode_pg0.pdf"
-    name = "2015._pb_decode_pg0.pdf"
-    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
-    v = reader.viewer_preferences
-    assert v.center_window == True  # noqa: E712
-    writer = PdfWriter(clone_from=reader)
-    v = writer.viewer_preferences
-    assert v.center_window == True  # noqa: E712
-    v.center_window = False
-    assert (
-        writer._root_object["/ViewerPreferences"]["/CenterWindow"]
-        == False  # noqa: E712
-    )
-    assert v.print_area == "/CropBox"
-    with pytest.raises(ValueError):
-        v.non_fullscreen_pagemode = "toto"
-    with pytest.raises(ValueError):
-        v.non_fullscreen_pagemode = "/toto"
-    v.non_fullscreen_pagemode = "/UseOutlines"
-    assert (
-        writer._root_object["/ViewerPreferences"]["/NonFullScreenPageMode"]
-        == "/UseOutlines"
-    )
-    writer = PdfWriter(clone_from=reader)
-    v = writer.viewer_preferences
-    assert v.center_window == True  # noqa: E712
-    v.center_window = False
-    assert (
-        writer._root_object["/ViewerPreferences"]["/CenterWindow"]
-        == False  # noqa: E712
-    )
-
-    writer = PdfWriter(clone_from=reader)
-    writer._root_object[NameObject("/ViewerPreferences")] = writer._add_object(
-        writer._root_object["/ViewerPreferences"]
-    )
-    v = writer.viewer_preferences
-    v.center_window = False
-    assert (
-        writer._root_object["/ViewerPreferences"]["/CenterWindow"]
-        == False  # noqa: E712
-    )
-    v.num_copies = 1
-    assert v.num_copies == 1
-    assert v.print_pagerange is None
-    with pytest.raises(ValueError):
-        v.print_pagerange = "toto"
-    v.print_pagerange = ArrayObject()
-    assert len(v.print_pagerange) == 0
-
-    writer.create_viewer_preference()
-    assert len(writer._root_object["/ViewerPreferences"]) == 0
-
-    del reader.trailer["/Root"]["/ViewerPreferences"]
-    assert reader.viewer_preferences is None
-    writer = PdfWriter(clone_from=reader)
-    assert writer.viewer_preferences is None
-
-
-def test_extra_spaces_in_da_text(caplog):
-    writer = PdfWriter(clone_from=RESOURCE_ROOT / "form.pdf")
-    t = writer.pages[0]["/Annots"][0].get_object()["/DA"]
-    t = t.replace("/Helv", "/Helv   ")
-    writer.pages[0]["/Annots"][0].get_object()[NameObject("/DA")] = TextStringObject(t)
-    writer.update_page_form_field_values(
-        writer.pages[0], {"foo": "abcd"}, auto_regenerate=False
-    )
-    t = writer.pages[0]["/Annots"][0].get_object()["/AP"]["/N"].get_data()
-    assert "Font dictionary for  not found." not in caplog.text
-    assert b"/Helv" in t
-    assert b"(abcd)" in t
-
-
-@pytest.mark.enable_socket()
-def test_object_contains_indirect_reference_to_self():
-    url = "https://github.com/py-pdf/pypdf/files/12389243/testbook.pdf"
-    name = "iss2102.pdf"
-    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
-    writer = PdfWriter()
-    width, height = 595, 841
-    outpage = writer.add_blank_page(width, height)
-    outpage.merge_page(reader.pages[6])
-    writer.append(reader)
+
+@pytest.mark.enable_socket()
+def test_da_missing_in_annot():
+    url = "https://github.com/py-pdf/pypdf/files/12136285/Building.Division.Permit.Application.pdf"
+    name = "BuildingDivisionPermitApplication.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+    writer = PdfWriter(clone_from=reader)
+    writer.update_page_form_field_values(
+        writer.pages[0], {"PCN-1": "0"}, auto_regenerate=False
+    )
+    b = BytesIO()
+    writer.write(b)
+    reader = PdfReader(BytesIO(b.getvalue()))
+    ff = reader.get_fields()
+    # check for autosize processing
+    assert (
+        b"0 Tf"
+        not in ff["PCN-1"].indirect_reference.get_object()["/AP"]["/N"].get_data()
+    )
+    f2 = writer.get_object(ff["PCN-2"].indirect_reference.idnum)
+    f2[NameObject("/Parent")] = writer.get_object(
+        ff["PCN-1"].indirect_reference.idnum
+    ).indirect_reference
+    writer.update_page_form_field_values(
+        writer.pages[0], {"PCN-2": "1"}, auto_regenerate=False
+    )
+
+
+def test_missing_fields(pdf_file_path):
+    reader = PdfReader(RESOURCE_ROOT / "form.pdf")
+
+    writer = PdfWriter()
+    writer.add_page(reader.pages[0])
+
+    with pytest.raises(PyPdfError) as exc:
+        writer.update_page_form_field_values(
+            writer.pages[0], {"foo": "some filled in text"}, flags=1
+        )
+    assert exc.value.args[0] == "No /AcroForm dictionary in PdfWriter Object"
+
+    writer = PdfWriter()
+    writer.append(reader, [0])
+    del writer._root_object["/AcroForm"]["/Fields"]
+    with pytest.raises(PyPdfError) as exc:
+        writer.update_page_form_field_values(
+            writer.pages[0], {"foo": "some filled in text"}, flags=1
+        )
+    assert exc.value.args[0] == "No /Fields dictionary in Pdf in PdfWriter Object"
+
+
+def test_missing_info():
+    reader = PdfReader(RESOURCE_ROOT / "missing_info.pdf")
+
+    writer = PdfWriter(clone_from=reader)
+    assert len(writer.pages) == len(reader.pages)
+
+
+@pytest.mark.enable_socket()
+def test_germanfields():
+    """Cf #2035"""
+    url = "https://github.com/py-pdf/pypdf/files/12194195/test.pdf"
+    name = "germanfields.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+    writer = PdfWriter(clone_from=reader)
+    form_fields = {"Text Box 1": "test æ ø å"}
+    writer.update_page_form_field_values(
+        writer.pages[0], form_fields, auto_regenerate=False
+    )
+    bytes_stream = BytesIO()
+    writer.write(bytes_stream)
+    bytes_stream.seek(0)
+    reader2 = PdfReader(bytes_stream)
+    assert (
+        b"test \xe6 \xf8 \xe5"
+        in reader2.get_fields()["Text Box 1"]
+        .indirect_reference.get_object()["/AP"]["/N"]
+        .get_data()
+    )
+
+
+@pytest.mark.enable_socket()
+def test_no_t_in_articles():
+    """Cf #2078"""
+    url = "https://github.com/py-pdf/pypdf/files/12311735/bad.pdf"
+    name = "iss2078.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+    writer = PdfWriter()
+    writer.append(reader)
+
+
+@pytest.mark.enable_socket()
+def test_no_i_in_articles():
+    """Cf #2089"""
+    url = "https://github.com/py-pdf/pypdf/files/12352793/kim2002.pdf"
+    name = "iss2089.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+    writer = PdfWriter()
+    writer.append(reader)
+
+
+@pytest.mark.enable_socket()
+def test_damaged_pdf_length_returning_none():
+    """
+    Cf #140
+    https://github.com/py-pdf/pypdf/issues/140#issuecomment-1685380549
+    """
+    url = "https://github.com/py-pdf/pypdf/files/12168578/bad_pdf_example.pdf"
+    name = "iss140_bad_pdf.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+    writer = PdfWriter()
+    writer.append(reader)
+
+
+@pytest.mark.enable_socket()
+def test_viewerpreferences():
+    """
+    Add Tests for ViewerPreferences
+    https://github.com/py-pdf/pypdf/issues/140#issuecomment-1685380549
+    """
+    url = "https://github.com/py-pdf/pypdf/files/9175966/2015._pb_decode_pg0.pdf"
+    name = "2015._pb_decode_pg0.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+    v = reader.viewer_preferences
+    assert v.center_window == True  # noqa: E712
+    writer = PdfWriter(clone_from=reader)
+    v = writer.viewer_preferences
+    assert v.center_window == True  # noqa: E712
+    v.center_window = False
+    assert (
+        writer._root_object["/ViewerPreferences"]["/CenterWindow"]
+        == False  # noqa: E712
+    )
+    assert v.print_area == "/CropBox"
+    with pytest.raises(ValueError):
+        v.non_fullscreen_pagemode = "toto"
+    with pytest.raises(ValueError):
+        v.non_fullscreen_pagemode = "/toto"
+    v.non_fullscreen_pagemode = "/UseOutlines"
+    assert (
+        writer._root_object["/ViewerPreferences"]["/NonFullScreenPageMode"]
+        == "/UseOutlines"
+    )
+    writer = PdfWriter(clone_from=reader)
+    v = writer.viewer_preferences
+    assert v.center_window == True  # noqa: E712
+    v.center_window = False
+    assert (
+        writer._root_object["/ViewerPreferences"]["/CenterWindow"]
+        == False  # noqa: E712
+    )
+
+    writer = PdfWriter(clone_from=reader)
+    writer._root_object[NameObject("/ViewerPreferences")] = writer._add_object(
+        writer._root_object["/ViewerPreferences"]
+    )
+    v = writer.viewer_preferences
+    v.center_window = False
+    assert (
+        writer._root_object["/ViewerPreferences"]["/CenterWindow"]
+        == False  # noqa: E712
+    )
+    v.num_copies = 1
+    assert v.num_copies == 1
+    assert v.print_pagerange is None
+    with pytest.raises(ValueError):
+        v.print_pagerange = "toto"
+    v.print_pagerange = ArrayObject()
+    assert len(v.print_pagerange) == 0
+
+    writer.create_viewer_preference()
+    assert len(writer._root_object["/ViewerPreferences"]) == 0
+
+    del reader.trailer["/Root"]["/ViewerPreferences"]
+    assert reader.viewer_preferences is None
+    writer = PdfWriter(clone_from=reader)
+    assert writer.viewer_preferences is None
+
+
+def test_extra_spaces_in_da_text(caplog):
+    writer = PdfWriter(clone_from=RESOURCE_ROOT / "form.pdf")
+    t = writer.pages[0]["/Annots"][0].get_object()["/DA"]
+    t = t.replace("/Helv", "/Helv   ")
+    writer.pages[0]["/Annots"][0].get_object()[NameObject("/DA")] = TextStringObject(t)
+    writer.update_page_form_field_values(
+        writer.pages[0], {"foo": "abcd"}, auto_regenerate=False
+    )
+    t = writer.pages[0]["/Annots"][0].get_object()["/AP"]["/N"].get_data()
+    assert "Font dictionary for  not found." not in caplog.text
+    assert b"/Helv" in t
+    assert b"(abcd)" in t
+
+
+@pytest.mark.enable_socket()
+def test_object_contains_indirect_reference_to_self():
+    url = "https://github.com/py-pdf/pypdf/files/12389243/testbook.pdf"
+    name = "iss2102.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+    writer = PdfWriter()
+    width, height = 595, 841
+    outpage = writer.add_blank_page(width, height)
+    outpage.merge_page(reader.pages[6])
+    writer.append(reader)

From 97026c560c27b93001daf188b1ad9249735a553d Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Sat, 16 Sep 2023 13:26:52 +0200
Subject: [PATCH 02/13] fix: unify embedded_files, add writer accessors, rename list_set to list_add

---
 pypdf/_reader.py                  | 12 ++-------
 pypdf/_writer.py                  | 45 +++++++++++++++++++++++++++++++
 pypdf/generic/_data_structures.py | 17 ++++++------
 3 files changed, 56 insertions(+), 18 deletions(-)

diff --git a/pypdf/_reader.py b/pypdf/_reader.py
index 01529ec47..dcc324ed5 100644
--- a/pypdf/_reader.py
+++ b/pypdf/_reader.py
@@ -2230,21 +2230,13 @@ def _get_embedded_files_root(self) -> Optional[NameTree]:
         return NameTree(efo)
 
     @property
-    def detailed_embedded_files(self) -> Optional[Mapping[str, PdfObject]]:
+    def embedded_files(self) -> Optional[Mapping[str, List[PdfObject]]]:
         ef = self._get_embedded_files_root()
         if ef:
             return ef.list_items()
         else:
             return None
 
-    @property
-    def embedded_files(self) -> Optional[Mapping[str, List[bytes]]]:
-        ef = self._get_embedded_files_root()
-        if ef:
-            return {k: v["/EF"]["/F"].get_data() for k, v in ef.list_items().items()}  # type: ignore
-        else:
-            return None
-
     @property
     def attachments(self) -> Mapping[str, List[bytes]]:
         ef = self._get_embedded_files_root()
@@ -2252,7 +2244,7 @@ def attachments(self) -> Mapping[str, List[bytes]]:
             d = {}
             for k, v in ef.list_items().items():
                 if isinstance(v, list):
-                    d[k] = [e["/EF"]["/F"].get_data() for e in v]
+                    d[k] = [e["/EF"]["/F"].get_data() for e in v]  # type: ignore
             return d
         else:
             return {}
diff --git a/pypdf/_writer.py b/pypdf/_writer.py
index 1d70bba50..befe617d0 100644
--- a/pypdf/_writer.py
+++ b/pypdf/_writer.py
@@ -46,6 +46,7 @@
     Dict,
     Iterable,
     List,
+    Mapping,
     Optional,
     Pattern,
     Tuple,
@@ -104,6 +105,7 @@
     FloatObject,
     IndirectObject,
     NameObject,
+    NameTree,
     NullObject,
     NumberObject,
     PdfObject,
@@ -699,6 +701,49 @@ def addJS(self, javascript: str) -> None:  # deprecated
         deprecation_with_replacement("addJS", "add_js", "3.0.0")
         return self.add_js(javascript)
 
+    def _get_embedded_files_root(self) -> Optional[NameTree]:
+        """
+        Returns the EmbeddedFiles root as a NameTree Object
+        if the root does not exists, return None
+        """
+        catalog = self._root_object
+        if "/Names" not in catalog:
+            return None
+        ef = cast(DictionaryObject, catalog["/Names"]).get("/EmbeddedFiles", None)
+        if ef is None:
+            return None
+        efo = ef.get_object()
+        # not for reader
+        """
+            if not isinstance(efo,NameTree):
+            if isinstance(ef,IndirectObject):
+                ef.replace_object(efo)
+            else:
+                cast(DictionaryObject,catalog["/Names"])[
+                    NameObject("/EmbeddedFiles")] = NameTree(efo)
+        """
+        return NameTree(efo)
+
+    @property
+    def embedded_files(self) -> Optional[Mapping[str, List[PdfObject]]]:
+        ef = self._get_embedded_files_root()
+        if ef:
+            return ef.list_items()
+        else:
+            return None
+
+    @property
+    def attachments(self) -> Mapping[str, List[bytes]]:
+        ef = self._get_embedded_files_root()
+        if ef:
+            d = {}
+            for k, v in ef.list_items().items():
+                if isinstance(v, list):
+                    d[k] = [e["/EF"]["/F"].get_data() for e in v]  # type: ignore
+            return d
+        else:
+            return {}
+
     def add_attachment(self, filename: str, data: Union[str, bytes]) -> None:
         """
         Embed a file inside the PDF.
diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py
index 59e28250a..dd14945fa 100644
--- a/pypdf/generic/_data_structures.py
+++ b/pypdf/generic/_data_structures.py
@@ -38,6 +38,7 @@
     Dict,
     Iterable,
     List,
+    Mapping,
     Optional,
     Sequence,
     Tuple,
@@ -1490,7 +1491,7 @@ def _list(o: Optional[PdfObject]) -> List[str]:
         _l.sort()
         return _l
 
-    def list_items(self) -> dict[str, PdfObject]:
+    def list_items(self) -> Mapping[str, List[PdfObject]]:
         """
         Provides the Name Tree Entries as a dictionary
 
@@ -1499,8 +1500,8 @@ def list_items(self) -> dict[str, PdfObject]:
         """
 
         def _list(
-            o: Optional[PdfObject], lout: List[Tuple[str, PdfObject]]
-        ) -> List[Tuple[str, PdfObject]]:
+            o: Optional[PdfObject], lout: List[Tuple[str, List[PdfObject]]]
+        ) -> List[Tuple[str, List[PdfObject]]]:
             def _append_with_dup(
                 ll: List[Tuple[str, Any]], _l: List[Tuple[str, Any]]
             ) -> None:
@@ -1530,7 +1531,7 @@ def _append_with_dup(
                 _list(x.get_object(), lout)
             return lout
 
-        _l: List[Tuple[str, PdfObject]] = []
+        _l: List[Tuple[str, List[PdfObject]]] = []
         _list(self, _l)
         return dict(_l)
 
@@ -1564,7 +1565,7 @@ def _get(key: str, o: Optional[PdfObject]) -> List[PdfObject]:
 
         return _get(key, self)
 
-    def list_set(
+    def list_add(
         self, key: str, data: PdfObject, overwrite: bool = False
     ) -> Optional[IndirectObject]:
         """
@@ -1602,7 +1603,7 @@ def _update_limits(
                 return True
             return False
 
-        def _set_in(o: Optional[PdfObject], app: bool = True) -> Optional[PdfObject]:
+        def _add_in(o: Optional[PdfObject], app: bool = True) -> Optional[PdfObject]:
             nonlocal overwrite, writer, key, data
             if o is None:
                 return None
@@ -1641,13 +1642,13 @@ def _set_in(o: Optional[PdfObject], app: bool = True) -> Optional[PdfObject]:
             else:  # kids
                 ar = cast(ArrayObject, o["/Kids"])
                 for x in ar:
-                    r = _set_in(x, x == ar[-1])
+                    r = _add_in(x, x == ar[-1])
                     if r:
                         _update_limits(o, key, key)
                         return r
                 return None
 
-        o = _set_in(self, True)
+        o = _add_in(self, True)
         return o.indirect_reference if o is not None else None
 
 

From 1124824b547dbc6d09a3d4d4bf1297b7840cae60 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Sun, 17 Sep 2023 23:03:35 +0200
Subject: [PATCH 03/13] add_attachments and rf

---
 pypdf/_reader.py                  | 4647 +++++++++++++++--------------
 pypdf/_writer.py                  |  137 +-
 pypdf/constants.py                |    5 +-
 pypdf/generic/_data_structures.py |   29 +-
 4 files changed, 2439 insertions(+), 2379 deletions(-)

diff --git a/pypdf/_reader.py b/pypdf/_reader.py
index dcc324ed5..8bd9e2454 100644
--- a/pypdf/_reader.py
+++ b/pypdf/_reader.py
@@ -1,2316 +1,2331 @@
-# Copyright (c) 2006, Mathieu Fenniak
-# Copyright (c) 2007, Ashish Kulkarni <kulkarni.ashish@gmail.com>
-#
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met:
-#
-# * Redistributions of source code must retain the above copyright notice,
-# this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright notice,
-# this list of conditions and the following disclaimer in the documentation
-# and/or other materials provided with the distribution.
-# * The name of the author may not be used to endorse or promote products
-# derived from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-# POSSIBILITY OF SUCH DAMAGE.
-
-import os
-import re
-import struct
-import zlib
-from datetime import datetime
-from io import BytesIO, UnsupportedOperation
-from pathlib import Path
-from typing import (
-    Any,
-    Callable,
-    Dict,
-    Iterable,
-    List,
-    Mapping,
-    Optional,
-    Tuple,
-    Union,
-    cast,
-)
-
-from ._encryption import Encryption, PasswordType
-from ._page import PageObject, _VirtualList
-from ._page_labels import index2label as page_index2page_label
-from ._utils import (
-    StrByteType,
-    StreamType,
-    b_,
-    deprecate_no_replacement,
-    deprecation_no_replacement,
-    deprecation_with_replacement,
-    logger_warning,
-    parse_iso8824_date,
-    read_non_whitespace,
-    read_previous_line,
-    read_until_whitespace,
-    skip_over_comment,
-    skip_over_whitespace,
-)
-from .constants import CatalogAttributes as CA
-from .constants import CatalogDictionary as CD
-from .constants import (
-    CheckboxRadioButtonAttributes,
-    GoToActionArguments,
-)
-from .constants import Core as CO
-from .constants import DocumentInformationAttributes as DI
-from .constants import FieldDictionaryAttributes as FA
-from .constants import PageAttributes as PG
-from .constants import PagesAttributes as PA
-from .constants import TrailerKeys as TK
-from .errors import (
-    EmptyFileError,
-    FileNotDecryptedError,
-    PdfReadError,
-    PdfStreamError,
-    WrongPasswordError,
-)
-from .generic import (
-    ArrayObject,
-    BooleanObject,
-    ContentStream,
-    DecodedStreamObject,
-    Destination,
-    DictionaryObject,
-    EncodedStreamObject,
-    Field,
-    Fit,
-    FloatObject,
-    IndirectObject,
-    NameObject,
-    NameTree,
-    NullObject,
-    NumberObject,
-    PdfObject,
-    TextStringObject,
-    TreeObject,
-    ViewerPreferences,
-    read_object,
-)
-from .types import OutlineType, PagemodeType
-from .xmp import XmpInformation
-
-
-def convert_to_int(d: bytes, size: int) -> Union[int, Tuple[Any, ...]]:
-    if size > 8:
-        raise PdfReadError("invalid size in convert_to_int")
-    d = b"\x00\x00\x00\x00\x00\x00\x00\x00" + d
-    d = d[-8:]
-    return struct.unpack(">q", d)[0]
-
-
-def convertToInt(d: bytes, size: int) -> Union[int, Tuple[Any, ...]]:  # deprecated
-    deprecation_with_replacement("convertToInt", "convert_to_int")
-    return convert_to_int(d, size)
-
-
-class DocumentInformation(DictionaryObject):
-    """
-    A class representing the basic document metadata provided in a PDF File.
-    This class is accessible through
-    :py:class:`PdfReader.metadata<pypdf.PdfReader.metadata>`.
-
-    All text properties of the document metadata have
-    *two* properties, eg. author and author_raw. The non-raw property will
-    always return a ``TextStringObject``, making it ideal for a case where
-    the metadata is being displayed. The raw property can sometimes return
-    a ``ByteStringObject``, if pypdf was unable to decode the string's
-    text encoding; this requires additional safety in the caller and
-    therefore is not as commonly accessed.
-    """
-
-    def __init__(self) -> None:
-        DictionaryObject.__init__(self)
-
-    def _get_text(self, key: str) -> Optional[str]:
-        retval = self.get(key, None)
-        if isinstance(retval, TextStringObject):
-            return retval
-        return None
-
-    def getText(self, key: str) -> Optional[str]:  # deprecated
-        """
-        Use the attributes (e.g. :py:attr:`title` / :py:attr:`author`).
-
-        .. deprecated:: 1.28.0
-        """
-        deprecation_no_replacement("getText", "3.0.0")
-        return self._get_text(key)
-
-    @property
-    def title(self) -> Optional[str]:
-        """
-        Read-only property accessing the document's title.
-
-        Returns a ``TextStringObject`` or ``None`` if the title is not
-        specified.
-        """
-        return (
-            self._get_text(DI.TITLE) or self.get(DI.TITLE).get_object()  # type: ignore
-            if self.get(DI.TITLE)
-            else None
-        )
-
-    @property
-    def title_raw(self) -> Optional[str]:
-        """The "raw" version of title; can return a ``ByteStringObject``."""
-        return self.get(DI.TITLE)
-
-    @property
-    def author(self) -> Optional[str]:
-        """
-        Read-only property accessing the document's author.
-
-        Returns a ``TextStringObject`` or ``None`` if the author is not
-        specified.
-        """
-        return self._get_text(DI.AUTHOR)
-
-    @property
-    def author_raw(self) -> Optional[str]:
-        """The "raw" version of author; can return a ``ByteStringObject``."""
-        return self.get(DI.AUTHOR)
-
-    @property
-    def subject(self) -> Optional[str]:
-        """
-        Read-only property accessing the document's subject.
-
-        Returns a ``TextStringObject`` or ``None`` if the subject is not
-        specified.
-        """
-        return self._get_text(DI.SUBJECT)
-
-    @property
-    def subject_raw(self) -> Optional[str]:
-        """The "raw" version of subject; can return a ``ByteStringObject``."""
-        return self.get(DI.SUBJECT)
-
-    @property
-    def creator(self) -> Optional[str]:
-        """
-        Read-only property accessing the document's creator.
-
-        If the document was converted to PDF from another format, this is the
-        name of the application (e.g. OpenOffice) that created the original
-        document from which it was converted. Returns a ``TextStringObject`` or
-        ``None`` if the creator is not specified.
-        """
-        return self._get_text(DI.CREATOR)
-
-    @property
-    def creator_raw(self) -> Optional[str]:
-        """The "raw" version of creator; can return a ``ByteStringObject``."""
-        return self.get(DI.CREATOR)
-
-    @property
-    def producer(self) -> Optional[str]:
-        """
-        Read-only property accessing the document's producer.
-
-        If the document was converted to PDF from another format, this is the
-        name of the application (for example, OSX Quartz) that converted it to
-        PDF. Returns a ``TextStringObject`` or ``None`` if the producer is not
-        specified.
-        """
-        return self._get_text(DI.PRODUCER)
-
-    @property
-    def producer_raw(self) -> Optional[str]:
-        """The "raw" version of producer; can return a ``ByteStringObject``."""
-        return self.get(DI.PRODUCER)
-
-    @property
-    def creation_date(self) -> Optional[datetime]:
-        """Read-only property accessing the document's creation date."""
-        return parse_iso8824_date(self._get_text(DI.CREATION_DATE))
-
-    @property
-    def creation_date_raw(self) -> Optional[str]:
-        """
-        The "raw" version of creation date; can return a ``ByteStringObject``.
-
-        Typically in the format ``D:YYYYMMDDhhmmss[+Z-]hh'mm`` where the suffix
-        is the offset from UTC.
-        """
-        return self.get(DI.CREATION_DATE)
-
-    @property
-    def modification_date(self) -> Optional[datetime]:
-        """
-        Read-only property accessing the document's modification date.
-
-        The date and time the document was most recently modified.
-        """
-        return parse_iso8824_date(self._get_text(DI.MOD_DATE))
-
-    @property
-    def modification_date_raw(self) -> Optional[str]:
-        """
-        The "raw" version of modification date; can return a
-        ``ByteStringObject``.
-
-        Typically in the format ``D:YYYYMMDDhhmmss[+Z-]hh'mm`` where the suffix
-        is the offset from UTC.
-        """
-        return self.get(DI.MOD_DATE)
-
-
-class PdfReader:
-    """
-    Initialize a PdfReader object.
-
-    This operation can take some time, as the PDF stream's cross-reference
-    tables are read into memory.
-
-    Args:
-        stream: A File object or an object that supports the standard read
-            and seek methods similar to a File object. Could also be a
-            string representing a path to a PDF file.
-        strict: Determines whether user should be warned of all
-            problems and also causes some correctable problems to be fatal.
-            Defaults to ``False``.
-        password: Decrypt PDF file at initialization. If the
-            password is None, the file will not be decrypted.
-            Defaults to ``None``
-    """
-
-    @property
-    def viewer_preferences(self) -> Optional[ViewerPreferences]:
-        """Returns the existing ViewerPreferences as an overloaded dictionary."""
-        o = cast(DictionaryObject, self.trailer["/Root"]).get(
-            CD.VIEWER_PREFERENCES, None
-        )
-        if o is None:
-            return None
-        o = o.get_object()
-        if not isinstance(o, ViewerPreferences):
-            o = ViewerPreferences(o)
-        return o
-
-    def __init__(
-        self,
-        stream: Union[StrByteType, Path],
-        strict: bool = False,
-        password: Union[None, str, bytes] = None,
-    ) -> None:
-        self.strict = strict
-        self.flattened_pages: Optional[List[PageObject]] = None
-        self.resolved_objects: Dict[Tuple[Any, Any], Optional[PdfObject]] = {}
-        self.xref_index = 0
-        self._page_id2num: Optional[
-            Dict[Any, Any]
-        ] = None  # map page indirect_reference number to Page Number
-        if hasattr(stream, "mode") and "b" not in stream.mode:  # type: ignore
-            logger_warning(
-                "PdfReader stream/file object is not in binary mode. "
-                "It may not be read correctly.",
-                __name__,
-            )
-        if isinstance(stream, (str, Path)):
-            with open(stream, "rb") as fh:
-                stream = BytesIO(fh.read())
-        self.read(stream)
-        self.stream = stream
-
-        self._override_encryption = False
-        self._encryption: Optional[Encryption] = None
-        if self.is_encrypted:
-            self._override_encryption = True
-            # Some documents may not have a /ID, use two empty
-            # byte strings instead. Solves
-            # https://github.com/py-pdf/pypdf/issues/608
-            id_entry = self.trailer.get(TK.ID)
-            id1_entry = id_entry[0].get_object().original_bytes if id_entry else b""
-            encrypt_entry = cast(
-                DictionaryObject, self.trailer[TK.ENCRYPT].get_object()
-            )
-            self._encryption = Encryption.read(encrypt_entry, id1_entry)
-
-            # try empty password if no password provided
-            pwd = password if password is not None else b""
-            if (
-                self._encryption.verify(pwd) == PasswordType.NOT_DECRYPTED
-                and password is not None
-            ):
-                # raise if password provided
-                raise WrongPasswordError("Wrong password")
-            self._override_encryption = False
-        elif password is not None:
-            raise PdfReadError("Not encrypted file")
-
-    @property
-    def pdf_header(self) -> str:
-        """
-        The first 8 bytes of the file.
-
-        This is typically something like ``'%PDF-1.6'`` and can be used to
-        detect if the file is actually a PDF file and which version it is.
-        """
-        # TODO: Make this return a bytes object for consistency
-        #       but that needs a deprecation
-        loc = self.stream.tell()
-        self.stream.seek(0, 0)
-        pdf_file_version = self.stream.read(8).decode("utf-8", "backslashreplace")
-        self.stream.seek(loc, 0)  # return to where it was
-        return pdf_file_version
-
-    @property
-    def metadata(self) -> Optional[DocumentInformation]:
-        """
-        Retrieve the PDF file's document information dictionary, if it exists.
-
-        Note that some PDF files use metadata streams instead of docinfo
-        dictionaries, and these metadata streams will not be accessed by this
-        function.
-        """
-        if TK.INFO not in self.trailer:
-            return None
-        obj = self.trailer[TK.INFO]
-        retval = DocumentInformation()
-        if isinstance(obj, type(None)):
-            raise PdfReadError(
-                "trailer not found or does not point to document information directory"
-            )
-        retval.update(obj)  # type: ignore
-        return retval
-
-    def getDocumentInfo(self) -> Optional[DocumentInformation]:  # deprecated
-        """
-        Use the attribute :py:attr:`metadata` instead.
-
-        .. deprecated:: 1.28.0
-        """
-        deprecation_with_replacement("getDocumentInfo", "metadata", "3.0.0")
-        return self.metadata
-
-    @property
-    def documentInfo(self) -> Optional[DocumentInformation]:  # deprecated
-        """
-        Use the attribute :py:attr:`metadata` instead.
-
-        .. deprecated:: 1.28.0
-        """
-        deprecation_with_replacement("documentInfo", "metadata", "3.0.0")
-        return self.metadata
-
-    @property
-    def xmp_metadata(self) -> Optional[XmpInformation]:
-        """XMP (Extensible Metadata Platform) data."""
-        try:
-            self._override_encryption = True
-            return self.trailer[TK.ROOT].xmp_metadata  # type: ignore
-        finally:
-            self._override_encryption = False
-
-    def getXmpMetadata(self) -> Optional[XmpInformation]:  # deprecated
-        """
-        Use the attribute :py:attr:`metadata` instead.
-
-        .. deprecated:: 1.28.0
-        """
-        deprecation_with_replacement("getXmpMetadata", "xmp_metadata", "3.0.0")
-        return self.xmp_metadata
-
-    @property
-    def xmpMetadata(self) -> Optional[XmpInformation]:  # deprecated
-        """
-        Use the attribute :py:attr:`xmp_metadata` instead.
-
-        .. deprecated:: 1.28.0.
-        """
-        deprecation_with_replacement("xmpMetadata", "xmp_metadata", "3.0.0")
-        return self.xmp_metadata
-
-    def _get_num_pages(self) -> int:
-        """
-        Calculate the number of pages in this PDF file.
-
-        Returns:
-            The number of pages of the parsed PDF file
-
-        Raises:
-            PdfReadError: if file is encrypted and restrictions prevent
-                this action.
-        """
-        # Flattened pages will not work on an Encrypted PDF;
-        # the PDF file's page count is used in this case. Otherwise,
-        # the original method (flattened page count) is used.
-        if self.is_encrypted:
-            return self.trailer[TK.ROOT]["/Pages"]["/Count"]  # type: ignore
-        else:
-            if self.flattened_pages is None:
-                self._flatten()
-            return len(self.flattened_pages)  # type: ignore
-
-    def getNumPages(self) -> int:  # deprecated
-        """
-        Use :code:`len(reader.pages)` instead.
-
-        .. deprecated:: 1.28.0
-        """
-        deprecation_with_replacement("reader.getNumPages", "len(reader.pages)", "3.0.0")
-        return self._get_num_pages()
-
-    @property
-    def numPages(self) -> int:  # deprecated
-        """
-        Use :code:`len(reader.pages)` instead.
-
-        .. deprecated:: 1.28.0
-        """
-        deprecation_with_replacement("reader.numPages", "len(reader.pages)", "3.0.0")
-        return self._get_num_pages()
-
-    def getPage(self, pageNumber: int) -> PageObject:  # deprecated
-        """
-        Use :code:`reader.pages[page_number]` instead.
-
-        .. deprecated:: 1.28.0
-        """
-        deprecation_with_replacement(
-            "reader.getPage(pageNumber)", "reader.pages[page_number]", "3.0.0"
-        )
-        return self._get_page(pageNumber)
-
-    def _get_page(self, page_number: int) -> PageObject:
-        """
-        Retrieve a page by number from this PDF file.
-
-        Args:
-            page_number: The page number to retrieve
-                (pages begin at zero)
-
-        Returns:
-            A :class:`PageObject<pypdf._page.PageObject>` instance.
-        """
-        if self.flattened_pages is None:
-            self._flatten()
-        assert self.flattened_pages is not None, "hint for mypy"
-        return self.flattened_pages[page_number]
-
-    @property
-    def namedDestinations(self) -> Dict[str, Any]:  # deprecated
-        """
-        Use :py:attr:`named_destinations` instead.
-
-        .. deprecated:: 1.28.0
-        """
-        deprecation_with_replacement("namedDestinations", "named_destinations", "3.0.0")
-        return self.named_destinations
-
-    @property
-    def named_destinations(self) -> Dict[str, Any]:
-        """
-        A read-only dictionary which maps names to
-        :class:`Destinations<pypdf.generic.Destination>`
-        """
-        return self._get_named_destinations()
-
-    # A select group of relevant field attributes. For the complete list,
-    # see section 8.6.2 of the PDF 1.7 reference.
-
-    def get_fields(
-        self,
-        tree: Optional[TreeObject] = None,
-        retval: Optional[Dict[Any, Any]] = None,
-        fileobj: Optional[Any] = None,
-    ) -> Optional[Dict[str, Any]]:
-        """
-        Extract field data if this PDF contains interactive form fields.
-
-        The *tree* and *retval* parameters are for recursive use.
-
-        Args:
-            tree:
-            retval:
-            fileobj: A file object (usually a text file) to write
-                a report to on all interactive form fields found.
-
-        Returns:
-            A dictionary where each key is a field name, and each
-            value is a :class:`Field<pypdf.generic.Field>` object. By
-            default, the mapping name is used for keys.
-            ``None`` if form data could not be located.
-        """
-        field_attributes = FA.attributes_dict()
-        field_attributes.update(CheckboxRadioButtonAttributes.attributes_dict())
-        if retval is None:
-            retval = {}
-            catalog = cast(DictionaryObject, self.trailer[TK.ROOT])
-            # get the AcroForm tree
-            if CD.ACRO_FORM in catalog:
-                tree = cast(Optional[TreeObject], catalog[CD.ACRO_FORM])
-            else:
-                return None
-        if tree is None:
-            return retval
-        self._check_kids(tree, retval, fileobj)
-        for attr in field_attributes:
-            if attr in tree:
-                # Tree is a field
-                self._build_field(tree, retval, fileobj, field_attributes)
-                break
-
-        if "/Fields" in tree:
-            fields = cast(ArrayObject, tree["/Fields"])
-            for f in fields:
-                field = f.get_object()
-                self._build_field(field, retval, fileobj, field_attributes)
-
-        return retval
-
-    def getFields(
-        self,
-        tree: Optional[TreeObject] = None,
-        retval: Optional[Dict[Any, Any]] = None,
-        fileobj: Optional[Any] = None,
-    ) -> Optional[Dict[str, Any]]:  # deprecated
-        """
-        Use :meth:`get_fields` instead.
-
-        .. deprecated:: 1.28.0
-        """
-        deprecation_with_replacement("getFields", "get_fields", "3.0.0")
-        return self.get_fields(tree, retval, fileobj)
-
-    def _get_qualified_field_name(self, parent: DictionaryObject) -> str:
-        if "/TM" in parent:
-            return cast(str, parent["/TM"])
-        elif "/Parent" in parent:
-            return (
-                self._get_qualified_field_name(
-                    cast(DictionaryObject, parent["/Parent"])
-                )
-                + "."
-                + cast(str, parent["/T"])
-            )
-        else:
-            return cast(str, parent["/T"])
-
-    def _build_field(
-        self,
-        field: Union[TreeObject, DictionaryObject],
-        retval: Dict[Any, Any],
-        fileobj: Any,
-        field_attributes: Any,
-    ) -> None:
-        self._check_kids(field, retval, fileobj)
-        try:
-            key = cast(str, field["/TM"])
-        except KeyError:
-            try:
-                if "/Parent" in field:
-                    key = (
-                        self._get_qualified_field_name(
-                            cast(DictionaryObject, field["/Parent"])
-                        )
-                        + "."
-                    )
-                else:
-                    key = ""
-                key += cast(str, field["/T"])
-            except KeyError:
-                # Ignore no-name field for now
-                return
-        if fileobj:
-            self._write_field(fileobj, field, field_attributes)
-            fileobj.write("\n")
-        retval[key] = Field(field)
-        obj = retval[key].indirect_reference.get_object()  # to get the full object
-        if obj.get(FA.FT, "") == "/Ch":
-            retval[key][NameObject("/_States_")] = obj[NameObject(FA.Opt)]
-        if obj.get(FA.FT, "") == "/Btn" and "/AP" in obj:
-            #  Checkbox
-            retval[key][NameObject("/_States_")] = ArrayObject(
-                list(obj["/AP"]["/N"].keys())
-            )
-            if "/Off" not in retval[key]["/_States_"]:
-                retval[key][NameObject("/_States_")].append(NameObject("/Off"))
-        elif obj.get(FA.FT, "") == "/Btn" and obj.get(FA.Ff, 0) & FA.FfBits.Radio != 0:
-            states = []
-            for k in obj.get(FA.Kids, {}):
-                k = k.get_object()
-                for s in list(k["/AP"]["/N"].keys()):
-                    if s not in states:
-                        states.append(s)
-                retval[key][NameObject("/_States_")] = ArrayObject(states)
-            if (
-                obj.get(FA.Ff, 0) & FA.FfBits.NoToggleToOff != 0
-                and "/Off" in retval[key]["/_States_"]
-            ):
-                del retval[key]["/_States_"][retval[key]["/_States_"].index("/Off")]
-
-    def _check_kids(
-        self, tree: Union[TreeObject, DictionaryObject], retval: Any, fileobj: Any
-    ) -> None:
-        if PA.KIDS in tree:
-            # recurse down the tree
-            for kid in tree[PA.KIDS]:  # type: ignore
-                self.get_fields(kid.get_object(), retval, fileobj)
-
-    def _write_field(self, fileobj: Any, field: Any, field_attributes: Any) -> None:
-        field_attributes_tuple = FA.attributes()
-        field_attributes_tuple = (
-            field_attributes_tuple + CheckboxRadioButtonAttributes.attributes()
-        )
-
-        for attr in field_attributes_tuple:
-            if attr in (
-                FA.Kids,
-                FA.AA,
-            ):
-                continue
-            attr_name = field_attributes[attr]
-            try:
-                if attr == FA.FT:
-                    # Make the field type value more clear
-                    types = {
-                        "/Btn": "Button",
-                        "/Tx": "Text",
-                        "/Ch": "Choice",
-                        "/Sig": "Signature",
-                    }
-                    if field[attr] in types:
-                        fileobj.write(f"{attr_name}: {types[field[attr]]}\n")
-                elif attr == FA.Parent:
-                    # Let's just write the name of the parent
-                    try:
-                        name = field[attr][FA.TM]
-                    except KeyError:
-                        name = field[attr][FA.T]
-                    fileobj.write(f"{attr_name}: {name}\n")
-                else:
-                    fileobj.write(f"{attr_name}: {field[attr]}\n")
-            except KeyError:
-                # Field attribute is N/A or unknown, so don't write anything
-                pass
-
-    def get_form_text_fields(self, full_qualified_name: bool = False) -> Dict[str, Any]:
-        """
-        Retrieve form fields from the document with textual data.
-
-        Args:
-            full_qualified_name: to get full name
-
-        Returns:
-            A dictionary. The key is the name of the form field,
-            the value is the content of the field.
-
-            If the document contains multiple form fields with the same name, the
-            second and following will get the suffix .2, .3, ...
-        """
-
-        def indexed_key(k: str, fields: dict) -> str:
-            if k not in fields:
-                return k
-            else:
-                return (
-                    k
-                    + "."
-                    + str(sum([1 for kk in fields if kk.startswith(k + ".")]) + 2)
-                )
-
-        # Retrieve document form fields
-        formfields = self.get_fields()
-        if formfields is None:
-            return {}
-        ff = {}
-        for field, value in formfields.items():
-            if value.get("/FT") == "/Tx":
-                if full_qualified_name:
-                    ff[field] = value.get("/V")
-                else:
-                    ff[indexed_key(cast(str, value["/T"]), ff)] = value.get("/V")
-        return ff
-
-    def getFormTextFields(self) -> Dict[str, Any]:  # deprecated
-        """
-        Use :meth:`get_form_text_fields` instead.
-
-        .. deprecated:: 1.28.0
-        """
-        deprecation_with_replacement(
-            "getFormTextFields", "get_form_text_fields", "3.0.0"
-        )
-        return self.get_form_text_fields()
-
-    def _get_named_destinations(
-        self,
-        tree: Union[TreeObject, None] = None,
-        retval: Optional[Any] = None,
-    ) -> Dict[str, Any]:
-        """
-        Retrieve the named destinations present in the document.
-
-        Args:
-            tree:
-            retval:
-
-        Returns:
-            A dictionary which maps names to
-            :class:`Destinations<pypdf.generic.Destination>`.
-        """
-        if retval is None:
-            retval = {}
-            catalog = cast(DictionaryObject, self.trailer[TK.ROOT])
-
-            # get the name tree
-            if CA.DESTS in catalog:
-                tree = cast(TreeObject, catalog[CA.DESTS])
-            elif CA.NAMES in catalog:
-                names = cast(DictionaryObject, catalog[CA.NAMES])
-                if CA.DESTS in names:
-                    tree = cast(TreeObject, names[CA.DESTS])
-
-        if tree is None:
-            return retval
-
-        if PA.KIDS in tree:
-            # recurse down the tree
-            for kid in cast(ArrayObject, tree[PA.KIDS]):
-                self._get_named_destinations(kid.get_object(), retval)
-        # TABLE 3.33 Entries in a name tree node dictionary (PDF 1.7 specs)
-        elif CA.NAMES in tree:  # KIDS and NAMES are exclusives (PDF 1.7 specs p 162)
-            names = cast(DictionaryObject, tree[CA.NAMES])
-            i = 0
-            while i < len(names):
-                key = cast(str, names[i].get_object())
-                i += 1
-                if not isinstance(key, str):
-                    continue
-                try:
-                    value = names[i].get_object()
-                except IndexError:
-                    break
-                i += 1
-                if isinstance(value, DictionaryObject) and "/D" in value:
-                    value = value["/D"]
-                dest = self._build_destination(key, value)  # type: ignore
-                if dest is not None:
-                    retval[key] = dest
-        else:  # case where Dests is in root catalog (PDF 1.7 specs, §2 about PDF1.1
-            for k__, v__ in tree.items():
-                val = v__.get_object()
-                if isinstance(val, DictionaryObject):
-                    val = val["/D"].get_object()
-                dest = self._build_destination(k__, val)
-                if dest is not None:
-                    retval[k__] = dest
-        return retval
-
-    def getNamedDestinations(
-        self,
-        tree: Union[TreeObject, None] = None,
-        retval: Optional[Any] = None,
-    ) -> Dict[str, Any]:  # deprecated
-        """
-        Use :py:attr:`named_destinations` instead.
-
-        .. deprecated:: 1.28.0
-        """
-        deprecation_with_replacement(
-            "getNamedDestinations", "named_destinations", "3.0.0"
-        )
-        return self._get_named_destinations(tree, retval)
-
-    @property
-    def outline(self) -> OutlineType:
-        """
-        Read-only property for the outline present in the document.
-
-        (i.e., a collection of 'outline items' which are also known as
-        'bookmarks')
-        """
-        return self._get_outline()
-
-    @property
-    def outlines(self) -> OutlineType:  # deprecated
-        """
-        Use :py:attr:`outline` instead.
-
-        .. deprecated:: 2.9.0
-        """
-        deprecation_with_replacement("outlines", "outline", "3.0.0")
-        return self.outline
-
-    def _get_outline(
-        self, node: Optional[DictionaryObject] = None, outline: Optional[Any] = None
-    ) -> OutlineType:
-        if outline is None:
-            outline = []
-            catalog = cast(DictionaryObject, self.trailer[TK.ROOT])
-
-            # get the outline dictionary and named destinations
-            if CO.OUTLINES in catalog:
-                lines = cast(DictionaryObject, catalog[CO.OUTLINES])
-
-                if isinstance(lines, NullObject):
-                    return outline
-
-                # TABLE 8.3 Entries in the outline dictionary
-                if lines is not None and "/First" in lines:
-                    node = cast(DictionaryObject, lines["/First"])
-            self._namedDests = self._get_named_destinations()
-
-        if node is None:
-            return outline
-
-        # see if there are any more outline items
-        while True:
-            outline_obj = self._build_outline_item(node)
-            if outline_obj:
-                outline.append(outline_obj)
-
-            # check for sub-outline
-            if "/First" in node:
-                sub_outline: List[Any] = []
-                self._get_outline(cast(DictionaryObject, node["/First"]), sub_outline)
-                if sub_outline:
-                    outline.append(sub_outline)
-
-            if "/Next" not in node:
-                break
-            node = cast(DictionaryObject, node["/Next"])
-
-        return outline
-
-    def getOutlines(
-        self, node: Optional[DictionaryObject] = None, outline: Optional[Any] = None
-    ) -> OutlineType:  # deprecated
-        """
-        Use :py:attr:`outline` instead.
-
-        .. deprecated:: 1.28.0
-        """
-        deprecation_with_replacement("getOutlines", "outline", "3.0.0")
-        return self._get_outline(node, outline)
-
-    @property
-    def threads(self) -> Optional[ArrayObject]:
-        """
-        Read-only property for the list of threads.
-
-        See §8.3.2 from PDF 1.7 spec.
-
-        It's an array of dictionaries with "/F" and "/I" properties or
-        None if there are no articles.
-        """
-        catalog = cast(DictionaryObject, self.trailer[TK.ROOT])
-        if CO.THREADS in catalog:
-            return cast("ArrayObject", catalog[CO.THREADS])
-        else:
-            return None
-
-    def _get_page_number_by_indirect(
-        self, indirect_reference: Union[None, int, NullObject, IndirectObject]
-    ) -> int:
-        """
-        Generate _page_id2num.
-
-        Args:
-            indirect_reference:
-
-        Returns:
-            The page number.
-        """
-        if self._page_id2num is None:
-            self._page_id2num = {
-                x.indirect_reference.idnum: i for i, x in enumerate(self.pages)  # type: ignore
-            }
-
-        if indirect_reference is None or isinstance(indirect_reference, NullObject):
-            return -1
-        if isinstance(indirect_reference, int):
-            idnum = indirect_reference
-        else:
-            idnum = indirect_reference.idnum
-        assert self._page_id2num is not None, "hint for mypy"
-        ret = self._page_id2num.get(idnum, -1)
-        return ret
-
-    def get_page_number(self, page: PageObject) -> int:
-        """
-        Retrieve page number of a given PageObject.
-
-        Args:
-            page: The page to get page number. Should be
-                an instance of :class:`PageObject<pypdf._page.PageObject>`
-
-        Returns:
-            The page number or -1 if page is not found
-        """
-        return self._get_page_number_by_indirect(page.indirect_reference)
-
-    def getPageNumber(self, page: PageObject) -> int:  # deprecated
-        """
-        Use :meth:`get_page_number` instead.
-
-        .. deprecated:: 1.28.0
-        """
-        deprecation_with_replacement("getPageNumber", "get_page_number", "3.0.0")
-        return self.get_page_number(page)
-
-    def get_destination_page_number(self, destination: Destination) -> int:
-        """
-        Retrieve page number of a given Destination object.
-
-        Args:
-            destination: The destination to get page number.
-
-        Returns:
-            The page number or -1 if page is not found
-        """
-        return self._get_page_number_by_indirect(destination.page)
-
-    def getDestinationPageNumber(self, destination: Destination) -> int:  # deprecated
-        """
-        Use :meth:`get_destination_page_number` instead.
-
-        .. deprecated:: 1.28.0
-        """
-        deprecation_with_replacement(
-            "getDestinationPageNumber", "get_destination_page_number", "3.0.0"
-        )
-        return self.get_destination_page_number(destination)
-
-    def _build_destination(
-        self,
-        title: str,
-        array: Optional[
-            List[
-                Union[NumberObject, IndirectObject, None, NullObject, DictionaryObject]
-            ]
-        ],
-    ) -> Destination:
-        page, typ = None, None
-        # handle outline items with missing or invalid destination
-        if (
-            isinstance(array, (NullObject, str))
-            or (isinstance(array, ArrayObject) and len(array) == 0)
-            or array is None
-        ):
-            page = NullObject()
-            return Destination(title, page, Fit.fit())
-        else:
-            page, typ = array[0:2]  # type: ignore
-            array = array[2:]
-            try:
-                return Destination(title, page, Fit(fit_type=typ, fit_args=array))  # type: ignore
-            except PdfReadError:
-                logger_warning(f"Unknown destination: {title} {array}", __name__)
-                if self.strict:
-                    raise
-                # create a link to first Page
-                tmp = self.pages[0].indirect_reference
-                indirect_reference = NullObject() if tmp is None else tmp
-                return Destination(title, indirect_reference, Fit.fit())  # type: ignore
-
-    def _build_outline_item(self, node: DictionaryObject) -> Optional[Destination]:
-        dest, title, outline_item = None, None, None
-
-        # title required for valid outline
-        # PDF Reference 1.7: TABLE 8.4 Entries in an outline item dictionary
-        try:
-            title = cast("str", node["/Title"])
-        except KeyError:
-            if self.strict:
-                raise PdfReadError(f"Outline Entry Missing /Title attribute: {node!r}")
-            title = ""  # type: ignore
-
-        if "/A" in node:
-            # Action, PDFv1.7 Section 12.6 (only type GoTo supported)
-            action = cast(DictionaryObject, node["/A"])
-            action_type = cast(NameObject, action[GoToActionArguments.S])
-            if action_type == "/GoTo":
-                dest = action[GoToActionArguments.D]
-        elif "/Dest" in node:
-            # Destination, PDFv1.7 Section 12.3.2
-            dest = node["/Dest"]
-            # if array was referenced in another object, will be a dict w/ key "/D"
-            if isinstance(dest, DictionaryObject) and "/D" in dest:
-                dest = dest["/D"]
-
-        if isinstance(dest, ArrayObject):
-            outline_item = self._build_destination(title, dest)
-        elif isinstance(dest, str):
-            # named destination, addresses NameObject Issue #193
-            # TODO : keep named destination instead of replacing it ?
-            try:
-                outline_item = self._build_destination(
-                    title, self._namedDests[dest].dest_array
-                )
-            except KeyError:
-                # named destination not found in Name Dict
-                outline_item = self._build_destination(title, None)
-        elif dest is None:
-            # outline item not required to have destination or action
-            # PDFv1.7 Table 153
-            outline_item = self._build_destination(title, dest)
-        else:
-            if self.strict:
-                raise PdfReadError(f"Unexpected destination {dest!r}")
-            else:
-                logger_warning(
-                    f"Removed unexpected destination {dest!r} from destination",
-                    __name__,
-                )
-            outline_item = self._build_destination(title, None)  # type: ignore
-
-        # if outline item created, add color, format, and child count if present
-        if outline_item:
-            if "/C" in node:
-                # Color of outline item font in (R, G, B) with values ranging 0.0-1.0
-                outline_item[NameObject("/C")] = ArrayObject(FloatObject(c) for c in node["/C"])  # type: ignore
-            if "/F" in node:
-                # specifies style characteristics bold and/or italic
-                # with 1=italic, 2=bold, 3=both
-                outline_item[NameObject("/F")] = node["/F"]
-            if "/Count" in node:
-                # absolute value = num. visible children
-                # with positive = open/unfolded, negative = closed/folded
-                outline_item[NameObject("/Count")] = node["/Count"]
-            #  if count is 0 we will consider it as open ( in order to have always an is_open to simplify
-            outline_item[NameObject("/%is_open%")] = BooleanObject(
-                node.get("/Count", 0) >= 0
-            )
-        outline_item.node = node
-        try:
-            outline_item.indirect_reference = node.indirect_reference
-        except AttributeError:
-            pass
-        return outline_item
-
-    @property
-    def pages(self) -> List[PageObject]:
-        """Read-only property that emulates a list of :py:class:`Page<pypdf._page.Page>` objects."""
-        return _VirtualList(self._get_num_pages, self._get_page)  # type: ignore
-
-    @property
-    def page_labels(self) -> List[str]:
-        """
-        A list of labels for the pages in this document.
-
-        This property is read-only. The labels are in the order that the pages
-        appear in the document.
-        """
-        return [page_index2page_label(self, i) for i in range(len(self.pages))]
-
-    @property
-    def page_layout(self) -> Optional[str]:
-        """
-        Get the page layout currently being used.
-
-        .. list-table:: Valid ``layout`` values
-           :widths: 50 200
-
-           * - /NoLayout
-             - Layout explicitly not specified
-           * - /SinglePage
-             - Show one page at a time
-           * - /OneColumn
-             - Show one column at a time
-           * - /TwoColumnLeft
-             - Show pages in two columns, odd-numbered pages on the left
-           * - /TwoColumnRight
-             - Show pages in two columns, odd-numbered pages on the right
-           * - /TwoPageLeft
-             - Show two pages at a time, odd-numbered pages on the left
-           * - /TwoPageRight
-             - Show two pages at a time, odd-numbered pages on the right
-        """
-        trailer = cast(DictionaryObject, self.trailer[TK.ROOT])
-        if CD.PAGE_LAYOUT in trailer:
-            return cast(NameObject, trailer[CD.PAGE_LAYOUT])
-        return None
-
-    def getPageLayout(self) -> Optional[str]:  # deprecated
-        """
-        Use :py:attr:`page_layout` instead.
-
-        .. deprecated:: 1.28.0
-        """
-        deprecation_with_replacement("getPageLayout", "page_layout", "3.0.0")
-        return self.page_layout
-
-    @property
-    def pageLayout(self) -> Optional[str]:  # deprecated
-        """
-        Use :py:attr:`page_layout` instead.
-
-        .. deprecated:: 1.28.0
-        """
-        deprecation_with_replacement("pageLayout", "page_layout", "3.0.0")
-        return self.page_layout
-
-    @property
-    def page_mode(self) -> Optional[PagemodeType]:
-        """
-        Get the page mode currently being used.
-
-        .. list-table:: Valid ``mode`` values
-           :widths: 50 200
-
-           * - /UseNone
-             - Do not show outline or thumbnails panels
-           * - /UseOutlines
-             - Show outline (aka bookmarks) panel
-           * - /UseThumbs
-             - Show page thumbnails panel
-           * - /FullScreen
-             - Fullscreen view
-           * - /UseOC
-             - Show Optional Content Group (OCG) panel
-           * - /UseAttachments
-             - Show attachments panel
-        """
-        try:
-            return self.trailer[TK.ROOT]["/PageMode"]  # type: ignore
-        except KeyError:
-            return None
-
-    def getPageMode(self) -> Optional[PagemodeType]:  # deprecated
-        """
-        Use :py:attr:`page_mode` instead.
-
-        .. deprecated:: 1.28.0
-        """
-        deprecation_with_replacement("getPageMode", "page_mode", "3.0.0")
-        return self.page_mode
-
-    @property
-    def pageMode(self) -> Optional[PagemodeType]:  # deprecated
-        """
-        Use :py:attr:`page_mode` instead.
-
-        .. deprecated:: 1.28.0
-        """
-        deprecation_with_replacement("pageMode", "page_mode", "3.0.0")
-        return self.page_mode
-
-    def _flatten(
-        self,
-        pages: Union[None, DictionaryObject, PageObject] = None,
-        inherit: Optional[Dict[str, Any]] = None,
-        indirect_reference: Optional[IndirectObject] = None,
-    ) -> None:
-        inheritable_page_attributes = (
-            NameObject(PG.RESOURCES),
-            NameObject(PG.MEDIABOX),
-            NameObject(PG.CROPBOX),
-            NameObject(PG.ROTATE),
-        )
-        if inherit is None:
-            inherit = {}
-        if pages is None:
-            # Fix issue 327: set flattened_pages attribute only for
-            # decrypted file
-            catalog = self.trailer[TK.ROOT].get_object()
-            pages = catalog["/Pages"].get_object()  # type: ignore
-            self.flattened_pages = []
-
-        if PA.TYPE in pages:
-            t = pages[PA.TYPE]  # type: ignore
-        # if pdf has no type, considered as a page if /Kids is missing
-        elif PA.KIDS not in pages:
-            t = "/Page"
-        else:
-            t = "/Pages"
-
-        if t == "/Pages":
-            for attr in inheritable_page_attributes:
-                if attr in pages:
-                    inherit[attr] = pages[attr]
-            for page in pages[PA.KIDS]:  # type: ignore
-                addt = {}
-                if isinstance(page, IndirectObject):
-                    addt["indirect_reference"] = page
-                obj = page.get_object()
-                if obj:
-                    # damaged file may have invalid child in /Pages
-                    self._flatten(obj, inherit, **addt)
-        elif t == "/Page":
-            for attr_in, value in list(inherit.items()):
-                # if the page has it's own value, it does not inherit the
-                # parent's value:
-                if attr_in not in pages:
-                    pages[attr_in] = value
-            page_obj = PageObject(self, indirect_reference)
-            page_obj.update(pages)
-
-            # TODO: Could flattened_pages be None at this point?
-            self.flattened_pages.append(page_obj)  # type: ignore
-
-    def _get_object_from_stream(
-        self, indirect_reference: IndirectObject
-    ) -> Union[int, PdfObject, str]:
-        # indirect reference to object in object stream
-        # read the entire object stream into memory
-        stmnum, idx = self.xref_objStm[indirect_reference.idnum]
-        obj_stm: EncodedStreamObject = IndirectObject(stmnum, 0, self).get_object()  # type: ignore
-        # This is an xref to a stream, so its type better be a stream
-        assert cast(str, obj_stm["/Type"]) == "/ObjStm"
-        # /N is the number of indirect objects in the stream
-        assert idx < obj_stm["/N"]
-        stream_data = BytesIO(b_(obj_stm.get_data()))
-        for i in range(obj_stm["/N"]):  # type: ignore
-            read_non_whitespace(stream_data)
-            stream_data.seek(-1, 1)
-            objnum = NumberObject.read_from_stream(stream_data)
-            read_non_whitespace(stream_data)
-            stream_data.seek(-1, 1)
-            offset = NumberObject.read_from_stream(stream_data)
-            read_non_whitespace(stream_data)
-            stream_data.seek(-1, 1)
-            if objnum != indirect_reference.idnum:
-                # We're only interested in one object
-                continue
-            if self.strict and idx != i:
-                raise PdfReadError("Object is in wrong index.")
-            stream_data.seek(int(obj_stm["/First"] + offset), 0)  # type: ignore
-
-            # to cope with some case where the 'pointer' is on a white space
-            read_non_whitespace(stream_data)
-            stream_data.seek(-1, 1)
-
-            try:
-                obj = read_object(stream_data, self)
-            except PdfStreamError as exc:
-                # Stream object cannot be read. Normally, a critical error, but
-                # Adobe Reader doesn't complain, so continue (in strict mode?)
-                logger_warning(
-                    f"Invalid stream (index {i}) within object "
-                    f"{indirect_reference.idnum} {indirect_reference.generation}: "
-                    f"{exc}",
-                    __name__,
-                )
-
-                if self.strict:
-                    raise PdfReadError(f"Can't read object stream: {exc}")
-                # Replace with null. Hopefully it's nothing important.
-                obj = NullObject()
-            return obj
-
-        if self.strict:
-            raise PdfReadError("This is a fatal error in strict mode.")
-        return NullObject()
-
-    def _get_indirect_object(self, num: int, gen: int) -> Optional[PdfObject]:
-        """
-        Used to ease development.
-
-        This is equivalent to generic.IndirectObject(num,gen,self).get_object()
-
-        Args:
-            num: The object number of the indirect object.
-            gen: The generation number of the indirect object.
-
-        Returns:
-            A PdfObject
-        """
-        return IndirectObject(num, gen, self).get_object()
-
-    def get_object(
-        self, indirect_reference: Union[int, IndirectObject]
-    ) -> Optional[PdfObject]:
-        if isinstance(indirect_reference, int):
-            indirect_reference = IndirectObject(indirect_reference, 0, self)
-        retval = self.cache_get_indirect_object(
-            indirect_reference.generation, indirect_reference.idnum
-        )
-        if retval is not None:
-            return retval
-        if (
-            indirect_reference.generation == 0
-            and indirect_reference.idnum in self.xref_objStm
-        ):
-            retval = self._get_object_from_stream(indirect_reference)  # type: ignore
-        elif (
-            indirect_reference.generation in self.xref
-            and indirect_reference.idnum in self.xref[indirect_reference.generation]
-        ):
-            if self.xref_free_entry.get(indirect_reference.generation, {}).get(
-                indirect_reference.idnum, False
-            ):
-                return NullObject()
-            start = self.xref[indirect_reference.generation][indirect_reference.idnum]
-            self.stream.seek(start, 0)
-            try:
-                idnum, generation = self.read_object_header(self.stream)
-            except Exception:
-                if hasattr(self.stream, "getbuffer"):
-                    buf = bytes(self.stream.getbuffer())  # type: ignore
-                else:
-                    p = self.stream.tell()
-                    self.stream.seek(0, 0)
-                    buf = self.stream.read(-1)
-                    self.stream.seek(p, 0)
-                m = re.search(
-                    rf"\s{indirect_reference.idnum}\s+{indirect_reference.generation}\s+obj".encode(),
-                    buf,
-                )
-                if m is not None:
-                    logger_warning(
-                        f"Object ID {indirect_reference.idnum},{indirect_reference.generation} ref repaired",
-                        __name__,
-                    )
-                    self.xref[indirect_reference.generation][
-                        indirect_reference.idnum
-                    ] = (m.start(0) + 1)
-                    self.stream.seek(m.start(0) + 1)
-                    idnum, generation = self.read_object_header(self.stream)
-                else:
-                    idnum = -1  # exception will be raised below
-            if idnum != indirect_reference.idnum and self.xref_index:
-                # Xref table probably had bad indexes due to not being zero-indexed
-                if self.strict:
-                    raise PdfReadError(
-                        f"Expected object ID ({indirect_reference.idnum} {indirect_reference.generation}) "
-                        f"does not match actual ({idnum} {generation}); "
-                        "xref table not zero-indexed."
-                    )
-                # xref table is corrected in non-strict mode
-            elif idnum != indirect_reference.idnum and self.strict:
-                # some other problem
-                raise PdfReadError(
-                    f"Expected object ID ({indirect_reference.idnum} "
-                    f"{indirect_reference.generation}) does not match actual "
-                    f"({idnum} {generation})."
-                )
-            if self.strict:
-                assert generation == indirect_reference.generation
-            retval = read_object(self.stream, self)  # type: ignore
-
-            # override encryption is used for the /Encrypt dictionary
-            if not self._override_encryption and self._encryption is not None:
-                # if we don't have the encryption key:
-                if not self._encryption.is_decrypted():
-                    raise FileNotDecryptedError("File has not been decrypted")
-                # otherwise, decrypt here...
-                retval = cast(PdfObject, retval)
-                retval = self._encryption.decrypt_object(
-                    retval, indirect_reference.idnum, indirect_reference.generation
-                )
-        else:
-            if hasattr(self.stream, "getbuffer"):
-                buf = bytes(self.stream.getbuffer())  # type: ignore
-            else:
-                p = self.stream.tell()
-                self.stream.seek(0, 0)
-                buf = self.stream.read(-1)
-                self.stream.seek(p, 0)
-            m = re.search(
-                rf"\s{indirect_reference.idnum}\s+{indirect_reference.generation}\s+obj".encode(),
-                buf,
-            )
-            if m is not None:
-                logger_warning(
-                    f"Object {indirect_reference.idnum} {indirect_reference.generation} found",
-                    __name__,
-                )
-                if indirect_reference.generation not in self.xref:
-                    self.xref[indirect_reference.generation] = {}
-                self.xref[indirect_reference.generation][indirect_reference.idnum] = (
-                    m.start(0) + 1
-                )
-                self.stream.seek(m.end(0) + 1)
-                skip_over_whitespace(self.stream)
-                self.stream.seek(-1, 1)
-                retval = read_object(self.stream, self)  # type: ignore
-
-                # override encryption is used for the /Encrypt dictionary
-                if not self._override_encryption and self._encryption is not None:
-                    # if we don't have the encryption key:
-                    if not self._encryption.is_decrypted():
-                        raise FileNotDecryptedError("File has not been decrypted")
-                    # otherwise, decrypt here...
-                    retval = cast(PdfObject, retval)
-                    retval = self._encryption.decrypt_object(
-                        retval, indirect_reference.idnum, indirect_reference.generation
-                    )
-            else:
-                logger_warning(
-                    f"Object {indirect_reference.idnum} {indirect_reference.generation} not defined.",
-                    __name__,
-                )
-                if self.strict:
-                    raise PdfReadError("Could not find object.")
-        self.cache_indirect_object(
-            indirect_reference.generation, indirect_reference.idnum, retval
-        )
-        return retval
-
-    def getObject(
-        self, indirectReference: IndirectObject
-    ) -> Optional[PdfObject]:  # deprecated
-        """
-        Use :meth:`get_object` instead.
-
-        .. deprecated:: 1.28.0
-        """
-        deprecation_with_replacement("getObject", "get_object", "3.0.0")
-        return self.get_object(indirectReference)
-
-    def read_object_header(self, stream: StreamType) -> Tuple[int, int]:
-        # Should never be necessary to read out whitespace, since the
-        # cross-reference table should put us in the right spot to read the
-        # object header.  In reality... some files have stupid cross reference
-        # tables that are off by whitespace bytes.
-        extra = False
-        skip_over_comment(stream)
-        extra |= skip_over_whitespace(stream)
-        stream.seek(-1, 1)
-        idnum = read_until_whitespace(stream)
-        extra |= skip_over_whitespace(stream)
-        stream.seek(-1, 1)
-        generation = read_until_whitespace(stream)
-        extra |= skip_over_whitespace(stream)
-        stream.seek(-1, 1)
-
-        # although it's not used, it might still be necessary to read
-        _obj = stream.read(3)
-
-        read_non_whitespace(stream)
-        stream.seek(-1, 1)
-        if extra and self.strict:
-            logger_warning(
-                f"Superfluous whitespace found in object header {idnum} {generation}",  # type: ignore
-                __name__,
-            )
-        return int(idnum), int(generation)
-
-    def readObjectHeader(self, stream: StreamType) -> Tuple[int, int]:  # deprecated
-        """
-        Use :meth:`read_object_header` instead.
-
-        .. deprecated:: 1.28.0
-        """
-        deprecation_with_replacement("readObjectHeader", "read_object_header", "3.0.0")
-        return self.read_object_header(stream)
-
-    def cache_get_indirect_object(
-        self, generation: int, idnum: int
-    ) -> Optional[PdfObject]:
-        return self.resolved_objects.get((generation, idnum))
-
-    def cacheGetIndirectObject(
-        self, generation: int, idnum: int
-    ) -> Optional[PdfObject]:  # deprecated
-        """
-        Use :meth:`cache_get_indirect_object` instead.
-
-        .. deprecated:: 1.28.0
-        """
-        deprecation_with_replacement(
-            "cacheGetIndirectObject", "cache_get_indirect_object", "3.0.0"
-        )
-        return self.cache_get_indirect_object(generation, idnum)
-
-    def cache_indirect_object(
-        self, generation: int, idnum: int, obj: Optional[PdfObject]
-    ) -> Optional[PdfObject]:
-        if (generation, idnum) in self.resolved_objects:
-            msg = f"Overwriting cache for {generation} {idnum}"
-            if self.strict:
-                raise PdfReadError(msg)
-            logger_warning(msg, __name__)
-        self.resolved_objects[(generation, idnum)] = obj
-        if obj is not None:
-            obj.indirect_reference = IndirectObject(idnum, generation, self)
-        return obj
-
-    def cacheIndirectObject(
-        self, generation: int, idnum: int, obj: Optional[PdfObject]
-    ) -> Optional[PdfObject]:  # deprecated
-        """
-        Use :meth:`cache_indirect_object` instead.
-
-        .. deprecated:: 1.28.0
-        """
-        deprecation_with_replacement("cacheIndirectObject", "cache_indirect_object")
-        return self.cache_indirect_object(generation, idnum, obj)
-
-    def read(self, stream: StreamType) -> None:
-        self._basic_validation(stream)
-        self._find_eof_marker(stream)
-        startxref = self._find_startxref_pos(stream)
-
-        # check and eventually correct the startxref only in not strict
-        xref_issue_nr = self._get_xref_issues(stream, startxref)
-        if xref_issue_nr != 0:
-            if self.strict and xref_issue_nr:
-                raise PdfReadError("Broken xref table")
-            logger_warning(f"incorrect startxref pointer({xref_issue_nr})", __name__)
-
-        # read all cross reference tables and their trailers
-        self._read_xref_tables_and_trailers(stream, startxref, xref_issue_nr)
-
-        # if not zero-indexed, verify that the table is correct; change it if necessary
-        if self.xref_index and not self.strict:
-            loc = stream.tell()
-            for gen, xref_entry in self.xref.items():
-                if gen == 65535:
-                    continue
-                xref_k = sorted(
-                    xref_entry.keys()
-                )  # must ensure ascendant to prevent damage
-                for id in xref_k:
-                    stream.seek(xref_entry[id], 0)
-                    try:
-                        pid, _pgen = self.read_object_header(stream)
-                    except ValueError:
-                        break
-                    if pid == id - self.xref_index:
-                        # fixing index item per item is required for revised PDF.
-                        self.xref[gen][pid] = self.xref[gen][id]
-                        del self.xref[gen][id]
-                    # if not, then either it's just plain wrong, or the
-                    # non-zero-index is actually correct
-            stream.seek(loc, 0)  # return to where it was
-
-    def _basic_validation(self, stream: StreamType) -> None:
-        """Ensure file is not empty. Read at most 5 bytes."""
-        stream.seek(0, os.SEEK_SET)
-        try:
-            header_byte = stream.read(5)
-        except UnicodeDecodeError:
-            raise UnsupportedOperation("cannot read header")
-        if header_byte == b"":
-            raise EmptyFileError("Cannot read an empty file")
-        elif header_byte != b"%PDF-":
-            if self.strict:
-                raise PdfReadError(
-                    f"PDF starts with '{header_byte.decode('utf8')}', "
-                    "but '%PDF-' expected"
-                )
-            else:
-                logger_warning(f"invalid pdf header: {header_byte}", __name__)
-        stream.seek(0, os.SEEK_END)
-
-    def _find_eof_marker(self, stream: StreamType) -> None:
-        """
-        Jump to the %%EOF marker.
-
-        According to the specs, the %%EOF marker should be at the very end of
-        the file. Hence for standard-compliant PDF documents this function will
-        read only the last part (DEFAULT_BUFFER_SIZE).
-        """
-        HEADER_SIZE = 8  # to parse whole file, Header is e.g. '%PDF-1.6'
-        line = b""
-        while line[:5] != b"%%EOF":
-            if stream.tell() < HEADER_SIZE:
-                if self.strict:
-                    raise PdfReadError("EOF marker not found")
-                else:
-                    logger_warning("EOF marker not found", __name__)
-            line = read_previous_line(stream)
-
-    def _find_startxref_pos(self, stream: StreamType) -> int:
-        """
-        Find startxref entry - the location of the xref table.
-
-        Args:
-            stream:
-
-        Returns:
-            The bytes offset
-        """
-        line = read_previous_line(stream)
-        try:
-            startxref = int(line)
-        except ValueError:
-            # 'startxref' may be on the same line as the location
-            if not line.startswith(b"startxref"):
-                raise PdfReadError("startxref not found")
-            startxref = int(line[9:].strip())
-            logger_warning("startxref on same line as offset", __name__)
-        else:
-            line = read_previous_line(stream)
-            if line[:9] != b"startxref":
-                raise PdfReadError("startxref not found")
-        return startxref
-
-    def _read_standard_xref_table(self, stream: StreamType) -> None:
-        # standard cross-reference table
-        ref = stream.read(3)
-        if ref != b"ref":
-            raise PdfReadError("xref table read error")
-        read_non_whitespace(stream)
-        stream.seek(-1, 1)
-        first_time = True  # check if the first time looking at the xref table
-        while True:
-            num = cast(int, read_object(stream, self))
-            if first_time and num != 0:
-                self.xref_index = num
-                if self.strict:
-                    logger_warning(
-                        "Xref table not zero-indexed. ID numbers for objects will be corrected.",
-                        __name__,
-                    )
-                    # if table not zero indexed, could be due to error from when PDF was created
-                    # which will lead to mismatched indices later on, only warned and corrected if self.strict==True
-            first_time = False
-            read_non_whitespace(stream)
-            stream.seek(-1, 1)
-            size = cast(int, read_object(stream, self))
-            read_non_whitespace(stream)
-            stream.seek(-1, 1)
-            cnt = 0
-            while cnt < size:
-                line = stream.read(20)
-
-                # It's very clear in section 3.4.3 of the PDF spec
-                # that all cross-reference table lines are a fixed
-                # 20 bytes (as of PDF 1.7). However, some files have
-                # 21-byte entries (or more) due to the use of \r\n
-                # (CRLF) EOL's. Detect that case, and adjust the line
-                # until it does not begin with a \r (CR) or \n (LF).
-                while line[0] in b"\x0D\x0A":
-                    stream.seek(-20 + 1, 1)
-                    line = stream.read(20)
-
-                # On the other hand, some malformed PDF files
-                # use a single character EOL without a preceding
-                # space.  Detect that case, and seek the stream
-                # back one character.  (0-9 means we've bled into
-                # the next xref entry, t means we've bled into the
-                # text "trailer"):
-                if line[-1] in b"0123456789t":
-                    stream.seek(-1, 1)
-
-                try:
-                    offset_b, generation_b = line[:16].split(b" ")
-                    entry_type_b = line[17:18]
-
-                    offset, generation = int(offset_b), int(generation_b)
-                except Exception:
-                    # if something wrong occurred
-                    if hasattr(stream, "getbuffer"):
-                        buf = bytes(stream.getbuffer())  # type: ignore
-                    else:
-                        p = stream.tell()
-                        stream.seek(0, 0)
-                        buf = stream.read(-1)
-                        stream.seek(p)
-
-                    f = re.search(f"{num}\\s+(\\d+)\\s+obj".encode(), buf)
-                    if f is None:
-                        logger_warning(
-                            f"entry {num} in Xref table invalid; object not found",
-                            __name__,
-                        )
-                        generation = 65535
-                        offset = -1
-                    else:
-                        logger_warning(
-                            f"entry {num} in Xref table invalid but object found",
-                            __name__,
-                        )
-                        generation = int(f.group(1))
-                        offset = f.start()
-
-                if generation not in self.xref:
-                    self.xref[generation] = {}
-                    self.xref_free_entry[generation] = {}
-                if num in self.xref[generation]:
-                    # It really seems like we should allow the last
-                    # xref table in the file to override previous
-                    # ones. Since we read the file backwards, assume
-                    # any existing key is already set correctly.
-                    pass
-                else:
-                    self.xref[generation][num] = offset
-                    try:
-                        self.xref_free_entry[generation][num] = entry_type_b == b"f"
-                    except Exception:
-                        pass
-                    try:
-                        self.xref_free_entry[65535][num] = entry_type_b == b"f"
-                    except Exception:
-                        pass
-                cnt += 1
-                num += 1
-            read_non_whitespace(stream)
-            stream.seek(-1, 1)
-            trailer_tag = stream.read(7)
-            if trailer_tag != b"trailer":
-                # more xrefs!
-                stream.seek(-7, 1)
-            else:
-                break
-
-    def _read_xref_tables_and_trailers(
-        self, stream: StreamType, startxref: Optional[int], xref_issue_nr: int
-    ) -> None:
-        self.xref: Dict[int, Dict[Any, Any]] = {}
-        self.xref_free_entry: Dict[int, Dict[Any, Any]] = {}
-        self.xref_objStm: Dict[int, Tuple[Any, Any]] = {}
-        self.trailer = DictionaryObject()
-        while startxref is not None:
-            # load the xref table
-            stream.seek(startxref, 0)
-            x = stream.read(1)
-            if x in b"\r\n":
-                x = stream.read(1)
-            if x == b"x":
-                startxref = self._read_xref(stream)
-            elif xref_issue_nr:
-                try:
-                    self._rebuild_xref_table(stream)
-                    break
-                except Exception:
-                    xref_issue_nr = 0
-            elif x.isdigit():
-                try:
-                    xrefstream = self._read_pdf15_xref_stream(stream)
-                except Exception as e:
-                    if TK.ROOT in self.trailer:
-                        logger_warning(
-                            f"Previous trailer can not be read {e.args}",
-                            __name__,
-                        )
-                        break
-                    else:
-                        raise PdfReadError(f"trailer can not be read {e.args}")
-                trailer_keys = TK.ROOT, TK.ENCRYPT, TK.INFO, TK.ID, TK.SIZE
-                for key in trailer_keys:
-                    if key in xrefstream and key not in self.trailer:
-                        self.trailer[NameObject(key)] = xrefstream.raw_get(key)
-                if "/XRefStm" in xrefstream:
-                    p = stream.tell()
-                    stream.seek(cast(int, xrefstream["/XRefStm"]) + 1, 0)
-                    self._read_pdf15_xref_stream(stream)
-                    stream.seek(p, 0)
-                if "/Prev" in xrefstream:
-                    startxref = cast(int, xrefstream["/Prev"])
-                else:
-                    break
-            else:
-                startxref = self._read_xref_other_error(stream, startxref)
-
-    def _read_xref(self, stream: StreamType) -> Optional[int]:
-        self._read_standard_xref_table(stream)
-        read_non_whitespace(stream)
-        stream.seek(-1, 1)
-        new_trailer = cast(Dict[str, Any], read_object(stream, self))
-        for key, value in new_trailer.items():
-            if key not in self.trailer:
-                self.trailer[key] = value
-        if "/XRefStm" in new_trailer:
-            p = stream.tell()
-            stream.seek(cast(int, new_trailer["/XRefStm"]) + 1, 0)
-            try:
-                self._read_pdf15_xref_stream(stream)
-            except Exception:
-                logger_warning(
-                    f"XRef object at {new_trailer['/XRefStm']} can not be read, some object may be missing",
-                    __name__,
-                )
-            stream.seek(p, 0)
-        if "/Prev" in new_trailer:
-            startxref = new_trailer["/Prev"]
-            return startxref
-        else:
-            return None
-
-    def _read_xref_other_error(
-        self, stream: StreamType, startxref: int
-    ) -> Optional[int]:
-        # some PDFs have /Prev=0 in the trailer, instead of no /Prev
-        if startxref == 0:
-            if self.strict:
-                raise PdfReadError(
-                    "/Prev=0 in the trailer (try opening with strict=False)"
-                )
-            logger_warning(
-                "/Prev=0 in the trailer - assuming there is no previous xref table",
-                __name__,
-            )
-            return None
-        # bad xref character at startxref.  Let's see if we can find
-        # the xref table nearby, as we've observed this error with an
-        # off-by-one before.
-        stream.seek(-11, 1)
-        tmp = stream.read(20)
-        xref_loc = tmp.find(b"xref")
-        if xref_loc != -1:
-            startxref -= 10 - xref_loc
-            return startxref
-        # No explicit xref table, try finding a cross-reference stream.
-        stream.seek(startxref, 0)
-        for look in range(25):  # value extended to cope with more linearized files
-            if stream.read(1).isdigit():
-                # This is not a standard PDF, consider adding a warning
-                startxref += look
-                return startxref
-        # no xref table found at specified location
-        if "/Root" in self.trailer and not self.strict:
-            # if Root has been already found, just raise warning
-            logger_warning("Invalid parent xref., rebuild xref", __name__)
-            try:
-                self._rebuild_xref_table(stream)
-                return None
-            except Exception:
-                raise PdfReadError("can not rebuild xref")
-        raise PdfReadError("Could not find xref table at specified location")
-
-    def _read_pdf15_xref_stream(
-        self, stream: StreamType
-    ) -> Union[ContentStream, EncodedStreamObject, DecodedStreamObject]:
-        # PDF 1.5+ Cross-Reference Stream
-        stream.seek(-1, 1)
-        idnum, generation = self.read_object_header(stream)
-        xrefstream = cast(ContentStream, read_object(stream, self))
-        assert cast(str, xrefstream["/Type"]) == "/XRef"
-        self.cache_indirect_object(generation, idnum, xrefstream)
-        stream_data = BytesIO(b_(xrefstream.get_data()))
-        # Index pairs specify the subsections in the dictionary. If
-        # none create one subsection that spans everything.
-        idx_pairs = xrefstream.get("/Index", [0, xrefstream.get("/Size")])
-        entry_sizes = cast(Dict[Any, Any], xrefstream.get("/W"))
-        assert len(entry_sizes) >= 3
-        if self.strict and len(entry_sizes) > 3:
-            raise PdfReadError(f"Too many entry sizes: {entry_sizes}")
-
-        def get_entry(i: int) -> Union[int, Tuple[int, ...]]:
-            # Reads the correct number of bytes for each entry. See the
-            # discussion of the W parameter in PDF spec table 17.
-            if entry_sizes[i] > 0:
-                d = stream_data.read(entry_sizes[i])
-                return convert_to_int(d, entry_sizes[i])
-
-            # PDF Spec Table 17: A value of zero for an element in the
-            # W array indicates...the default value shall be used
-            if i == 0:
-                return 1  # First value defaults to 1
-            else:
-                return 0
-
-        def used_before(num: int, generation: Union[int, Tuple[int, ...]]) -> bool:
-            # We move backwards through the xrefs, don't replace any.
-            return num in self.xref.get(generation, []) or num in self.xref_objStm  # type: ignore
-
-        # Iterate through each subsection
-        self._read_xref_subsections(idx_pairs, get_entry, used_before)
-        return xrefstream
-
-    @staticmethod
-    def _get_xref_issues(stream: StreamType, startxref: int) -> int:
-        """
-        Return an int which indicates an issue. 0 means there is no issue.
-
-        Args:
-            stream:
-            startxref:
-
-        Returns:
-            0 means no issue, other values represent specific issues.
-        """
-        stream.seek(startxref - 1, 0)  # -1 to check character before
-        line = stream.read(1)
-        if line == b"j":
-            line = stream.read(1)
-        if line not in b"\r\n \t":
-            return 1
-        line = stream.read(4)
-        if line != b"xref":
-            # not an xref so check if it is an XREF object
-            line = b""
-            while line in b"0123456789 \t":
-                line = stream.read(1)
-                if line == b"":
-                    return 2
-            line += stream.read(2)  # 1 char already read, +2 to check "obj"
-            if line.lower() != b"obj":
-                return 3
-        return 0
-
-    def _rebuild_xref_table(self, stream: StreamType) -> None:
-        self.xref = {}
-        stream.seek(0, 0)
-        f_ = stream.read(-1)
-
-        for m in re.finditer(rb"[\r\n \t][ \t]*(\d+)[ \t]+(\d+)[ \t]+obj", f_):
-            idnum = int(m.group(1))
-            generation = int(m.group(2))
-            if generation not in self.xref:
-                self.xref[generation] = {}
-            self.xref[generation][idnum] = m.start(1)
-        stream.seek(0, 0)
-        for m in re.finditer(rb"[\r\n \t][ \t]*trailer[\r\n \t]*(<<)", f_):
-            stream.seek(m.start(1), 0)
-            new_trailer = cast(Dict[Any, Any], read_object(stream, self))
-            # Here, we are parsing the file from start to end, the new data have to erase the existing.
-            for key, value in list(new_trailer.items()):
-                self.trailer[key] = value
-
-    def _read_xref_subsections(
-        self,
-        idx_pairs: List[int],
-        get_entry: Callable[[int], Union[int, Tuple[int, ...]]],
-        used_before: Callable[[int, Union[int, Tuple[int, ...]]], bool],
-    ) -> None:
-        for start, size in self._pairs(idx_pairs):
-            # The subsections must increase
-            for num in range(start, start + size):
-                # The first entry is the type
-                xref_type = get_entry(0)
-                # The rest of the elements depend on the xref_type
-                if xref_type == 0:
-                    # linked list of free objects
-                    next_free_object = get_entry(1)  # noqa: F841
-                    next_generation = get_entry(2)  # noqa: F841
-                elif xref_type == 1:
-                    # objects that are in use but are not compressed
-                    byte_offset = get_entry(1)
-                    generation = get_entry(2)
-                    if generation not in self.xref:
-                        self.xref[generation] = {}  # type: ignore
-                    if not used_before(num, generation):
-                        self.xref[generation][num] = byte_offset  # type: ignore
-                elif xref_type == 2:
-                    # compressed objects
-                    objstr_num = get_entry(1)
-                    obstr_idx = get_entry(2)
-                    generation = 0  # PDF spec table 18, generation is 0
-                    if not used_before(num, generation):
-                        self.xref_objStm[num] = (objstr_num, obstr_idx)
-                elif self.strict:
-                    raise PdfReadError(f"Unknown xref type: {xref_type}")
-
-    def _pairs(self, array: List[int]) -> Iterable[Tuple[int, int]]:
-        i = 0
-        while True:
-            yield array[i], array[i + 1]
-            i += 2
-            if (i + 1) >= len(array):
-                break
-
-    def read_next_end_line(
-        self, stream: StreamType, limit_offset: int = 0
-    ) -> bytes:  # deprecated
-        """.. deprecated:: 2.1.0"""
-        deprecate_no_replacement("read_next_end_line", removed_in="4.0.0")
-        line_parts = []
-        while True:
-            # Prevent infinite loops in malformed PDFs
-            if stream.tell() == 0 or stream.tell() == limit_offset:
-                raise PdfReadError("Could not read malformed PDF file")
-            x = stream.read(1)
-            if stream.tell() < 2:
-                raise PdfReadError("EOL marker not found")
-            stream.seek(-2, 1)
-            if x in (b"\n", b"\r"):  # \n = LF; \r = CR
-                crlf = False
-                while x in (b"\n", b"\r"):
-                    x = stream.read(1)
-                    if x in (b"\n", b"\r"):  # account for CR+LF
-                        stream.seek(-1, 1)
-                        crlf = True
-                    if stream.tell() < 2:
-                        raise PdfReadError("EOL marker not found")
-                    stream.seek(-2, 1)
-                stream.seek(
-                    2 if crlf else 1, 1
-                )  # if using CR+LF, go back 2 bytes, else 1
-                break
-            else:
-                line_parts.append(x)
-        line_parts.reverse()
-        return b"".join(line_parts)
-
-    def readNextEndLine(
-        self, stream: StreamType, limit_offset: int = 0
-    ) -> bytes:  # deprecated
-        """.. deprecated:: 1.28.0"""
-        deprecation_no_replacement("readNextEndLine", "3.0.0")
-        return self.read_next_end_line(stream, limit_offset)
-
-    def decrypt(self, password: Union[str, bytes]) -> PasswordType:
-        """
-        When using an encrypted / secured PDF file with the PDF Standard
-        encryption handler, this function will allow the file to be decrypted.
-        It checks the given password against the document's user password and
-        owner password, and then stores the resulting decryption key if either
-        password is correct.
-
-        It does not matter which password was matched.  Both passwords provide
-        the correct decryption key that will allow the document to be used with
-        this library.
-
-        Args:
-            password: The password to match.
-
-        Returns:
-            An indicator if the document was decrypted and weather it was the
-            owner password or the user password.
-        """
-        if not self._encryption:
-            raise PdfReadError("Not encrypted file")
-        # TODO: raise Exception for wrong password
-        return self._encryption.verify(password)
-
-    def decode_permissions(self, permissions_code: int) -> Dict[str, bool]:
-        # Takes the permissions as an integer, returns the allowed access
-        permissions = {}
-        permissions["print"] = permissions_code & (1 << 3 - 1) != 0  # bit 3
-        permissions["modify"] = permissions_code & (1 << 4 - 1) != 0  # bit 4
-        permissions["copy"] = permissions_code & (1 << 5 - 1) != 0  # bit 5
-        permissions["annotations"] = permissions_code & (1 << 6 - 1) != 0  # bit 6
-        permissions["forms"] = permissions_code & (1 << 9 - 1) != 0  # bit 9
-        permissions["accessability"] = permissions_code & (1 << 10 - 1) != 0  # bit 10
-        permissions["assemble"] = permissions_code & (1 << 11 - 1) != 0  # bit 11
-        permissions["print_high_quality"] = (
-            permissions_code & (1 << 12 - 1) != 0
-        )  # bit 12
-        return permissions
-
-    @property
-    def is_encrypted(self) -> bool:
-        """
-        Read-only boolean property showing whether this PDF file is encrypted.
-
-        Note that this property, if true, will remain true even after the
-        :meth:`decrypt()<pypdf.PdfReader.decrypt>` method is called.
-        """
-        return TK.ENCRYPT in self.trailer
-
-    def getIsEncrypted(self) -> bool:  # deprecated
-        """
-        Use :py:attr:`is_encrypted` instead.
-
-        .. deprecated:: 1.28.0
-        """
-        deprecation_with_replacement("getIsEncrypted", "is_encrypted", "3.0.0")
-        return self.is_encrypted
-
-    @property
-    def isEncrypted(self) -> bool:  # deprecated
-        """
-        Use :py:attr:`is_encrypted` instead.
-
-        .. deprecated:: 1.28.0
-        """
-        deprecation_with_replacement("isEncrypted", "is_encrypted", "3.0.0")
-        return self.is_encrypted
-
-    @property
-    def xfa(self) -> Optional[Dict[str, Any]]:
-        tree: Optional[TreeObject] = None
-        retval: Dict[str, Any] = {}
-        catalog = cast(DictionaryObject, self.trailer[TK.ROOT])
-
-        if "/AcroForm" not in catalog or not catalog["/AcroForm"]:
-            return None
-
-        tree = cast(TreeObject, catalog["/AcroForm"])
-
-        if "/XFA" in tree:
-            fields = cast(ArrayObject, tree["/XFA"])
-            i = iter(fields)
-            for f in i:
-                tag = f
-                f = next(i)
-                if isinstance(f, IndirectObject):
-                    field = cast(Optional[EncodedStreamObject], f.get_object())
-                    if field:
-                        es = zlib.decompress(b_(field._data))
-                        retval[tag] = es
-        return retval
-
-    def add_form_topname(self, name: str) -> Optional[DictionaryObject]:
-        """
-        Add a top level form that groups all form fields below it.
-
-        Args:
-            name: text string of the "/T" Attribute of the created object
-
-        Returns:
-            The created object. ``None`` means no object was created.
-        """
-        catalog = cast(DictionaryObject, self.trailer[TK.ROOT])
-
-        if "/AcroForm" not in catalog or not isinstance(
-            catalog["/AcroForm"], DictionaryObject
-        ):
-            return None
-        acroform = cast(DictionaryObject, catalog[NameObject("/AcroForm")])
-        if "/Fields" not in acroform:
-            # TODO: :No error returns but may be extended for XFA Forms
-            return None
-
-        interim = DictionaryObject()
-        interim[NameObject("/T")] = TextStringObject(name)
-        interim[NameObject("/Kids")] = acroform[NameObject("/Fields")]
-        self.cache_indirect_object(
-            0,
-            max([i for (g, i) in self.resolved_objects if g == 0]) + 1,
-            interim,
-        )
-        arr = ArrayObject()
-        arr.append(interim.indirect_reference)
-        acroform[NameObject("/Fields")] = arr
-        for o in cast(ArrayObject, interim["/Kids"]):
-            obj = o.get_object()
-            if "/Parent" in obj:
-                logger_warning(
-                    f"Top Level Form Field {obj.indirect_reference} have a non-expected parent",
-                    __name__,
-                )
-            obj[NameObject("/Parent")] = interim.indirect_reference
-        return interim
-
-    def rename_form_topname(self, name: str) -> Optional[DictionaryObject]:
-        """
-        Rename top level form field that all form fields below it.
-
-        Args:
-            name: text string of the "/T" field of the created object
-
-        Returns:
-            The modified object. ``None`` means no object was modified.
-        """
-        catalog = cast(DictionaryObject, self.trailer[TK.ROOT])
-
-        if "/AcroForm" not in catalog or not isinstance(
-            catalog["/AcroForm"], DictionaryObject
-        ):
-            return None
-        acroform = cast(DictionaryObject, catalog[NameObject("/AcroForm")])
-        if "/Fields" not in acroform:
-            return None
-
-        interim = cast(
-            DictionaryObject,
-            cast(ArrayObject, acroform[NameObject("/Fields")])[0].get_object(),
-        )
-        interim[NameObject("/T")] = TextStringObject(name)
-        return interim
-
-    def _get_embedded_files_root(self) -> Optional[NameTree]:
-        """
-        Returns the EmbeddedFiles root as a NameTree Object
-        if the root does not exists, return None
-        """
-        catalog = cast(DictionaryObject, self.trailer["/Root"])
-        if "/Names" not in catalog:
-            return None
-        ef = cast(DictionaryObject, catalog["/Names"]).get("/EmbeddedFiles", None)
-        if ef is None:
-            return None
-        efo = ef.get_object()
-        # not for reader
-        """
-            if not isinstance(efo,NameTree):
-            if isinstance(ef,IndirectObject):
-                ef.replace_object(efo)
-            else:
-                cast(DictionaryObject,catalog["/Names"])[
-                    NameObject("/EmbeddedFiles")] = NameTree(efo)
-        """
-        return NameTree(efo)
-
-    @property
-    def embedded_files(self) -> Optional[Mapping[str, List[PdfObject]]]:
-        ef = self._get_embedded_files_root()
-        if ef:
-            return ef.list_items()
-        else:
-            return None
-
-    @property
-    def attachments(self) -> Mapping[str, List[bytes]]:
-        ef = self._get_embedded_files_root()
-        if ef:
-            d = {}
-            for k, v in ef.list_items().items():
-                if isinstance(v, list):
-                    d[k] = [e["/EF"]["/F"].get_data() for e in v]  # type: ignore
-            return d
-        else:
-            return {}
-
-    def _list_attachments(self) -> List[str]:
-        """
-        Retrieves the list of filenames of file attachments.
-
-        Returns:
-            list of filenames
-        """
-        ef = self._get_embedded_files_root()
-        if ef:
-            lst = ef.list_keys()
-        else:
-            lst = []
-        """
-        for ip, p in enumerate(self.pages):
-            for a in [_a.get_object()
-                      for _a in p.get("/Annots",[])]:
-                if _a.get_object().get("/Subtype","") != "/FileAttachements":
-                    continue
-                lst.append(f"$page_{ip}.{get_name_from_file_specification(_a)}")
-        """
-        return lst
-
-    def _get_attachment_list(self, name: str) -> List[bytes]:
-        out = self._get_attachments(name)[name]
-        if isinstance(out, list):
-            return out
-        return [out]
-
-    def _get_attachments(
-        self, filename: Optional[str] = None
-    ) -> Dict[str, Union[bytes, List[bytes]]]:
-        """
-        Retrieves all or selected file attachments of the PDF as a dictionary of file names
-        and the file data as a bytestring.
-
-        Args:
-            filename: If filename is None, then a dictionary of all attachments
-                will be returned, where the key is the filename and the value
-                is the content. Otherwise, a dictionary with just a single key
-                - the filename - and its content will be returned.
-
-        Returns:
-            dictionary of filename -> Union[bytestring or List[ByteString]]
-            if the filename exists multiple times a List of the different version will be provided
-        """
-        ef = self._get_embedded_files_root()
-        if ef is None:
-            return {}
-        if filename is None:
-            return {k: v if len(v) > 1 else v[0] for k, v in self.attachments.items()}
-        else:
-            lst = ef.list_get(filename)
-            return {
-                filename: [x["/EF"]["/F"].get_data() for x in lst]  # type: ignore
-                if isinstance(lst, list)
-                else lst["/EF"]["/F"].get_data()  # type: ignore
-            }
-
-
-class PdfFileReader(PdfReader):  # deprecated
-    def __init__(self, *args: Any, **kwargs: Any) -> None:
-        deprecation_with_replacement("PdfFileReader", "PdfReader", "3.0.0")
-        if "strict" not in kwargs and len(args) < 2:
-            kwargs["strict"] = True  # maintain the default
-        super().__init__(*args, **kwargs)
+# Copyright (c) 2006, Mathieu Fenniak
+# Copyright (c) 2007, Ashish Kulkarni <kulkarni.ashish@gmail.com>
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+# * The name of the author may not be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+import os
+import re
+import struct
+import zlib
+from datetime import datetime
+from io import BytesIO, UnsupportedOperation
+from pathlib import Path
+from typing import (
+    Any,
+    Callable,
+    Dict,
+    Iterable,
+    List,
+    Mapping,
+    Optional,
+    Tuple,
+    Union,
+    cast,
+)
+
+from ._encryption import Encryption, PasswordType
+from ._page import PageObject, _VirtualList
+from ._page_labels import index2label as page_index2page_label
+from ._utils import (
+    StrByteType,
+    StreamType,
+    b_,
+    deprecate_no_replacement,
+    deprecation_no_replacement,
+    deprecation_with_replacement,
+    logger_warning,
+    parse_iso8824_date,
+    read_non_whitespace,
+    read_previous_line,
+    read_until_whitespace,
+    skip_over_comment,
+    skip_over_whitespace,
+)
+from .constants import CatalogAttributes as CA
+from .constants import CatalogDictionary as CD
+from .constants import (
+    CheckboxRadioButtonAttributes,
+    GoToActionArguments,
+)
+from .constants import Core as CO
+from .constants import DocumentInformationAttributes as DI
+from .constants import FieldDictionaryAttributes as FA
+from .constants import PageAttributes as PG
+from .constants import PagesAttributes as PA
+from .constants import TrailerKeys as TK
+from .errors import (
+    EmptyFileError,
+    FileNotDecryptedError,
+    PdfReadError,
+    PdfStreamError,
+    WrongPasswordError,
+)
+from .generic import (
+    ArrayObject,
+    BooleanObject,
+    ContentStream,
+    DecodedStreamObject,
+    Destination,
+    DictionaryObject,
+    EncodedStreamObject,
+    Field,
+    Fit,
+    FloatObject,
+    IndirectObject,
+    NameObject,
+    NameTree,
+    NullObject,
+    NumberObject,
+    PdfObject,
+    TextStringObject,
+    TreeObject,
+    ViewerPreferences,
+    read_object,
+)
+from .types import OutlineType, PagemodeType
+from .xmp import XmpInformation
+
+
+def convert_to_int(d: bytes, size: int) -> Union[int, Tuple[Any, ...]]:
+    if size > 8:
+        raise PdfReadError("invalid size in convert_to_int")
+    d = b"\x00\x00\x00\x00\x00\x00\x00\x00" + d
+    d = d[-8:]
+    return struct.unpack(">q", d)[0]
+
+
+def convertToInt(d: bytes, size: int) -> Union[int, Tuple[Any, ...]]:  # deprecated
+    deprecation_with_replacement("convertToInt", "convert_to_int")
+    return convert_to_int(d, size)
+
+
+class DocumentInformation(DictionaryObject):
+    """
+    A class representing the basic document metadata provided in a PDF File.
+    This class is accessible through
+    :py:class:`PdfReader.metadata<pypdf.PdfReader.metadata>`.
+
+    All text properties of the document metadata have
+    *two* properties, e.g. author and author_raw. The non-raw property will
+    always return a ``TextStringObject``, making it ideal for a case where
+    the metadata is being displayed. The raw property can sometimes return
+    a ``ByteStringObject``, if pypdf was unable to decode the string's
+    text encoding; this requires additional safety in the caller and
+    therefore is not as commonly accessed.
+    """
+
+    def __init__(self) -> None:
+        DictionaryObject.__init__(self)
+
+    def _get_text(self, key: str) -> Optional[str]:
+        retval = self.get(key, None)
+        if isinstance(retval, TextStringObject):
+            return retval
+        return None
+
+    def getText(self, key: str) -> Optional[str]:  # deprecated
+        """
+        Use the attributes (e.g. :py:attr:`title` / :py:attr:`author`).
+
+        .. deprecated:: 1.28.0
+        """
+        deprecation_no_replacement("getText", "3.0.0")
+        return self._get_text(key)
+
+    @property
+    def title(self) -> Optional[str]:
+        """
+        Read-only property accessing the document's title.
+
+        Returns a ``TextStringObject`` or ``None`` if the title is not
+        specified.
+        """
+        return (
+            self._get_text(DI.TITLE) or self.get(DI.TITLE).get_object()  # type: ignore
+            if self.get(DI.TITLE)
+            else None
+        )
+
+    @property
+    def title_raw(self) -> Optional[str]:
+        """The "raw" version of title; can return a ``ByteStringObject``."""
+        return self.get(DI.TITLE)
+
+    @property
+    def author(self) -> Optional[str]:
+        """
+        Read-only property accessing the document's author.
+
+        Returns a ``TextStringObject`` or ``None`` if the author is not
+        specified.
+        """
+        return self._get_text(DI.AUTHOR)
+
+    @property
+    def author_raw(self) -> Optional[str]:
+        """The "raw" version of author; can return a ``ByteStringObject``."""
+        return self.get(DI.AUTHOR)
+
+    @property
+    def subject(self) -> Optional[str]:
+        """
+        Read-only property accessing the document's subject.
+
+        Returns a ``TextStringObject`` or ``None`` if the subject is not
+        specified.
+        """
+        return self._get_text(DI.SUBJECT)
+
+    @property
+    def subject_raw(self) -> Optional[str]:
+        """The "raw" version of subject; can return a ``ByteStringObject``."""
+        return self.get(DI.SUBJECT)
+
+    @property
+    def creator(self) -> Optional[str]:
+        """
+        Read-only property accessing the document's creator.
+
+        If the document was converted to PDF from another format, this is the
+        name of the application (e.g. OpenOffice) that created the original
+        document from which it was converted. Returns a ``TextStringObject`` or
+        ``None`` if the creator is not specified.
+        """
+        return self._get_text(DI.CREATOR)
+
+    @property
+    def creator_raw(self) -> Optional[str]:
+        """The "raw" version of creator; can return a ``ByteStringObject``."""
+        return self.get(DI.CREATOR)
+
+    @property
+    def producer(self) -> Optional[str]:
+        """
+        Read-only property accessing the document's producer.
+
+        If the document was converted to PDF from another format, this is the
+        name of the application (for example, OSX Quartz) that converted it to
+        PDF. Returns a ``TextStringObject`` or ``None`` if the producer is not
+        specified.
+        """
+        return self._get_text(DI.PRODUCER)
+
+    @property
+    def producer_raw(self) -> Optional[str]:
+        """The "raw" version of producer; can return a ``ByteStringObject``."""
+        return self.get(DI.PRODUCER)
+
+    @property
+    def creation_date(self) -> Optional[datetime]:
+        """Read-only property accessing the document's creation date."""
+        return parse_iso8824_date(self._get_text(DI.CREATION_DATE))
+
+    @property
+    def creation_date_raw(self) -> Optional[str]:
+        """
+        The "raw" version of creation date; can return a ``ByteStringObject``.
+
+        Typically in the format ``D:YYYYMMDDhhmmss[+Z-]hh'mm`` where the suffix
+        is the offset from UTC.
+        """
+        return self.get(DI.CREATION_DATE)
+
+    @property
+    def modification_date(self) -> Optional[datetime]:
+        """
+        Read-only property accessing the document's modification date.
+
+        The date and time the document was most recently modified.
+        """
+        return parse_iso8824_date(self._get_text(DI.MOD_DATE))
+
+    @property
+    def modification_date_raw(self) -> Optional[str]:
+        """
+        The "raw" version of modification date; can return a
+        ``ByteStringObject``.
+
+        Typically in the format ``D:YYYYMMDDhhmmss[+Z-]hh'mm`` where the suffix
+        is the offset from UTC.
+        """
+        return self.get(DI.MOD_DATE)
+
+
+class PdfReader:
+    """
+    Initialize a PdfReader object.
+
+    This operation can take some time, as the PDF stream's cross-reference
+    tables are read into memory.
+
+    Args:
+        stream: A File object or an object that supports the standard read
+            and seek methods similar to a File object. Could also be a
+            string representing a path to a PDF file.
+        strict: Determines whether user should be warned of all
+            problems and also causes some correctable problems to be fatal.
+            Defaults to ``False``.
+        password: Decrypt PDF file at initialization. If the
+            password is None, the file will not be decrypted.
+            Defaults to ``None``
+    """
+
+    @property
+    def viewer_preferences(self) -> Optional[ViewerPreferences]:
+        """Returns the existing ViewerPreferences as an overloaded dictionary."""
+        o = cast(DictionaryObject, self.trailer["/Root"]).get(
+            CD.VIEWER_PREFERENCES, None
+        )
+        if o is None:
+            return None
+        o = o.get_object()
+        if not isinstance(o, ViewerPreferences):
+            o = ViewerPreferences(o)
+        return o
+
+    def __init__(
+        self,
+        stream: Union[StrByteType, Path],
+        strict: bool = False,
+        password: Union[None, str, bytes] = None,
+    ) -> None:
+        self.strict = strict
+        self.flattened_pages: Optional[List[PageObject]] = None
+        self.resolved_objects: Dict[Tuple[Any, Any], Optional[PdfObject]] = {}
+        self.xref_index = 0
+        self._page_id2num: Optional[
+            Dict[Any, Any]
+        ] = None  # map page indirect_reference number to Page Number
+        if hasattr(stream, "mode") and "b" not in stream.mode:  # type: ignore
+            logger_warning(
+                "PdfReader stream/file object is not in binary mode. "
+                "It may not be read correctly.",
+                __name__,
+            )
+        if isinstance(stream, (str, Path)):
+            with open(stream, "rb") as fh:
+                stream = BytesIO(fh.read())
+        self.read(stream)
+        self.stream = stream
+
+        self._override_encryption = False
+        self._encryption: Optional[Encryption] = None
+        if self.is_encrypted:
+            self._override_encryption = True
+            # Some documents may not have a /ID, use two empty
+            # byte strings instead. Solves
+            # https://github.com/py-pdf/pypdf/issues/608
+            id_entry = self.trailer.get(TK.ID)
+            id1_entry = id_entry[0].get_object().original_bytes if id_entry else b""
+            encrypt_entry = cast(
+                DictionaryObject, self.trailer[TK.ENCRYPT].get_object()
+            )
+            self._encryption = Encryption.read(encrypt_entry, id1_entry)
+
+            # try empty password if no password provided
+            pwd = password if password is not None else b""
+            if (
+                self._encryption.verify(pwd) == PasswordType.NOT_DECRYPTED
+                and password is not None
+            ):
+                # raise if password provided
+                raise WrongPasswordError("Wrong password")
+            self._override_encryption = False
+        elif password is not None:
+            raise PdfReadError("Not encrypted file")
+
+    @property
+    def pdf_header(self) -> str:
+        """
+        The first 8 bytes of the file.
+
+        This is typically something like ``'%PDF-1.6'`` and can be used to
+        detect if the file is actually a PDF file and which version it is.
+        """
+        # TODO: Make this return a bytes object for consistency
+        #       but that needs a deprecation
+        loc = self.stream.tell()
+        self.stream.seek(0, 0)
+        pdf_file_version = self.stream.read(8).decode("utf-8", "backslashreplace")
+        self.stream.seek(loc, 0)  # return to where it was
+        return pdf_file_version
+
+    @property
+    def metadata(self) -> Optional[DocumentInformation]:
+        """
+        Retrieve the PDF file's document information dictionary, if it exists.
+
+        Note that some PDF files use metadata streams instead of docinfo
+        dictionaries, and these metadata streams will not be accessed by this
+        function.
+        """
+        if TK.INFO not in self.trailer:
+            return None
+        obj = self.trailer[TK.INFO]
+        retval = DocumentInformation()
+        if isinstance(obj, type(None)):
+            raise PdfReadError(
+                "trailer not found or does not point to document information directory"
+            )
+        retval.update(obj)  # type: ignore
+        return retval
+
+    def getDocumentInfo(self) -> Optional[DocumentInformation]:  # deprecated
+        """
+        Use the attribute :py:attr:`metadata` instead.
+
+        .. deprecated:: 1.28.0
+        """
+        deprecation_with_replacement("getDocumentInfo", "metadata", "3.0.0")
+        return self.metadata
+
+    @property
+    def documentInfo(self) -> Optional[DocumentInformation]:  # deprecated
+        """
+        Use the attribute :py:attr:`metadata` instead.
+
+        .. deprecated:: 1.28.0
+        """
+        deprecation_with_replacement("documentInfo", "metadata", "3.0.0")
+        return self.metadata
+
+    @property
+    def xmp_metadata(self) -> Optional[XmpInformation]:
+        """XMP (Extensible Metadata Platform) data."""
+        try:
+            self._override_encryption = True
+            return self.trailer[TK.ROOT].xmp_metadata  # type: ignore
+        finally:
+            self._override_encryption = False
+
+    def getXmpMetadata(self) -> Optional[XmpInformation]:  # deprecated
+        """
+        Use the attribute :py:attr:`xmp_metadata` instead.
+
+        .. deprecated:: 1.28.0
+        """
+        deprecation_with_replacement("getXmpMetadata", "xmp_metadata", "3.0.0")
+        return self.xmp_metadata
+
+    @property
+    def xmpMetadata(self) -> Optional[XmpInformation]:  # deprecated
+        """
+        Use the attribute :py:attr:`xmp_metadata` instead.
+
+        .. deprecated:: 1.28.0
+        """
+        deprecation_with_replacement("xmpMetadata", "xmp_metadata", "3.0.0")
+        return self.xmp_metadata
+
+    def _get_num_pages(self) -> int:
+        """
+        Calculate the number of pages in this PDF file.
+
+        Returns:
+            The number of pages of the parsed PDF file
+
+        Raises:
+            PdfReadError: if file is encrypted and restrictions prevent
+                this action.
+        """
+        # Flattened pages will not work on an Encrypted PDF;
+        # the PDF file's page count is used in this case. Otherwise,
+        # the original method (flattened page count) is used.
+        if self.is_encrypted:
+            return self.trailer[TK.ROOT]["/Pages"]["/Count"]  # type: ignore
+        else:
+            if self.flattened_pages is None:
+                self._flatten()
+            return len(self.flattened_pages)  # type: ignore
+
+    def getNumPages(self) -> int:  # deprecated
+        """
+        Use :code:`len(reader.pages)` instead.
+
+        .. deprecated:: 1.28.0
+        """
+        deprecation_with_replacement("reader.getNumPages", "len(reader.pages)", "3.0.0")
+        return self._get_num_pages()
+
+    @property
+    def numPages(self) -> int:  # deprecated
+        """
+        Use :code:`len(reader.pages)` instead.
+
+        .. deprecated:: 1.28.0
+        """
+        deprecation_with_replacement("reader.numPages", "len(reader.pages)", "3.0.0")
+        return self._get_num_pages()
+
+    def getPage(self, pageNumber: int) -> PageObject:  # deprecated
+        """
+        Use :code:`reader.pages[page_number]` instead.
+
+        .. deprecated:: 1.28.0
+        """
+        deprecation_with_replacement(
+            "reader.getPage(pageNumber)", "reader.pages[page_number]", "3.0.0"
+        )
+        return self._get_page(pageNumber)
+
+    def _get_page(self, page_number: int) -> PageObject:
+        """
+        Retrieve a page by number from this PDF file.
+
+        Args:
+            page_number: The page number to retrieve
+                (pages begin at zero)
+
+        Returns:
+            A :class:`PageObject<pypdf._page.PageObject>` instance.
+        """
+        if self.flattened_pages is None:
+            self._flatten()
+        assert self.flattened_pages is not None, "hint for mypy"
+        return self.flattened_pages[page_number]
+
+    @property
+    def namedDestinations(self) -> Dict[str, Any]:  # deprecated
+        """
+        Use :py:attr:`named_destinations` instead.
+
+        .. deprecated:: 1.28.0
+        """
+        deprecation_with_replacement("namedDestinations", "named_destinations", "3.0.0")
+        return self.named_destinations
+
+    @property
+    def named_destinations(self) -> Dict[str, Any]:
+        """
+        A read-only dictionary which maps names to
+        :class:`Destinations<pypdf.generic.Destination>`
+        """
+        return self._get_named_destinations()
+
+    # A select group of relevant field attributes. For the complete list,
+    # see section 8.6.2 of the PDF 1.7 reference.
+
+    def get_fields(
+        self,
+        tree: Optional[TreeObject] = None,
+        retval: Optional[Dict[Any, Any]] = None,
+        fileobj: Optional[Any] = None,
+    ) -> Optional[Dict[str, Any]]:
+        """
+        Extract field data if this PDF contains interactive form fields.
+
+        The *tree* and *retval* parameters are for recursive use.
+
+        Args:
+            tree: Node to walk; replaced by the catalog's AcroForm when *retval* is None.
+            retval: Dictionary the fields are collected into; created when None.
+            fileobj: A file object (usually a text file) to write
+                a report to on all interactive form fields found.
+
+        Returns:
+            A dictionary where each key is a field name, and each
+            value is a :class:`Field<pypdf.generic.Field>` object. By
+            default, the mapping name is used for keys.
+            ``None`` if form data could not be located.
+        """
+        field_attributes = FA.attributes_dict()
+        field_attributes.update(CheckboxRadioButtonAttributes.attributes_dict())
+        if retval is None:
+            retval = {}
+            catalog = cast(DictionaryObject, self.trailer[TK.ROOT])
+            # get the AcroForm tree
+            if CD.ACRO_FORM in catalog:
+                tree = cast(Optional[TreeObject], catalog[CD.ACRO_FORM])
+            else:
+                return None
+        if tree is None:
+            return retval
+        self._check_kids(tree, retval, fileobj)
+        for attr in field_attributes:
+            if attr in tree:
+                # Tree is a field
+                self._build_field(tree, retval, fileobj, field_attributes)
+                break
+
+        if "/Fields" in tree:
+            fields = cast(ArrayObject, tree["/Fields"])
+            for f in fields:
+                field = f.get_object()
+                self._build_field(field, retval, fileobj, field_attributes)
+
+        return retval
+
+    def getFields(
+        self,
+        tree: Optional[TreeObject] = None,
+        retval: Optional[Dict[Any, Any]] = None,
+        fileobj: Optional[Any] = None,
+    ) -> Optional[Dict[str, Any]]:  # deprecated
+        """
+        Use :meth:`get_fields` instead.
+
+        .. deprecated:: 1.28.0
+        """
+        deprecation_with_replacement("getFields", "get_fields", "3.0.0")
+        return self.get_fields(tree, retval, fileobj)
+
+    def _get_qualified_field_name(self, parent: DictionaryObject) -> str:
+        if "/TM" in parent:
+            return cast(str, parent["/TM"])
+        elif "/Parent" in parent:
+            return (
+                self._get_qualified_field_name(
+                    cast(DictionaryObject, parent["/Parent"])
+                )
+                + "."
+                + cast(str, parent["/T"])
+            )
+        else:
+            return cast(str, parent["/T"])
+
+    def _build_field(
+        self,
+        field: Union[TreeObject, DictionaryObject],
+        retval: Dict[Any, Any],
+        fileobj: Any,
+        field_attributes: Any,
+    ) -> None:
+        self._check_kids(field, retval, fileobj)
+        try:
+            key = cast(str, field["/TM"])
+        except KeyError:
+            try:
+                if "/Parent" in field:
+                    key = (
+                        self._get_qualified_field_name(
+                            cast(DictionaryObject, field["/Parent"])
+                        )
+                        + "."
+                    )
+                else:
+                    key = ""
+                key += cast(str, field["/T"])
+            except KeyError:
+                # Ignore no-name field for now
+                return
+        if fileobj:
+            self._write_field(fileobj, field, field_attributes)
+            fileobj.write("\n")
+        retval[key] = Field(field)
+        obj = retval[key].indirect_reference.get_object()  # to get the full object
+        if obj.get(FA.FT, "") == "/Ch":
+            retval[key][NameObject("/_States_")] = obj[NameObject(FA.Opt)]
+        if obj.get(FA.FT, "") == "/Btn" and "/AP" in obj:
+            #  Checkbox
+            retval[key][NameObject("/_States_")] = ArrayObject(
+                list(obj["/AP"]["/N"].keys())
+            )
+            if "/Off" not in retval[key]["/_States_"]:
+                retval[key][NameObject("/_States_")].append(NameObject("/Off"))
+        elif obj.get(FA.FT, "") == "/Btn" and obj.get(FA.Ff, 0) & FA.FfBits.Radio != 0:
+            states = []
+            for k in obj.get(FA.Kids, {}):
+                k = k.get_object()
+                for s in list(k["/AP"]["/N"].keys()):
+                    if s not in states:
+                        states.append(s)
+            retval[key][NameObject("/_States_")] = ArrayObject(states)  # even w/o kids
+            if (
+                obj.get(FA.Ff, 0) & FA.FfBits.NoToggleToOff != 0
+                and "/Off" in retval[key]["/_States_"]
+            ):
+                del retval[key]["/_States_"][retval[key]["/_States_"].index("/Off")]
+
+    def _check_kids(
+        self, tree: Union[TreeObject, DictionaryObject], retval: Any, fileobj: Any
+    ) -> None:
+        if PA.KIDS in tree:
+            # recurse down the tree
+            for kid in tree[PA.KIDS]:  # type: ignore
+                self.get_fields(kid.get_object(), retval, fileobj)
+
+    def _write_field(self, fileobj: Any, field: Any, field_attributes: Any) -> None:
+        field_attributes_tuple = FA.attributes()
+        field_attributes_tuple = (
+            field_attributes_tuple + CheckboxRadioButtonAttributes.attributes()
+        )
+
+        for attr in field_attributes_tuple:
+            if attr in (
+                FA.Kids,
+                FA.AA,
+            ):
+                continue
+            attr_name = field_attributes[attr]
+            try:
+                if attr == FA.FT:
+                    # Make the field type value more clear
+                    types = {
+                        "/Btn": "Button",
+                        "/Tx": "Text",
+                        "/Ch": "Choice",
+                        "/Sig": "Signature",
+                    }
+                    if field[attr] in types:
+                        fileobj.write(f"{attr_name}: {types[field[attr]]}\n")
+                elif attr == FA.Parent:
+                    # Let's just write the name of the parent
+                    try:
+                        name = field[attr][FA.TM]
+                    except KeyError:
+                        name = field[attr][FA.T]
+                    fileobj.write(f"{attr_name}: {name}\n")
+                else:
+                    fileobj.write(f"{attr_name}: {field[attr]}\n")
+            except KeyError:
+                # Field attribute is N/A or unknown, so don't write anything
+                pass
+
+    def get_form_text_fields(self, full_qualified_name: bool = False) -> Dict[str, Any]:
+        """
+        Retrieve form fields from the document with textual data.
+
+        Args:
+            full_qualified_name: to get full name
+
+        Returns:
+            A dictionary. The key is the name of the form field,
+            the value is the content of the field.
+
+            If the document contains multiple form fields with the same name, the
+            second and following will get the suffix .2, .3, ...
+        """
+
+        def indexed_key(k: str, fields: dict) -> str:
+            if k not in fields:
+                return k
+            else:
+                return (
+                    k
+                    + "."
+                    + str(sum([1 for kk in fields if kk.startswith(k + ".")]) + 2)
+                )
+
+        # Retrieve document form fields
+        formfields = self.get_fields()
+        if formfields is None:
+            return {}
+        ff = {}
+        for field, value in formfields.items():
+            if value.get("/FT") == "/Tx":
+                if full_qualified_name:
+                    ff[field] = value.get("/V")
+                else:
+                    ff[indexed_key(cast(str, value["/T"]), ff)] = value.get("/V")
+        return ff
+
+    def getFormTextFields(self) -> Dict[str, Any]:  # deprecated
+        """
+        Use :meth:`get_form_text_fields` instead.
+
+        .. deprecated:: 1.28.0
+        """
+        deprecation_with_replacement(
+            "getFormTextFields", "get_form_text_fields", "3.0.0"
+        )
+        return self.get_form_text_fields()
+
+    def _get_named_destinations(
+        self,
+        tree: Union[TreeObject, None] = None,
+        retval: Optional[Any] = None,
+    ) -> Dict[str, Any]:
+        """
+        Retrieve the named destinations present in the document.
+
+        Args:
+            tree: Name tree node to walk; looked up in the catalog when *retval* is None.
+            retval: Dictionary the destinations are collected into; created when None.
+
+        Returns:
+            A dictionary which maps names to
+            :class:`Destinations<pypdf.generic.Destination>`.
+        """
+        if retval is None:
+            retval = {}
+            catalog = cast(DictionaryObject, self.trailer[TK.ROOT])
+
+            # get the name tree
+            if CA.DESTS in catalog:
+                tree = cast(TreeObject, catalog[CA.DESTS])
+            elif CA.NAMES in catalog:
+                names = cast(DictionaryObject, catalog[CA.NAMES])
+                if CA.DESTS in names:
+                    tree = cast(TreeObject, names[CA.DESTS])
+
+        if tree is None:
+            return retval
+
+        if PA.KIDS in tree:
+            # recurse down the tree
+            for kid in cast(ArrayObject, tree[PA.KIDS]):
+                self._get_named_destinations(kid.get_object(), retval)
+        # TABLE 3.33 Entries in a name tree node dictionary (PDF 1.7 specs)
+        elif CA.NAMES in tree:  # KIDS and NAMES are exclusive (PDF 1.7 specs p 162)
+            names = cast(DictionaryObject, tree[CA.NAMES])
+            i = 0
+            while i < len(names):
+                key = cast(str, names[i].get_object())
+                i += 1
+                if not isinstance(key, str):
+                    continue
+                try:
+                    value = names[i].get_object()
+                except IndexError:
+                    break
+                i += 1
+                if isinstance(value, DictionaryObject) and "/D" in value:
+                    value = value["/D"]
+                dest = self._build_destination(key, value)  # type: ignore
+                if dest is not None:
+                    retval[key] = dest
+        else:  # case where Dests is in root catalog (PDF 1.7 specs, §2 about PDF1.1)
+            for k__, v__ in tree.items():
+                val = v__.get_object()
+                if isinstance(val, DictionaryObject):
+                    val = val["/D"].get_object()
+                dest = self._build_destination(k__, val)
+                if dest is not None:
+                    retval[k__] = dest
+        return retval
+
+    def getNamedDestinations(
+        self,
+        tree: Union[TreeObject, None] = None,
+        retval: Optional[Any] = None,
+    ) -> Dict[str, Any]:  # deprecated
+        """
+        Use :py:attr:`named_destinations` instead.
+
+        .. deprecated:: 1.28.0
+        """
+        deprecation_with_replacement(
+            "getNamedDestinations", "named_destinations", "3.0.0"
+        )
+        return self._get_named_destinations(tree, retval)
+
+    @property
+    def outline(self) -> OutlineType:
+        """
+        Read-only property for the outline present in the document.
+
+        (i.e., a collection of 'outline items' which are also known as
+        'bookmarks')
+        """
+        return self._get_outline()
+
+    @property
+    def outlines(self) -> OutlineType:  # deprecated
+        """
+        Use :py:attr:`outline` instead.
+
+        .. deprecated:: 2.9.0
+        """
+        deprecation_with_replacement("outlines", "outline", "3.0.0")
+        return self.outline
+
+    def _get_outline(
+        self, node: Optional[DictionaryObject] = None, outline: Optional[Any] = None
+    ) -> OutlineType:
+        if outline is None:
+            outline = []
+            catalog = cast(DictionaryObject, self.trailer[TK.ROOT])
+
+            # get the outline dictionary and named destinations
+            if CO.OUTLINES in catalog:
+                lines = cast(DictionaryObject, catalog[CO.OUTLINES])
+
+                if isinstance(lines, NullObject):
+                    return outline
+
+                # TABLE 8.3 Entries in the outline dictionary
+                if lines is not None and "/First" in lines:
+                    node = cast(DictionaryObject, lines["/First"])
+            self._namedDests = self._get_named_destinations()
+
+        if node is None:
+            return outline
+
+        # see if there are any more outline items
+        while True:
+            outline_obj = self._build_outline_item(node)
+            if outline_obj:
+                outline.append(outline_obj)
+
+            # check for sub-outline
+            if "/First" in node:
+                sub_outline: List[Any] = []
+                self._get_outline(cast(DictionaryObject, node["/First"]), sub_outline)
+                if sub_outline:
+                    outline.append(sub_outline)
+
+            if "/Next" not in node:
+                break
+            node = cast(DictionaryObject, node["/Next"])
+
+        return outline
+
+    def getOutlines(
+        self, node: Optional[DictionaryObject] = None, outline: Optional[Any] = None
+    ) -> OutlineType:  # deprecated
+        """
+        Use :py:attr:`outline` instead.
+
+        .. deprecated:: 1.28.0
+        """
+        deprecation_with_replacement("getOutlines", "outline", "3.0.0")
+        return self._get_outline(node, outline)
+
+    @property
+    def threads(self) -> Optional[ArrayObject]:
+        """
+        Read-only property for the list of threads.
+
+        See §8.3.2 from PDF 1.7 spec.
+
+        It's an array of dictionaries with "/F" and "/I" properties or
+        None if there are no articles.
+        """
+        catalog = cast(DictionaryObject, self.trailer[TK.ROOT])
+        if CO.THREADS in catalog:
+            return cast("ArrayObject", catalog[CO.THREADS])
+        else:
+            return None
+
+    def _get_page_number_by_indirect(
+        self, indirect_reference: Union[None, int, NullObject, IndirectObject]
+    ) -> int:
+        """
+        Look up a page index by object id, generating _page_id2num on first use.
+
+        Args:
+            indirect_reference: Page reference or object id; None/NullObject yield -1.
+
+        Returns:
+            The page number, or -1 if no page has this id.
+        """
+        if self._page_id2num is None:
+            self._page_id2num = {
+                x.indirect_reference.idnum: i for i, x in enumerate(self.pages)  # type: ignore
+            }
+
+        if indirect_reference is None or isinstance(indirect_reference, NullObject):
+            return -1
+        if isinstance(indirect_reference, int):
+            idnum = indirect_reference
+        else:
+            idnum = indirect_reference.idnum
+        assert self._page_id2num is not None, "hint for mypy"
+        ret = self._page_id2num.get(idnum, -1)
+        return ret
+
+    def get_page_number(self, page: PageObject) -> int:
+        """
+        Retrieve page number of a given PageObject.
+
+        Args:
+            page: The page to get page number. Should be
+                an instance of :class:`PageObject<pypdf._page.PageObject>`
+
+        Returns:
+            The page number or -1 if page is not found
+        """
+        return self._get_page_number_by_indirect(page.indirect_reference)
+
+    def getPageNumber(self, page: PageObject) -> int:  # deprecated
+        """
+        Use :meth:`get_page_number` instead.
+
+        .. deprecated:: 1.28.0
+        """
+        deprecation_with_replacement("getPageNumber", "get_page_number", "3.0.0")
+        return self.get_page_number(page)
+
+    def get_destination_page_number(self, destination: Destination) -> int:
+        """
+        Retrieve page number of a given Destination object.
+
+        Args:
+            destination: The destination to get page number.
+
+        Returns:
+            The page number or -1 if page is not found
+        """
+        return self._get_page_number_by_indirect(destination.page)
+
+    def getDestinationPageNumber(self, destination: Destination) -> int:  # deprecated
+        """
+        Use :meth:`get_destination_page_number` instead.
+
+        .. deprecated:: 1.28.0
+        """
+        deprecation_with_replacement(
+            "getDestinationPageNumber", "get_destination_page_number", "3.0.0"
+        )
+        return self.get_destination_page_number(destination)
+
+    def _build_destination(
+        self,
+        title: str,
+        array: Optional[
+            List[
+                Union[NumberObject, IndirectObject, None, NullObject, DictionaryObject]
+            ]
+        ],
+    ) -> Destination:
+        page, typ = None, None
+        # handle outline items with missing or invalid destination
+        if (
+            isinstance(array, (NullObject, str))
+            or (isinstance(array, ArrayObject) and len(array) == 0)
+            or array is None
+        ):
+            page = NullObject()
+            return Destination(title, page, Fit.fit())
+        else:
+            page, typ = array[0:2]  # type: ignore
+            array = array[2:]
+            try:
+                return Destination(title, page, Fit(fit_type=typ, fit_args=array))  # type: ignore
+            except PdfReadError:
+                logger_warning(f"Unknown destination: {title} {array}", __name__)
+                if self.strict:
+                    raise
+                # create a link to first Page
+                tmp = self.pages[0].indirect_reference
+                indirect_reference = NullObject() if tmp is None else tmp
+                return Destination(title, indirect_reference, Fit.fit())  # type: ignore
+
+    def _build_outline_item(self, node: DictionaryObject) -> Optional[Destination]:
+        dest, title, outline_item = None, None, None
+
+        # title required for valid outline
+        # PDF Reference 1.7: TABLE 8.4 Entries in an outline item dictionary
+        try:
+            title = cast("str", node["/Title"])
+        except KeyError:
+            if self.strict:
+                raise PdfReadError(f"Outline Entry Missing /Title attribute: {node!r}")
+            title = ""  # type: ignore
+
+        if "/A" in node:
+            # Action, PDFv1.7 Section 12.6 (only type GoTo supported)
+            action = cast(DictionaryObject, node["/A"])
+            action_type = cast(NameObject, action[GoToActionArguments.S])
+            if action_type == "/GoTo":
+                dest = action[GoToActionArguments.D]
+        elif "/Dest" in node:
+            # Destination, PDFv1.7 Section 12.3.2
+            dest = node["/Dest"]
+            # if array was referenced in another object, will be a dict w/ key "/D"
+            if isinstance(dest, DictionaryObject) and "/D" in dest:
+                dest = dest["/D"]
+
+        if isinstance(dest, ArrayObject):
+            outline_item = self._build_destination(title, dest)
+        elif isinstance(dest, str):
+            # named destination, addresses NameObject Issue #193
+            # TODO : keep named destination instead of replacing it ?
+            try:
+                outline_item = self._build_destination(
+                    title, self._namedDests[dest].dest_array
+                )
+            except KeyError:
+                # named destination not found in Name Dict
+                outline_item = self._build_destination(title, None)
+        elif dest is None:
+            # outline item not required to have destination or action
+            # PDFv1.7 Table 153
+            outline_item = self._build_destination(title, dest)
+        else:
+            if self.strict:
+                raise PdfReadError(f"Unexpected destination {dest!r}")
+            else:
+                logger_warning(
+                    f"Removed unexpected destination {dest!r} from destination",
+                    __name__,
+                )
+            outline_item = self._build_destination(title, None)  # type: ignore
+
+        # if outline item created, add color, format, and child count if present
+        if outline_item:
+            if "/C" in node:
+                # Color of outline item font in (R, G, B) with values ranging 0.0-1.0
+                outline_item[NameObject("/C")] = ArrayObject(FloatObject(c) for c in node["/C"])  # type: ignore
+            if "/F" in node:
+                # specifies style characteristics bold and/or italic
+                # with 1=italic, 2=bold, 3=both
+                outline_item[NameObject("/F")] = node["/F"]
+            if "/Count" in node:
+                # absolute value = num. visible children
+                # with positive = open/unfolded, negative = closed/folded
+                outline_item[NameObject("/Count")] = node["/Count"]
+            # a missing or zero count is treated as open, so "/%is_open%" is always set
+            outline_item[NameObject("/%is_open%")] = BooleanObject(
+                node.get("/Count", 0) >= 0
+            )
+        outline_item.node = node
+        try:
+            outline_item.indirect_reference = node.indirect_reference
+        except AttributeError:
+            pass
+        return outline_item
+
+    @property
+    def pages(self) -> List[PageObject]:
+        """Read-only property that emulates a list of :py:class:`Page<pypdf._page.Page>` objects."""
+        return _VirtualList(self._get_num_pages, self._get_page)  # type: ignore
+
+    @property
+    def page_labels(self) -> List[str]:
+        """
+        A list of labels for the pages in this document.
+
+        This property is read-only. The labels are in the order that the pages
+        appear in the document.
+        """
+        return [page_index2page_label(self, i) for i in range(len(self.pages))]
+
+    @property
+    def page_layout(self) -> Optional[str]:
+        """
+        Get the page layout currently being used.
+
+        .. list-table:: Valid ``layout`` values
+           :widths: 50 200
+
+           * - /NoLayout
+             - Layout explicitly not specified
+           * - /SinglePage
+             - Show one page at a time
+           * - /OneColumn
+             - Show one column at a time
+           * - /TwoColumnLeft
+             - Show pages in two columns, odd-numbered pages on the left
+           * - /TwoColumnRight
+             - Show pages in two columns, odd-numbered pages on the right
+           * - /TwoPageLeft
+             - Show two pages at a time, odd-numbered pages on the left
+           * - /TwoPageRight
+             - Show two pages at a time, odd-numbered pages on the right
+        """
+        trailer = cast(DictionaryObject, self.trailer[TK.ROOT])
+        if CD.PAGE_LAYOUT in trailer:
+            return cast(NameObject, trailer[CD.PAGE_LAYOUT])
+        return None
+
+    def getPageLayout(self) -> Optional[str]:  # deprecated
+        """
+        Use :py:attr:`page_layout` instead.
+
+        .. deprecated:: 1.28.0
+        """
+        deprecation_with_replacement("getPageLayout", "page_layout", "3.0.0")
+        return self.page_layout
+
+    @property
+    def pageLayout(self) -> Optional[str]:  # deprecated
+        """
+        Use :py:attr:`page_layout` instead.
+
+        .. deprecated:: 1.28.0
+        """
+        deprecation_with_replacement("pageLayout", "page_layout", "3.0.0")
+        return self.page_layout
+
+    @property
+    def page_mode(self) -> Optional[PagemodeType]:
+        """
+        Get the page mode currently being used.
+
+        .. list-table:: Valid ``mode`` values
+           :widths: 50 200
+
+           * - /UseNone
+             - Do not show outline or thumbnails panels
+           * - /UseOutlines
+             - Show outline (aka bookmarks) panel
+           * - /UseThumbs
+             - Show page thumbnails panel
+           * - /FullScreen
+             - Fullscreen view
+           * - /UseOC
+             - Show Optional Content Group (OCG) panel
+           * - /UseAttachments
+             - Show attachments panel
+        """
+        try:
+            return self.trailer[TK.ROOT]["/PageMode"]  # type: ignore
+        except KeyError:
+            return None
+
+    def getPageMode(self) -> Optional[PagemodeType]:  # deprecated
+        """
+        Use :py:attr:`page_mode` instead.
+
+        .. deprecated:: 1.28.0
+        """
+        deprecation_with_replacement("getPageMode", "page_mode", "3.0.0")
+        return self.page_mode
+
+    @property
+    def pageMode(self) -> Optional[PagemodeType]:  # deprecated
+        """
+        Use :py:attr:`page_mode` instead.
+
+        .. deprecated:: 1.28.0
+        """
+        deprecation_with_replacement("pageMode", "page_mode", "3.0.0")
+        return self.page_mode
+
+    def _flatten(
+        self,
+        pages: Union[None, DictionaryObject, PageObject] = None,
+        inherit: Optional[Dict[str, Any]] = None,
+        indirect_reference: Optional[IndirectObject] = None,
+    ) -> None:
+        inheritable_page_attributes = (
+            NameObject(PG.RESOURCES),
+            NameObject(PG.MEDIABOX),
+            NameObject(PG.CROPBOX),
+            NameObject(PG.ROTATE),
+        )
+        if inherit is None:
+            inherit = {}
+        if pages is None:
+            # Fix issue 327: set flattened_pages attribute only for
+            # decrypted file
+            catalog = self.trailer[TK.ROOT].get_object()
+            pages = catalog["/Pages"].get_object()  # type: ignore
+            self.flattened_pages = []
+
+        if PA.TYPE in pages:
+            t = pages[PA.TYPE]  # type: ignore
+        # if pdf has no type, considered as a page if /Kids is missing
+        elif PA.KIDS not in pages:
+            t = "/Page"
+        else:
+            t = "/Pages"
+
+        if t == "/Pages":
+            for attr in inheritable_page_attributes:
+                if attr in pages:
+                    inherit[attr] = pages[attr]
+            for page in pages[PA.KIDS]:  # type: ignore
+                addt = {}
+                if isinstance(page, IndirectObject):
+                    addt["indirect_reference"] = page
+                obj = page.get_object()
+                if obj:
+                    # damaged file may have invalid child in /Pages
+                    self._flatten(obj, dict(inherit), **addt)  # per-subtree copy
+        elif t == "/Page":
+            for attr_in, value in list(inherit.items()):
+                # if the page has its own value, it does not inherit the
+                # parent's value:
+                if attr_in not in pages:
+                    pages[attr_in] = value
+            page_obj = PageObject(self, indirect_reference)
+            page_obj.update(pages)
+
+            # TODO: Could flattened_pages be None at this point?
+            self.flattened_pages.append(page_obj)  # type: ignore
+
+    def _get_object_from_stream(
+        self, indirect_reference: IndirectObject
+    ) -> Union[int, PdfObject, str]:
+        """
+        Read an object that is stored inside an object stream (/ObjStm).
+
+        The containing stream number and the expected index are taken from
+        ``self.xref_objStm``. The stream's leading (object number, offset)
+        pairs are scanned until the requested object number is found; the
+        object is then parsed at ``/First + offset`` inside the stream data.
+
+        Args:
+            indirect_reference: Reference to the compressed object; its
+                ``idnum`` must be a key of ``self.xref_objStm``.
+
+        Returns:
+            The parsed object. In non-strict mode a ``NullObject`` is returned
+            when the object cannot be parsed or is not listed in the stream.
+
+        Raises:
+            PdfReadError: In strict mode, when the object sits at another
+                index than the xref announced, cannot be parsed, or is not
+                listed in the stream at all.
+        """
+        # indirect reference to object in object stream
+        # read the entire object stream into memory
+        stmnum, idx = self.xref_objStm[indirect_reference.idnum]
+        obj_stm: EncodedStreamObject = IndirectObject(stmnum, 0, self).get_object()  # type: ignore
+        # This is an xref to a stream, so its type better be a stream
+        assert cast(str, obj_stm["/Type"]) == "/ObjStm"
+        # /N is the number of indirect objects in the stream
+        assert idx < obj_stm["/N"]
+        stream_data = BytesIO(b_(obj_stm.get_data()))
+        # Walk the header: /N pairs of "<object number> <offset>".
+        for i in range(obj_stm["/N"]):  # type: ignore
+            # Each read_non_whitespace() consumes the first non-blank byte,
+            # so step back one byte to leave it for the next reader.
+            read_non_whitespace(stream_data)
+            stream_data.seek(-1, 1)
+            objnum = NumberObject.read_from_stream(stream_data)
+            read_non_whitespace(stream_data)
+            stream_data.seek(-1, 1)
+            offset = NumberObject.read_from_stream(stream_data)
+            read_non_whitespace(stream_data)
+            stream_data.seek(-1, 1)
+            if objnum != indirect_reference.idnum:
+                # We're only interested in one object
+                continue
+            if self.strict and idx != i:
+                raise PdfReadError("Object is in wrong index.")
+            # Offsets in the header are relative to /First.
+            stream_data.seek(int(obj_stm["/First"] + offset), 0)  # type: ignore
+
+            # to cope with some case where the 'pointer' is on a white space
+            read_non_whitespace(stream_data)
+            stream_data.seek(-1, 1)
+
+            try:
+                obj = read_object(stream_data, self)
+            except PdfStreamError as exc:
+                # Stream object cannot be read. Normally, a critical error, but
+                # Adobe Reader doesn't complain, so continue (in strict mode?)
+                logger_warning(
+                    f"Invalid stream (index {i}) within object "
+                    f"{indirect_reference.idnum} {indirect_reference.generation}: "
+                    f"{exc}",
+                    __name__,
+                )
+
+                if self.strict:
+                    raise PdfReadError(f"Can't read object stream: {exc}")
+                # Replace with null. Hopefully it's nothing important.
+                obj = NullObject()
+            return obj
+
+        # The requested object number is not listed in the stream header.
+        if self.strict:
+            raise PdfReadError("This is a fatal error in strict mode.")
+        return NullObject()
+
+    def _get_indirect_object(self, num: int, gen: int) -> Optional[PdfObject]:
+        """
+        Resolve an object from its object number and generation number.
+
+        Development shortcut, equivalent to
+        ``generic.IndirectObject(num, gen, self).get_object()``.
+
+        Args:
+            num: The object number of the indirect object.
+            gen: The generation number of the indirect object.
+
+        Returns:
+            Whatever ``IndirectObject.get_object()`` returns for that reference.
+        """
+        reference = IndirectObject(num, gen, self)
+        return reference.get_object()
+
+    def get_object(
+        self, indirect_reference: Union[int, IndirectObject]
+    ) -> Optional[PdfObject]:
+        """
+        Resolve an indirect reference to the object it points to.
+
+        Lookup order:
+
+        1. the cache of already resolved objects;
+        2. an object stream, when the reference has generation 0 and its id
+           is listed in ``self.xref_objStm``;
+        3. the byte offset recorded in ``self.xref``;
+        4. as a last resort, a scan of the whole file for an
+           ``<id> <generation> obj`` header.
+
+        Objects read directly from the file (cases 3 and 4) are decrypted
+        when the document is encrypted and encryption is not overridden.
+        The result is stored in the cache before being returned.
+
+        Args:
+            indirect_reference: The reference to resolve. A plain ``int`` is
+                treated as an object number with generation 0.
+
+        Returns:
+            The resolved object. A ``NullObject`` is returned for an entry
+            flagged as free; ``None`` is returned (and cached) in non-strict
+            mode when the object cannot be found anywhere.
+
+        Raises:
+            PdfReadError: In strict mode, when the header found at the xref
+                offset does not match the reference, or the object is missing.
+            FileNotDecryptedError: If the document is encrypted and has not
+                been decrypted yet.
+        """
+        if isinstance(indirect_reference, int):
+            indirect_reference = IndirectObject(indirect_reference, 0, self)
+        retval = self.cache_get_indirect_object(
+            indirect_reference.generation, indirect_reference.idnum
+        )
+        if retval is not None:
+            return retval
+        if (
+            indirect_reference.generation == 0
+            and indirect_reference.idnum in self.xref_objStm
+        ):
+            # Compressed object: stored inside an object stream.
+            retval = self._get_object_from_stream(indirect_reference)  # type: ignore
+        elif (
+            indirect_reference.generation in self.xref
+            and indirect_reference.idnum in self.xref[indirect_reference.generation]
+        ):
+            # Free entries resolve to null; note this early return bypasses
+            # the cache update at the end of the method.
+            if self.xref_free_entry.get(indirect_reference.generation, {}).get(
+                indirect_reference.idnum, False
+            ):
+                return NullObject()
+            start = self.xref[indirect_reference.generation][indirect_reference.idnum]
+            self.stream.seek(start, 0)
+            try:
+                idnum, generation = self.read_object_header(self.stream)
+            except Exception:
+                # The xref offset does not point at a readable header:
+                # load the whole file and search for the header instead.
+                if hasattr(self.stream, "getbuffer"):
+                    buf = bytes(self.stream.getbuffer())  # type: ignore
+                else:
+                    p = self.stream.tell()
+                    self.stream.seek(0, 0)
+                    buf = self.stream.read(-1)
+                    self.stream.seek(p, 0)
+                m = re.search(
+                    rf"\s{indirect_reference.idnum}\s+{indirect_reference.generation}\s+obj".encode(),
+                    buf,
+                )
+                if m is not None:
+                    logger_warning(
+                        f"Object ID {indirect_reference.idnum},{indirect_reference.generation} ref repaired",
+                        __name__,
+                    )
+                    # +1 skips the whitespace byte matched by the leading \s
+                    self.xref[indirect_reference.generation][
+                        indirect_reference.idnum
+                    ] = (m.start(0) + 1)
+                    self.stream.seek(m.start(0) + 1)
+                    idnum, generation = self.read_object_header(self.stream)
+                else:
+                    idnum = -1  # exception will be raised below
+            if idnum != indirect_reference.idnum and self.xref_index:
+                # Xref table probably had bad indexes due to not being zero-indexed
+                if self.strict:
+                    raise PdfReadError(
+                        f"Expected object ID ({indirect_reference.idnum} {indirect_reference.generation}) "
+                        f"does not match actual ({idnum} {generation}); "
+                        "xref table not zero-indexed."
+                    )
+                # xref table is corrected in non-strict mode
+            elif idnum != indirect_reference.idnum and self.strict:
+                # some other problem
+                raise PdfReadError(
+                    f"Expected object ID ({indirect_reference.idnum} "
+                    f"{indirect_reference.generation}) does not match actual "
+                    f"({idnum} {generation})."
+                )
+            if self.strict:
+                assert generation == indirect_reference.generation
+            retval = read_object(self.stream, self)  # type: ignore
+
+            # override encryption is used for the /Encrypt dictionary
+            if not self._override_encryption and self._encryption is not None:
+                # if we don't have the encryption key:
+                if not self._encryption.is_decrypted():
+                    raise FileNotDecryptedError("File has not been decrypted")
+                # otherwise, decrypt here...
+                retval = cast(PdfObject, retval)
+                retval = self._encryption.decrypt_object(
+                    retval, indirect_reference.idnum, indirect_reference.generation
+                )
+        else:
+            # Not referenced by any xref entry: search the raw file for an
+            # "<id> <generation> obj" header.
+            if hasattr(self.stream, "getbuffer"):
+                buf = bytes(self.stream.getbuffer())  # type: ignore
+            else:
+                p = self.stream.tell()
+                self.stream.seek(0, 0)
+                buf = self.stream.read(-1)
+                self.stream.seek(p, 0)
+            m = re.search(
+                rf"\s{indirect_reference.idnum}\s+{indirect_reference.generation}\s+obj".encode(),
+                buf,
+            )
+            if m is not None:
+                logger_warning(
+                    f"Object {indirect_reference.idnum} {indirect_reference.generation} found",
+                    __name__,
+                )
+                # Remember the recovered offset for later lookups.
+                if indirect_reference.generation not in self.xref:
+                    self.xref[indirect_reference.generation] = {}
+                self.xref[indirect_reference.generation][indirect_reference.idnum] = (
+                    m.start(0) + 1
+                )
+                # Continue one byte past the matched "obj" keyword, then
+                # position on the first non-whitespace byte of the body.
+                self.stream.seek(m.end(0) + 1)
+                skip_over_whitespace(self.stream)
+                self.stream.seek(-1, 1)
+                retval = read_object(self.stream, self)  # type: ignore
+
+                # override encryption is used for the /Encrypt dictionary
+                if not self._override_encryption and self._encryption is not None:
+                    # if we don't have the encryption key:
+                    if not self._encryption.is_decrypted():
+                        raise FileNotDecryptedError("File has not been decrypted")
+                    # otherwise, decrypt here...
+                    retval = cast(PdfObject, retval)
+                    retval = self._encryption.decrypt_object(
+                        retval, indirect_reference.idnum, indirect_reference.generation
+                    )
+            else:
+                logger_warning(
+                    f"Object {indirect_reference.idnum} {indirect_reference.generation} not defined.",
+                    __name__,
+                )
+                if self.strict:
+                    raise PdfReadError("Could not find object.")
+                # non-strict: retval is still None here and is cached as such
+        self.cache_indirect_object(
+            indirect_reference.generation, indirect_reference.idnum, retval
+        )
+        return retval
+
+    def getObject(
+        self, indirectReference: IndirectObject
+    ) -> Optional[PdfObject]:  # deprecated
+        """
+        Deprecated camelCase alias of :meth:`get_object`; use that instead.
+
+        .. deprecated:: 1.28.0
+        """
+        deprecation_with_replacement("getObject", "get_object", "3.0.0")
+        resolved = self.get_object(indirectReference)
+        return resolved
+
+    def read_object_header(self, stream: StreamType) -> Tuple[int, int]:
+        """
+        Parse an ``<idnum> <generation> obj`` header at the stream position.
+
+        On return the stream is positioned on the first non-whitespace byte
+        that follows the three bytes read after the generation number.
+
+        Args:
+            stream: Stream positioned at (or slightly before) the header.
+
+        Returns:
+            The ``(idnum, generation)`` pair converted to ``int``.
+        """
+        # A correct cross-reference table points exactly at the header, so no
+        # whitespace should have to be skipped. Real files are sometimes off
+        # by a few whitespace bytes, hence the tolerant parsing below.
+        skip_over_comment(stream)
+        ws_before_id = skip_over_whitespace(stream)
+        stream.seek(-1, 1)
+        idnum = read_until_whitespace(stream)
+        ws_before_generation = skip_over_whitespace(stream)
+        stream.seek(-1, 1)
+        generation = read_until_whitespace(stream)
+        ws_before_keyword = skip_over_whitespace(stream)
+        stream.seek(-1, 1)
+
+        # the three bytes are not used, but they still have to be consumed
+        stream.read(3)
+
+        read_non_whitespace(stream)
+        stream.seek(-1, 1)
+        if (
+            any((ws_before_id, ws_before_generation, ws_before_keyword))
+            and self.strict
+        ):
+            logger_warning(
+                f"Superfluous whitespace found in object header {idnum} {generation}",  # type: ignore
+                __name__,
+            )
+        return int(idnum), int(generation)
+
+    def readObjectHeader(self, stream: StreamType) -> Tuple[int, int]:  # deprecated
+        """
+        Deprecated camelCase alias of :meth:`read_object_header`; use that instead.
+
+        .. deprecated:: 1.28.0
+        """
+        deprecation_with_replacement("readObjectHeader", "read_object_header", "3.0.0")
+        header = self.read_object_header(stream)
+        return header
+
+    def cache_get_indirect_object(
+        self, generation: int, idnum: int
+    ) -> Optional[PdfObject]:
+        """
+        Look up an already resolved object in the cache.
+
+        Args:
+            generation: Generation number of the object.
+            idnum: Object number of the object.
+
+        Returns:
+            The value stored under ``(generation, idnum)`` in
+            ``self.resolved_objects``, or ``None`` if there is no such key.
+        """
+        cache_key = (generation, idnum)
+        return self.resolved_objects.get(cache_key)
+
+    def cacheGetIndirectObject(
+        self, generation: int, idnum: int
+    ) -> Optional[PdfObject]:  # deprecated
+        """
+        Deprecated camelCase alias of :meth:`cache_get_indirect_object`; use
+        that instead.
+
+        .. deprecated:: 1.28.0
+        """
+        old_name, new_name = "cacheGetIndirectObject", "cache_get_indirect_object"
+        deprecation_with_replacement(old_name, new_name, "3.0.0")
+        return self.cache_get_indirect_object(generation, idnum)
+
+    def cache_indirect_object(
+        self, generation: int, idnum: int, obj: Optional[PdfObject]
+    ) -> Optional[PdfObject]:
+        """
+        Store a resolved object in the cache and tag it with its reference.
+
+        Args:
+            generation: Generation number of the object.
+            idnum: Object number of the object.
+            obj: The resolved object, or ``None``.
+
+        Returns:
+            ``obj`` itself.
+
+        Raises:
+            PdfReadError: In strict mode, if the cache already holds an entry
+                for ``(generation, idnum)``. In non-strict mode a warning is
+                logged and the entry is overwritten.
+        """
+        cache_key = (generation, idnum)
+        if cache_key in self.resolved_objects:
+            msg = f"Overwriting cache for {generation} {idnum}"
+            if self.strict:
+                raise PdfReadError(msg)
+            logger_warning(msg, __name__)
+        self.resolved_objects[cache_key] = obj
+        if obj is None:
+            return obj
+        # let the object know which reference it was resolved from
+        obj.indirect_reference = IndirectObject(idnum, generation, self)
+        return obj
+
+    def cacheIndirectObject(
+        self, generation: int, idnum: int, obj: Optional[PdfObject]
+    ) -> Optional[PdfObject]:  # deprecated
+        """
+        Use :meth:`cache_indirect_object` instead.
+
+        .. deprecated:: 1.28.0
+        """
+        # Pass the removal version explicitly, as the sibling deprecated
+        # aliases (getObject, readObjectHeader, cacheGetIndirectObject) do.
+        deprecation_with_replacement(
+            "cacheIndirectObject", "cache_indirect_object", "3.0.0"
+        )
+        return self.cache_indirect_object(generation, idnum, obj)
+
+    def read(self, stream: StreamType) -> None:
+        """
+        Parse the structural information of a PDF from *stream*.
+
+        Validates the header, locates the end-of-file marker and the
+        ``startxref`` offset, reads the cross-reference tables and trailers
+        and, in non-strict mode, re-keys a cross-reference table that is not
+        zero-indexed.
+
+        Args:
+            stream: The stream to read the PDF from.
+
+        Raises:
+            PdfReadError: In strict mode, if the ``startxref`` pointer is
+                detected as broken.
+        """
+        self._basic_validation(stream)
+        self._find_eof_marker(stream)
+        startxref = self._find_startxref_pos(stream)
+
+        # check and eventually correct the startxref only in not strict
+        xref_issue_nr = self._get_xref_issues(stream, startxref)
+        if xref_issue_nr != 0:
+            if self.strict:
+                raise PdfReadError("Broken xref table")
+            logger_warning(f"incorrect startxref pointer({xref_issue_nr})", __name__)
+
+        # read all cross reference tables and their trailers
+        self._read_xref_tables_and_trailers(stream, startxref, xref_issue_nr)
+
+        # only a non zero-indexed table needs verifying, and only when not strict
+        if not self.xref_index or self.strict:
+            return
+
+        saved_position = stream.tell()
+        for gen, xref_entry in self.xref.items():
+            if gen == 65535:
+                continue
+            # ascending order is required to prevent damage while re-keying
+            for obj_id in sorted(xref_entry.keys()):
+                stream.seek(xref_entry[obj_id], 0)
+                try:
+                    pid, _pgen = self.read_object_header(stream)
+                except ValueError:
+                    break
+                if pid != obj_id - self.xref_index:
+                    # either it's just plain wrong, or the non-zero-index
+                    # is actually correct
+                    continue
+                # fixing index item per item is required for revised PDF.
+                self.xref[gen][pid] = self.xref[gen][obj_id]
+                del self.xref[gen][obj_id]
+        stream.seek(saved_position, 0)  # return to where it was
+
+    def _basic_validation(self, stream: StreamType) -> None:
+        """
+        Check the file header and leave the stream positioned at its end.
+
+        At most 5 bytes are read from the start of the stream.
+
+        Args:
+            stream: The stream to validate.
+
+        Raises:
+            UnsupportedOperation: If reading the header raises a
+                ``UnicodeDecodeError``.
+            EmptyFileError: If nothing can be read from the stream.
+            PdfReadError: In strict mode, if the stream does not start with
+                ``%PDF-``. In non-strict mode only a warning is logged.
+        """
+        stream.seek(0, os.SEEK_SET)
+        try:
+            header_byte = stream.read(5)
+        except UnicodeDecodeError as exc:
+            raise UnsupportedOperation("cannot read header") from exc
+        if header_byte == b"":
+            raise EmptyFileError("Cannot read an empty file")
+        elif header_byte != b"%PDF-":
+            if self.strict:
+                # The header of a non-PDF file may be arbitrary binary data;
+                # escape undecodable bytes so that the intended PdfReadError
+                # is raised instead of a UnicodeDecodeError.
+                raise PdfReadError(
+                    f"PDF starts with '{header_byte.decode('utf8', 'backslashreplace')}', "
+                    "but '%PDF-' expected"
+                )
+            else:
+                logger_warning(f"invalid pdf header: {header_byte}", __name__)
+        stream.seek(0, os.SEEK_END)
+
+    def _find_eof_marker(self, stream: StreamType) -> None:
+        """
+        Move backwards through the stream until a ``%%EOF`` line is read.
+
+        The specification places the %%EOF marker at the very end of the
+        file, so for standard-compliant documents only the last part of the
+        file (DEFAULT_BUFFER_SIZE) has to be read.
+
+        Raises:
+            PdfReadError: In strict mode, if the stream position drops into
+                the header area before a marker was found. In non-strict mode
+                a warning is logged and the backward search continues.
+        """
+        header_size = 8  # to parse whole file, Header is e.g. '%PDF-1.6'
+        line = b""
+        while not line.startswith(b"%%EOF"):
+            if stream.tell() < header_size:
+                if self.strict:
+                    raise PdfReadError("EOF marker not found")
+                logger_warning("EOF marker not found", __name__)
+            line = read_previous_line(stream)
+
+    def _find_startxref_pos(self, stream: StreamType) -> int:
+        """
+        Find the ``startxref`` entry, i.e. the location of the xref table.
+
+        The previous line of the stream is expected to hold the offset, and
+        the line before that the ``startxref`` keyword. A single line holding
+        both (``startxref <offset>``) is accepted with a warning.
+
+        Args:
+            stream: The stream, read backwards from its current position.
+
+        Returns:
+            The byte offset given by the ``startxref`` entry.
+
+        Raises:
+            PdfReadError: If the ``startxref`` keyword is not where expected.
+        """
+        offset_line = read_previous_line(stream)
+        try:
+            xref_position = int(offset_line)
+        except ValueError:
+            # 'startxref' may be on the same line as the location
+            if offset_line[:9] != b"startxref":
+                raise PdfReadError("startxref not found")
+            xref_position = int(offset_line[9:].strip())
+            logger_warning("startxref on same line as offset", __name__)
+        else:
+            keyword_line = read_previous_line(stream)
+            if not keyword_line.startswith(b"startxref"):
+                raise PdfReadError("startxref not found")
+        return xref_position
+
+    def _read_standard_xref_table(self, stream: StreamType) -> None:
+        """
+        Read a classic (non-stream) cross-reference table.
+
+        The stream must be positioned right after the leading ``x`` of the
+        ``xref`` keyword. Every subsection is read and its entries are stored
+        in ``self.xref`` (offsets) and ``self.xref_free_entry`` (free flags);
+        entries that are already known are left untouched. Reading stops once
+        the ``trailer`` keyword has been consumed.
+
+        Args:
+            stream: The PDF stream.
+
+        Raises:
+            PdfReadError: If the ``xref`` keyword is not found.
+        """
+        # standard cross-reference table
+        ref = stream.read(3)
+        if ref != b"ref":
+            raise PdfReadError("xref table read error")
+        read_non_whitespace(stream)
+        stream.seek(-1, 1)
+        first_time = True  # check if the first time looking at the xref table
+        while True:
+            # subsection header: "<first object number> <entry count>"
+            num = cast(int, read_object(stream, self))
+            if first_time and num != 0:
+                self.xref_index = num
+                if self.strict:
+                    logger_warning(
+                        "Xref table not zero-indexed. ID numbers for objects will be corrected.",
+                        __name__,
+                    )
+                    # if table not zero indexed, could be due to error from when PDF was created
+                    # which will lead to mismatched indices later on; the warning is only
+                    # logged if self.strict==True
+                    # NOTE(review): the index fix-up in read() runs only when
+                    # self.strict is False, so "will be corrected" looks
+                    # inaccurate for strict mode - confirm.
+            first_time = False
+            read_non_whitespace(stream)
+            stream.seek(-1, 1)
+            size = cast(int, read_object(stream, self))
+            read_non_whitespace(stream)
+            stream.seek(-1, 1)
+            cnt = 0
+            while cnt < size:
+                line = stream.read(20)
+
+                # It's very clear in section 3.4.3 of the PDF spec
+                # that all cross-reference table lines are a fixed
+                # 20 bytes (as of PDF 1.7). However, some files have
+                # 21-byte entries (or more) due to the use of \r\n
+                # (CRLF) EOL's. Detect that case, and adjust the line
+                # until it does not begin with a \r (CR) or \n (LF).
+                while line[0] in b"\x0D\x0A":
+                    stream.seek(-20 + 1, 1)
+                    line = stream.read(20)
+
+                # On the other hand, some malformed PDF files
+                # use a single character EOL without a preceding
+                # space.  Detect that case, and seek the stream
+                # back one character.  (0-9 means we've bled into
+                # the next xref entry, t means we've bled into the
+                # text "trailer"):
+                if line[-1] in b"0123456789t":
+                    stream.seek(-1, 1)
+
+                try:
+                    # entry layout: 10-digit offset, space, 5-digit generation,
+                    # space, then the entry type at index 17
+                    offset_b, generation_b = line[:16].split(b" ")
+                    entry_type_b = line[17:18]
+
+                    offset, generation = int(offset_b), int(generation_b)
+                except Exception:
+                    # if something wrong occurred
+                    # fall back to searching the whole file for the object header
+                    if hasattr(stream, "getbuffer"):
+                        buf = bytes(stream.getbuffer())  # type: ignore
+                    else:
+                        p = stream.tell()
+                        stream.seek(0, 0)
+                        buf = stream.read(-1)
+                        stream.seek(p)
+
+                    f = re.search(f"{num}\\s+(\\d+)\\s+obj".encode(), buf)
+                    if f is None:
+                        logger_warning(
+                            f"entry {num} in Xref table invalid; object not found",
+                            __name__,
+                        )
+                        generation = 65535
+                        offset = -1
+                    else:
+                        logger_warning(
+                            f"entry {num} in Xref table invalid but object found",
+                            __name__,
+                        )
+                        generation = int(f.group(1))
+                        offset = f.start()
+
+                if generation not in self.xref:
+                    self.xref[generation] = {}
+                    self.xref_free_entry[generation] = {}
+                if num in self.xref[generation]:
+                    # It really seems like we should allow the last
+                    # xref table in the file to override previous
+                    # ones. Since we read the file backwards, assume
+                    # any existing key is already set correctly.
+                    pass
+                else:
+                    self.xref[generation][num] = offset
+                    # NOTE(review): when the parsing above failed,
+                    # entry_type_b may be unbound or left over from the
+                    # previous entry; the broad excepts below hide the
+                    # former case - confirm this is intended.
+                    try:
+                        self.xref_free_entry[generation][num] = entry_type_b == b"f"
+                    except Exception:
+                        pass
+                    try:
+                        self.xref_free_entry[65535][num] = entry_type_b == b"f"
+                    except Exception:
+                        pass
+                cnt += 1
+                num += 1
+            read_non_whitespace(stream)
+            stream.seek(-1, 1)
+            trailer_tag = stream.read(7)
+            if trailer_tag != b"trailer":
+                # more xrefs!
+                stream.seek(-7, 1)
+            else:
+                break
+
+    def _read_xref_tables_and_trailers(
+        self, stream: StreamType, startxref: Optional[int], xref_issue_nr: int
+    ) -> None:
+        """
+        Read all cross-reference sections and trailers, newest first.
+
+        Resets ``self.xref``, ``self.xref_free_entry``, ``self.xref_objStm``
+        and ``self.trailer``, then follows the chain of sections starting at
+        *startxref* until there is no previous section left.
+
+        Args:
+            stream: The PDF stream.
+            startxref: Offset of the most recent cross-reference section.
+            xref_issue_nr: Issue code as returned by ``_get_xref_issues``;
+                a non-zero value triggers a rebuild of the table when the
+                offset does not point at an ``xref`` keyword.
+
+        Raises:
+            PdfReadError: If a cross-reference stream cannot be read and no
+                ``/Root`` has been found in a trailer so far.
+        """
+        self.xref: Dict[int, Dict[Any, Any]] = {}
+        self.xref_free_entry: Dict[int, Dict[Any, Any]] = {}
+        self.xref_objStm: Dict[int, Tuple[Any, Any]] = {}
+        self.trailer = DictionaryObject()
+        while startxref is not None:
+            # load the xref table
+            stream.seek(startxref, 0)
+            x = stream.read(1)
+            # tolerate an offset that points at an end-of-line byte
+            if x in b"\r\n":
+                x = stream.read(1)
+            if x == b"x":
+                # classic "xref" table followed by a trailer
+                startxref = self._read_xref(stream)
+            elif xref_issue_nr:
+                # startxref was flagged as broken: rebuild from the raw file
+                try:
+                    self._rebuild_xref_table(stream)
+                    break
+                except Exception:
+                    # rebuild failed: clear the flag so that the next loop
+                    # iteration tries the remaining branches instead
+                    xref_issue_nr = 0
+            elif x.isdigit():
+                # an object header: PDF 1.5+ cross-reference stream
+                try:
+                    xrefstream = self._read_pdf15_xref_stream(stream)
+                except Exception as e:
+                    if TK.ROOT in self.trailer:
+                        logger_warning(
+                            f"Previous trailer can not be read {e.args}",
+                            __name__,
+                        )
+                        break
+                    else:
+                        raise PdfReadError(f"trailer can not be read {e.args}")
+                # the stream dictionary doubles as trailer; keys already
+                # taken from a more recent trailer are kept
+                trailer_keys = TK.ROOT, TK.ENCRYPT, TK.INFO, TK.ID, TK.SIZE
+                for key in trailer_keys:
+                    if key in xrefstream and key not in self.trailer:
+                        self.trailer[NameObject(key)] = xrefstream.raw_get(key)
+                if "/XRefStm" in xrefstream:
+                    p = stream.tell()
+                    # +1: _read_pdf15_xref_stream() starts by stepping back one byte
+                    stream.seek(cast(int, xrefstream["/XRefStm"]) + 1, 0)
+                    self._read_pdf15_xref_stream(stream)
+                    stream.seek(p, 0)
+                if "/Prev" in xrefstream:
+                    startxref = cast(int, xrefstream["/Prev"])
+                else:
+                    break
+            else:
+                startxref = self._read_xref_other_error(stream, startxref)
+
+    def _read_xref(self, stream: StreamType) -> Optional[int]:
+        """
+        Read one classic xref table together with the trailer following it.
+
+        Trailer keys that are not yet present in ``self.trailer`` are copied
+        over. If the trailer has an ``/XRefStm`` entry, the cross-reference
+        stream at that offset is read as well; a failure there is only logged.
+
+        Args:
+            stream: The PDF stream, positioned inside the ``xref`` keyword.
+
+        Returns:
+            The ``/Prev`` value of the trailer, or ``None`` if it has none.
+        """
+        self._read_standard_xref_table(stream)
+        read_non_whitespace(stream)
+        stream.seek(-1, 1)
+        new_trailer = cast(Dict[str, Any], read_object(stream, self))
+        for key, value in new_trailer.items():
+            if key in self.trailer:
+                # a more recent trailer already provided this key
+                continue
+            self.trailer[key] = value
+        if "/XRefStm" in new_trailer:
+            resume_at = stream.tell()
+            stream.seek(cast(int, new_trailer["/XRefStm"]) + 1, 0)
+            try:
+                self._read_pdf15_xref_stream(stream)
+            except Exception:
+                logger_warning(
+                    f"XRef object at {new_trailer['/XRefStm']} can not be read, some object may be missing",
+                    __name__,
+                )
+            stream.seek(resume_at, 0)
+        if "/Prev" not in new_trailer:
+            return None
+        return new_trailer["/Prev"]
+
+    def _read_xref_other_error(
+        self, stream: StreamType, startxref: int
+    ) -> Optional[int]:
+        """
+        Recover when *startxref* points at neither a table nor a stream.
+
+        Args:
+            stream: The PDF stream, positioned one byte past *startxref*.
+            startxref: The offset that could not be interpreted.
+
+        Returns:
+            A corrected offset to retry with, or ``None`` when there is no
+            further cross-reference section to read.
+
+        Raises:
+            PdfReadError: If no cross-reference data is found near the
+                offset and the table cannot be rebuilt, or in strict mode
+                when *startxref* is 0.
+        """
+        # some PDFs have /Prev=0 in the trailer, instead of no /Prev
+        if startxref == 0:
+            if self.strict:
+                raise PdfReadError(
+                    "/Prev=0 in the trailer (try opening with strict=False)"
+                )
+            logger_warning(
+                "/Prev=0 in the trailer - assuming there is no previous xref table",
+                __name__,
+            )
+            return None
+
+        # bad xref character at startxref.  Let's see if we can find
+        # the xref table nearby, as we've observed this error with an
+        # off-by-one before.
+        stream.seek(-11, 1)
+        nearby = stream.read(20)
+        xref_loc = nearby.find(b"xref")
+        if xref_loc != -1:
+            return startxref - (10 - xref_loc)
+
+        # No explicit xref table, try finding a cross-reference stream.
+        stream.seek(startxref, 0)
+        for look in range(25):  # value extended to cope with more linearized files
+            if stream.read(1).isdigit():
+                # This is not a standard PDF, consider adding a warning
+                return startxref + look
+
+        # no xref table found at specified location
+        if "/Root" not in self.trailer or self.strict:
+            raise PdfReadError("Could not find xref table at specified location")
+        # if Root has been already found, just raise warning
+        logger_warning("Invalid parent xref., rebuild xref", __name__)
+        try:
+            self._rebuild_xref_table(stream)
+        except Exception:
+            raise PdfReadError("can not rebuild xref")
+        else:
+            return None
+
+    def _read_pdf15_xref_stream(
+        self, stream: StreamType
+    ) -> Union[ContentStream, EncodedStreamObject, DecodedStreamObject]:
+        """
+        Read a PDF 1.5+ cross-reference stream and register its entries.
+
+        The stream object is parsed, stored in the object cache and its
+        entries are added to ``self.xref`` / ``self.xref_objStm`` through
+        ``_read_xref_subsections``.
+
+        Args:
+            stream: The PDF stream, positioned one byte past the start of the
+                object header of the cross-reference stream.
+
+        Returns:
+            The parsed cross-reference stream object.
+
+        Raises:
+            PdfReadError: In strict mode, if ``/W`` has more than 3 entries.
+        """
+        # PDF 1.5+ Cross-Reference Stream
+        # step back onto the first byte of the object header
+        stream.seek(-1, 1)
+        idnum, generation = self.read_object_header(stream)
+        xrefstream = cast(ContentStream, read_object(stream, self))
+        assert cast(str, xrefstream["/Type"]) == "/XRef"
+        self.cache_indirect_object(generation, idnum, xrefstream)
+        stream_data = BytesIO(b_(xrefstream.get_data()))
+        # Index pairs specify the subsections in the dictionary. If
+        # none create one subsection that spans everything.
+        idx_pairs = xrefstream.get("/Index", [0, xrefstream.get("/Size")])
+        # /W holds the byte width of each of the fields of an entry
+        entry_sizes = cast(Dict[Any, Any], xrefstream.get("/W"))
+        assert len(entry_sizes) >= 3
+        if self.strict and len(entry_sizes) > 3:
+            raise PdfReadError(f"Too many entry sizes: {entry_sizes}")
+
+        def get_entry(i: int) -> Union[int, Tuple[int, ...]]:
+            # Reads the correct number of bytes for each entry. See the
+            # discussion of the W parameter in PDF spec table 17.
+            # Note: every call consumes bytes from stream_data, so the
+            # fields of an entry have to be requested in order.
+            if entry_sizes[i] > 0:
+                d = stream_data.read(entry_sizes[i])
+                return convert_to_int(d, entry_sizes[i])
+
+            # PDF Spec Table 17: A value of zero for an element in the
+            # W array indicates...the default value shall be used
+            if i == 0:
+                return 1  # First value defaults to 1
+            else:
+                return 0
+
+        def used_before(num: int, generation: Union[int, Tuple[int, ...]]) -> bool:
+            # We move backwards through the xrefs, don't replace any.
+            return num in self.xref.get(generation, []) or num in self.xref_objStm  # type: ignore
+
+        # Iterate through each subsection
+        self._read_xref_subsections(idx_pairs, get_entry, used_before)
+        return xrefstream
+
+    @staticmethod
+    def _get_xref_issues(stream: StreamType, startxref: int) -> int:
+        """
+        Return an int which indicates an issue. 0 means there is no issue.
+
+        Args:
+            stream: Seekable stream of the PDF file.
+            startxref: Offset announced by the ``startxref`` entry.
+
+        Returns:
+            0 means no issue, other values represent specific issues:
+
+            * 1: the byte before the offset (after an optional ``j``) is not
+              whitespace;
+            * 2: the end of the stream was reached while skipping the digits
+              of a presumed object header;
+            * 3: neither ``xref`` nor an ``obj`` keyword was found;
+            * 4: the offset is not positive, so there is no preceding byte
+              to inspect.
+        """
+        if startxref <= 0:
+            # seeking to startxref - 1 would be a negative absolute seek,
+            # which raises instead of reporting a broken pointer
+            return 4
+        stream.seek(startxref - 1, 0)  # -1 to check character before
+        line = stream.read(1)
+        if line == b"j":
+            line = stream.read(1)
+        if line not in b"\r\n \t":
+            return 1
+        line = stream.read(4)
+        if line != b"xref":
+            # not an xref so check if it is an XREF object
+            line = b""
+            while line in b"0123456789 \t":
+                line = stream.read(1)
+                if line == b"":
+                    return 2
+            line += stream.read(2)  # 1 char already read, +2 to check "obj"
+            if line.lower() != b"obj":
+                return 3
+        return 0
+
+    def _rebuild_xref_table(self, stream: StreamType) -> None:
+        """
+        Rebuild ``self.xref`` and the trailer by scanning the whole file.
+
+        Every ``<id> <generation> obj`` header found in the raw data is
+        registered with the offset of its object number. Afterwards each
+        ``trailer`` dictionary found is parsed and merged into
+        ``self.trailer``.
+
+        Args:
+            stream: The PDF stream; it is read completely.
+        """
+        self.xref = {}
+        stream.seek(0, 0)
+        raw_pdf = stream.read(-1)
+
+        object_header = rb"[\r\n \t][ \t]*(\d+)[ \t]+(\d+)[ \t]+obj"
+        for match in re.finditer(object_header, raw_pdf):
+            idnum, generation = int(match.group(1)), int(match.group(2))
+            self.xref.setdefault(generation, {})[idnum] = match.start(1)
+
+        stream.seek(0, 0)
+        trailer_start = rb"[\r\n \t][ \t]*trailer[\r\n \t]*(<<)"
+        for match in re.finditer(trailer_start, raw_pdf):
+            stream.seek(match.start(1), 0)
+            new_trailer = cast(Dict[Any, Any], read_object(stream, self))
+            # Here, we are parsing the file from start to end, the new data have to erase the existing.
+            for key, value in list(new_trailer.items()):
+                self.trailer[key] = value
+
+    def _read_xref_subsections(
+        self,
+        idx_pairs: List[int],
+        get_entry: Callable[[int], Union[int, Tuple[int, ...]]],
+        used_before: Callable[[int, Union[int, Tuple[int, ...]]], bool],
+    ) -> None:
+        """
+        Register the entries of the subsections of a cross-reference stream.
+
+        Args:
+            idx_pairs: Flat list of ``start, size`` values, one pair per
+                subsection.
+            get_entry: Callable returning field ``i`` (0, 1 or 2) of the
+                entry being read; it is called once per field, in order.
+            used_before: Callable telling whether an object number is
+                already registered; such entries are not replaced.
+
+        Raises:
+            PdfReadError: In strict mode, if an entry type is not 0, 1 or 2.
+        """
+        for first_num, count in self._pairs(idx_pairs):
+            # The subsections must increase
+            for num in range(first_num, first_num + count):
+                # The first field is the type, the others depend on it
+                xref_type = get_entry(0)
+                if xref_type == 0:
+                    # linked list of free objects: both fields are read
+                    # (next free object, next generation) and discarded
+                    get_entry(1)
+                    get_entry(2)
+                elif xref_type == 1:
+                    # objects that are in use but are not compressed
+                    byte_offset = get_entry(1)
+                    generation = get_entry(2)
+                    self.xref.setdefault(generation, {})  # type: ignore
+                    if used_before(num, generation):
+                        continue
+                    self.xref[generation][num] = byte_offset  # type: ignore
+                elif xref_type == 2:
+                    # compressed objects
+                    objstr_num = get_entry(1)
+                    obstr_idx = get_entry(2)
+                    # PDF spec table 18, generation is 0
+                    if used_before(num, 0):
+                        continue
+                    self.xref_objStm[num] = (objstr_num, obstr_idx)
+                elif self.strict:
+                    raise PdfReadError(f"Unknown xref type: {xref_type}")
+
+    def _pairs(self, array: List[int]) -> Iterable[Tuple[int, int]]:
+        """
+        Yield the consecutive, non-overlapping pairs of *array*.
+
+        Args:
+            array: Flat list of values, e.g. ``[start0, size0, start1, size1]``.
+
+        Yields:
+            ``(array[0], array[1])``, ``(array[2], array[3])``, ... A trailing
+            unpaired item is ignored, and an array with fewer than two items
+            yields nothing instead of raising ``IndexError``.
+        """
+        # the range stops as soon as fewer than two items remain
+        for i in range(0, len(array) - 1, 2):
+            yield array[i], array[i + 1]
+
+    def read_next_end_line(
+        self, stream: StreamType, limit_offset: int = 0
+    ) -> bytes:  # deprecated
+        """.. deprecated:: 2.1.0"""
+        deprecate_no_replacement("read_next_end_line", removed_in="4.0.0")
+        line_parts = []
+        while True:
+            # Prevent infinite loops in malformed PDFs
+            if stream.tell() == 0 or stream.tell() == limit_offset:
+                raise PdfReadError("Could not read malformed PDF file")
+            x = stream.read(1)
+            if stream.tell() < 2:
+                raise PdfReadError("EOL marker not found")
+            stream.seek(-2, 1)
+            if x in (b"\n", b"\r"):  # \n = LF; \r = CR
+                crlf = False
+                while x in (b"\n", b"\r"):
+                    x = stream.read(1)
+                    if x in (b"\n", b"\r"):  # account for CR+LF
+                        stream.seek(-1, 1)
+                        crlf = True
+                    if stream.tell() < 2:
+                        raise PdfReadError("EOL marker not found")
+                    stream.seek(-2, 1)
+                stream.seek(
+                    2 if crlf else 1, 1
+                )  # if using CR+LF, go back 2 bytes, else 1
+                break
+            else:
+                line_parts.append(x)
+        line_parts.reverse()
+        return b"".join(line_parts)
+
+    def readNextEndLine(
+        self, stream: StreamType, limit_offset: int = 0
+    ) -> bytes:  # deprecated
+        """.. deprecated:: 1.28.0"""
+        deprecation_no_replacement("readNextEndLine", "3.0.0")
+        return self.read_next_end_line(stream, limit_offset)
+
+    def decrypt(self, password: Union[str, bytes]) -> PasswordType:
+        """
+        When using an encrypted / secured PDF file with the PDF Standard
+        encryption handler, this function will allow the file to be decrypted.
+        It checks the given password against the document's user password and
+        owner password, and then stores the resulting decryption key if either
+        password is correct.
+
+        It does not matter which password was matched.  Both passwords provide
+        the correct decryption key that will allow the document to be used with
+        this library.
+
+        Args:
+            password: The password to match.
+
+        Returns:
+            An indicator if the document was decrypted and whether it was the
+            owner password or the user password.
+        """
+        if not self._encryption:
+            raise PdfReadError("Not encrypted file")
+        # TODO: raise Exception for wrong password
+        return self._encryption.verify(password)
+
+    def decode_permissions(self, permissions_code: int) -> Dict[str, bool]:
+        # Takes the permissions as an integer, returns the allowed access
+        permissions = {}
+        permissions["print"] = permissions_code & (1 << 3 - 1) != 0  # bit 3
+        permissions["modify"] = permissions_code & (1 << 4 - 1) != 0  # bit 4
+        permissions["copy"] = permissions_code & (1 << 5 - 1) != 0  # bit 5
+        permissions["annotations"] = permissions_code & (1 << 6 - 1) != 0  # bit 6
+        permissions["forms"] = permissions_code & (1 << 9 - 1) != 0  # bit 9
+        permissions["accessability"] = permissions_code & (1 << 10 - 1) != 0  # bit 10
+        permissions["assemble"] = permissions_code & (1 << 11 - 1) != 0  # bit 11
+        permissions["print_high_quality"] = (
+            permissions_code & (1 << 12 - 1) != 0
+        )  # bit 12
+        return permissions
+
+    @property
+    def is_encrypted(self) -> bool:
+        """
+        Read-only boolean property showing whether this PDF file is encrypted.
+
+        Note that this property, if true, will remain true even after the
+        :meth:`decrypt()<pypdf.PdfReader.decrypt>` method is called.
+        """
+        return TK.ENCRYPT in self.trailer
+
+    def getIsEncrypted(self) -> bool:  # deprecated
+        """
+        Use :py:attr:`is_encrypted` instead.
+
+        .. deprecated:: 1.28.0
+        """
+        deprecation_with_replacement("getIsEncrypted", "is_encrypted", "3.0.0")
+        return self.is_encrypted
+
+    @property
+    def isEncrypted(self) -> bool:  # deprecated
+        """
+        Use :py:attr:`is_encrypted` instead.
+
+        .. deprecated:: 1.28.0
+        """
+        deprecation_with_replacement("isEncrypted", "is_encrypted", "3.0.0")
+        return self.is_encrypted
+
+    @property
+    def xfa(self) -> Optional[Dict[str, Any]]:
+        tree: Optional[TreeObject] = None
+        retval: Dict[str, Any] = {}
+        catalog = cast(DictionaryObject, self.trailer[TK.ROOT])
+
+        if "/AcroForm" not in catalog or not catalog["/AcroForm"]:
+            return None
+
+        tree = cast(TreeObject, catalog["/AcroForm"])
+
+        if "/XFA" in tree:
+            fields = cast(ArrayObject, tree["/XFA"])
+            i = iter(fields)
+            for f in i:
+                tag = f
+                f = next(i)
+                if isinstance(f, IndirectObject):
+                    field = cast(Optional[EncodedStreamObject], f.get_object())
+                    if field:
+                        es = zlib.decompress(b_(field._data))
+                        retval[tag] = es
+        return retval
+
+    def add_form_topname(self, name: str) -> Optional[DictionaryObject]:
+        """
+        Add a top level form that groups all form fields below it.
+
+        Args:
+            name: text string of the "/T" Attribute of the created object
+
+        Returns:
+            The created object. ``None`` means no object was created.
+        """
+        catalog = cast(DictionaryObject, self.trailer[TK.ROOT])
+
+        if "/AcroForm" not in catalog or not isinstance(
+            catalog["/AcroForm"], DictionaryObject
+        ):
+            return None
+        acroform = cast(DictionaryObject, catalog[NameObject("/AcroForm")])
+        if "/Fields" not in acroform:
+            # TODO: No error is returned, but this may be extended for XFA Forms
+            return None
+
+        interim = DictionaryObject()
+        interim[NameObject("/T")] = TextStringObject(name)
+        interim[NameObject("/Kids")] = acroform[NameObject("/Fields")]
+        self.cache_indirect_object(
+            0,
+            max([i for (g, i) in self.resolved_objects if g == 0]) + 1,
+            interim,
+        )
+        arr = ArrayObject()
+        arr.append(interim.indirect_reference)
+        acroform[NameObject("/Fields")] = arr
+        for o in cast(ArrayObject, interim["/Kids"]):
+            obj = o.get_object()
+            if "/Parent" in obj:
+                logger_warning(
+                    f"Top Level Form Field {obj.indirect_reference} have a non-expected parent",
+                    __name__,
+                )
+            obj[NameObject("/Parent")] = interim.indirect_reference
+        return interim
+
+    def rename_form_topname(self, name: str) -> Optional[DictionaryObject]:
+        """
+        Rename the top level form field that groups all form fields below it.
+
+        Args:
+            name: text string of the "/T" field of the created object
+
+        Returns:
+            The modified object. ``None`` means no object was modified.
+        """
+        catalog = cast(DictionaryObject, self.trailer[TK.ROOT])
+
+        if "/AcroForm" not in catalog or not isinstance(
+            catalog["/AcroForm"], DictionaryObject
+        ):
+            return None
+        acroform = cast(DictionaryObject, catalog[NameObject("/AcroForm")])
+        if "/Fields" not in acroform:
+            return None
+
+        interim = cast(
+            DictionaryObject,
+            cast(ArrayObject, acroform[NameObject("/Fields")])[0].get_object(),
+        )
+        interim[NameObject("/T")] = TextStringObject(name)
+        return interim
+
+    def _get_embedded_files_root(self) -> Optional[NameTree]:
+        """
+        Return the EmbeddedFiles root as a NameTree object;
+        if the root does not exist, return None.
+        """
+        catalog = cast(DictionaryObject, self.trailer["/Root"])
+        if "/Names" not in catalog:
+            return None
+        ef = cast(DictionaryObject, catalog["/Names"]).get("/EmbeddedFiles", None)
+        if ef is None:
+            return None
+        efo = ef.get_object()
+        # not for reader
+        """
+            if not isinstance(efo,NameTree):
+            if isinstance(ef,IndirectObject):
+                ef.replace_object(efo)
+            else:
+                cast(DictionaryObject,catalog["/Names"])[
+                    NameObject("/EmbeddedFiles")] = NameTree(efo)
+        """
+        return NameTree(efo)
+
+    @property
+    def embedded_files(self) -> Optional[Mapping[str, List[PdfObject]]]:
+        ef = self._get_embedded_files_root()
+        if ef:
+            return ef.list_items()
+        else:
+            return None
+
+    @property
+    def attachments(self) -> Mapping[str, Union[List[bytes], List[Dict[str, bytes]]]]:
+        ef = self._get_embedded_files_root()
+        if ef:
+            d: Dict[str, Union[List[bytes], List[Dict[str, bytes]]]] = {}
+            for k, v in ef.list_items().items():
+                if isinstance(v, list):
+                    if k not in d:
+                        d[k] = []  # type: ignore
+                    for e in v:
+                        e = cast(DictionaryObject, e.get_object())
+                        if "/EF" in e:
+                            d[k].append(e["/EF"]["/F"].get_data())  # type: ignore
+                        elif "/RF" in e:
+                            r = cast(
+                                ArrayObject, cast(DictionaryObject, e["/RF"])["/F"]
+                            )
+                            di: Dict[str, bytes] = {}
+                            i = 0
+                            while i < len(r):
+                                di[cast(str, r[i])] = r[i + 1].get_object().get_data()
+                                i += 2
+                            d[k].append(di)
+            return d
+        else:
+            return {}
+
+    def _list_attachments(self) -> List[str]:
+        """
+        Retrieves the list of filenames of file attachments.
+
+        Returns:
+            list of filenames
+        """
+        ef = self._get_embedded_files_root()
+        if ef:
+            lst = ef.list_keys()
+        else:
+            lst = []
+        """
+        for ip, p in enumerate(self.pages):
+            for a in [_a.get_object()
+                      for _a in p.get("/Annots",[])]:
+                if _a.get_object().get("/Subtype","") != "/FileAttachements":
+                    continue
+                lst.append(f"$page_{ip}.{get_name_from_file_specification(_a)}")
+        """
+        return lst
+
+    def _get_attachment_list(self, name: str) -> List[bytes]:
+        out = self._get_attachments(name)[name]
+        if isinstance(out, list):
+            return out
+        return [out]
+
+    def _get_attachments(
+        self, filename: Optional[str] = None
+    ) -> Dict[str, Union[bytes, List[bytes], Dict[str, bytes]]]:
+        """
+        Retrieves all or selected file attachments of the PDF as a dictionary of file names
+        and the file data as a bytestring.
+
+        Args:
+            filename: If filename is None, then a dictionary of all attachments
+                will be returned, where the key is the filename and the value
+                is the content. Otherwise, a dictionary with just a single key
+                - the filename - and its content will be returned.
+
+        Returns:
+            dictionary of filename -> Union[bytestring, List[bytestring]];
+            if the filename exists multiple times, a list of the different versions is provided
+        """
+        ef = self._get_embedded_files_root()
+        if ef is None:
+            return {}
+        if filename is None:
+            return {k: v if len(v) > 1 else v[0] for k, v in self.attachments.items()}  # type: ignore
+        else:
+            lst = ef.list_get(filename)
+            return {
+                filename: [x["/EF"]["/F"].get_data() for x in lst]  # type: ignore
+                if isinstance(lst, list)
+                else lst["/EF"]["/F"].get_data()  # type: ignore
+            }
+
+
+class PdfFileReader(PdfReader):  # deprecated
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        deprecation_with_replacement("PdfFileReader", "PdfReader", "3.0.0")
+        if "strict" not in kwargs and len(args) < 2:
+            kwargs["strict"] = True  # maintain the default
+        super().__init__(*args, **kwargs)
diff --git a/pypdf/_writer.py b/pypdf/_writer.py
index befe617d0..4b29d3a9e 100644
--- a/pypdf/_writer.py
+++ b/pypdf/_writer.py
@@ -713,16 +713,28 @@ def _get_embedded_files_root(self) -> Optional[NameTree]:
         if ef is None:
             return None
         efo = ef.get_object()
-        # not for reader
-        """
-            if not isinstance(efo,NameTree):
-            if isinstance(ef,IndirectObject):
+        if not isinstance(efo, NameTree):
+            efo = NameTree(efo)
+            if isinstance(ef, IndirectObject):
                 ef.replace_object(efo)
             else:
-                cast(DictionaryObject,catalog["/Names"])[
-                    NameObject("/EmbeddedFiles")] = NameTree(efo)
-        """
-        return NameTree(efo)
+                cast(DictionaryObject, catalog["/Names"])[
+                    NameObject("/EmbeddedFiles")
+                ] = efo
+        return efo
+
+    def _create_attachment_root(self) -> NameTree:
+        if "/Names" not in self._root_object:
+            self._root_object[NameObject("/Names")] = self._add_object(
+                DictionaryObject()
+            )
+        node = cast(DictionaryObject, self._root_object["/Names"])
+        if "/EmbeddedFiles" not in node:
+            node[NameObject("/EmbeddedFiles")] = self._add_object(NameTree())
+        node = cast(NameTree, node["/EmbeddedFiles"])
+        if "/Kids" not in node and "/Names" not in node:
+            node[NameObject("/Names")] = ArrayObject()
+        return node
 
     @property
     def embedded_files(self) -> Optional[Mapping[str, List[PdfObject]]]:
@@ -733,18 +745,37 @@ def embedded_files(self) -> Optional[Mapping[str, List[PdfObject]]]:
             return None
 
     @property
-    def attachments(self) -> Mapping[str, List[bytes]]:
+    def attachments(self) -> Mapping[str, Union[List[bytes], List[Dict[str, bytes]]]]:
         ef = self._get_embedded_files_root()
         if ef:
             d = {}
             for k, v in ef.list_items().items():
                 if isinstance(v, list):
-                    d[k] = [e["/EF"]["/F"].get_data() for e in v]  # type: ignore
+                    if k not in d:
+                        d[k] = []
+                    for e in v:
+                        e = e.get_object()
+                        if "/EF" in e:
+                            d[k].append(e["/EF"]["/F"].get_data())  # type: ignore
+                        elif "/RF" in e:
+                            r = cast(ArrayObject, e["/RF"]["/F"])
+                            di = {}
+                            i = 0
+                            while i < len(r):
+                                di[r[i]] = r[i + 1].get_object().get_data()
+                                i += 2
+                            d[k].append(di)
             return d
         else:
             return {}
 
-    def add_attachment(self, filename: str, data: Union[str, bytes]) -> None:
+    def add_attachment(
+        self,
+        filename: str,
+        data: Union[str, bytes, List[Tuple[str, bytes]]],
+        fname: Optional[str] = None,
+        desc: str = "",
+    ) -> DictionaryObject:
         """
         Embed a file inside the PDF.
 
@@ -753,9 +784,20 @@ def add_attachment(self, filename: str, data: Union[str, bytes]) -> None:
         Section 7.11.3
 
         Args:
-            filename: The filename to display.
+            filename: The filename to display (in UTF-16).
             data: The data in the file.
+                If data is a list of (name, content) pairs, they are stored as a related files array ("/RF").
+            fname: an old-style name for the "/F" entry (should be ANSI); if None, one is derived from filename.
+            desc: a description string
+
+        Returns:
+            The filespec DictionaryObject
         """
+        if fname is None:
+            st = filename.replace("/", "\\/").replace("\\\\/", "\\/")
+            fname = st.encode().decode("ansi", errors="xmlcharreplace")
+            fname = f"{fname}"  # to escape string
+
         # We need three entries:
         # * The file's data
         # * The /Filespec entry
@@ -773,9 +815,22 @@ def add_attachment(self, filename: str, data: Union[str, bytes]) -> None:
         # endstream
         # endobj
 
-        file_entry = DecodedStreamObject()
-        file_entry.set_data(b_(data))
-        file_entry.update({NameObject(PA.TYPE): NameObject("/EmbeddedFile")})
+        if isinstance(data, list):
+            ef_entry = DictionaryObject()
+            a = ArrayObject()
+            ef_entry.update({NameObject("/F"): self._add_object(a)})
+            for fn, da in data:
+                a.append(TextStringObject(fn))
+                file_entry = DecodedStreamObject()
+                file_entry.set_data(b_(da))
+                file_entry.update({NameObject(PA.TYPE): NameObject("/EmbeddedFile")})
+                a.append(self._add_object(file_entry))
+        else:
+            file_entry = DecodedStreamObject()
+            file_entry.set_data(b_(data))
+            file_entry.update({NameObject(PA.TYPE): NameObject("/EmbeddedFile")})
+            ef_entry = DictionaryObject()
+            ef_entry.update({NameObject("/F"): self._add_object(file_entry)})
 
         # The Filespec entry
         # Sample:
@@ -786,51 +841,29 @@ def add_attachment(self, filename: str, data: Union[str, bytes]) -> None:
         #  /EF << /F 8 0 R >>
         # >>
 
-        ef_entry = DictionaryObject()
-        ef_entry.update({NameObject("/F"): self._add_object(file_entry)})
-
         filespec = DictionaryObject()
         filespec.update(
             {
                 NameObject(PA.TYPE): NameObject("/Filespec"),
-                NameObject(FileSpecificationDictionaryEntries.F): create_string_object(
+                NameObject(FileSpecificationDictionaryEntries.UF): TextStringObject(
                     filename
-                ),  # Perhaps also try TextStringObject
-                NameObject(FileSpecificationDictionaryEntries.EF): ef_entry,
+                ),
+                NameObject(FileSpecificationDictionaryEntries.F): TextStringObject(
+                    fname
+                ),
+                NameObject(FileSpecificationDictionaryEntries.DESC): TextStringObject(
+                    desc
+                ),
             }
         )
-
-        # Then create the entry for the root, as it needs
-        # a reference to the Filespec
-        # Sample:
-        # 1 0 obj
-        # <<
-        #  /Type /Catalog
-        #  /Outlines 2 0 R
-        #  /Pages 3 0 R
-        #  /Names << /EmbeddedFiles << /Names [(hello.txt) 7 0 R] >> >>
-        # >>
-        # endobj
-
-        if CA.NAMES not in self._root_object:
-            self._root_object[NameObject(CA.NAMES)] = self._add_object(
-                DictionaryObject()
-            )
-        if "/EmbeddedFiles" not in cast(DictionaryObject, self._root_object[CA.NAMES]):
-            embedded_files_names_dictionary = DictionaryObject(
-                {NameObject(CA.NAMES): ArrayObject()}
-            )
-            cast(DictionaryObject, self._root_object[CA.NAMES])[
-                NameObject("/EmbeddedFiles")
-            ] = self._add_object(embedded_files_names_dictionary)
+        if isinstance(data, list):
+            filespec[NameObject(FileSpecificationDictionaryEntries.RF)] = ef_entry
         else:
-            embedded_files_names_dictionary = cast(
-                DictionaryObject,
-                cast(DictionaryObject, self._root_object[CA.NAMES])["/EmbeddedFiles"],
-            )
-        cast(ArrayObject, embedded_files_names_dictionary[CA.NAMES]).extend(
-            [create_string_object(filename), filespec]
-        )
+            filespec[NameObject(FileSpecificationDictionaryEntries.EF)] = ef_entry
+
+        nm = self._get_embedded_files_root() or self._create_attachment_root()
+        nm.list_add(filename, self._add_object(filespec))
+        return filespec
 
     def addAttachment(self, fname: str, fdata: Union[str, bytes]) -> None:  # deprecated
         """
diff --git a/pypdf/constants.py b/pypdf/constants.py
index bde9ff22d..7f282d48e 100644
--- a/pypdf/constants.py
+++ b/pypdf/constants.py
@@ -149,8 +149,11 @@ class FileSpecificationDictionaryEntries:
 
     Type = "/Type"
     FS = "/FS"  # The name of the file system to be used to interpret this file specification
-    F = "/F"  # A file specification string of the form described in Section 3.10.1
+    F = "/F"  # A file specification string of the file as described in Section 3.10.1
+    UF = "/UF"  # A unicode string of the file as described in Section 3.10.1
     EF = "/EF"  # dictionary, containing a subset of the keys F , UF , DOS , Mac , and Unix
+    RF = "/RF"  # dictionary, containing arrays of /EmbeddedFile
+    DESC = "/Desc"  # descriptive text associated with the file specification
 
 
 class StreamAttributes:
diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py
index dd14945fa..b86a494e3 100644
--- a/pypdf/generic/_data_structures.py
+++ b/pypdf/generic/_data_structures.py
@@ -1566,7 +1566,10 @@ def _get(key: str, o: Optional[PdfObject]) -> List[PdfObject]:
         return _get(key, self)
 
     def list_add(
-        self, key: str, data: PdfObject, overwrite: bool = False
+        self,
+        key: Union[str, TextStringObject],
+        data: PdfObject,
+        overwrite: bool = False,
     ) -> Optional[IndirectObject]:
         """
         Add the data entry from the Name Tree
@@ -1588,22 +1591,28 @@ def list_add(
                 raise TypeError
         except (TypeError, AttributeError):
             raise TypeError("Object does not belong to a PdfWriter")
+        if not isinstance(key, TextStringObject):
+            key = TextStringObject(key)
 
         def _update_limits(
-            obj: DictionaryObject, lo: Optional[str], hi: Optional[str]
+            obj: DictionaryObject,
+            lo: Optional[TextStringObject],
+            hi: Optional[TextStringObject],
         ) -> bool:
             if "/Limits" not in obj:
                 return False
             a = cast("ArrayObject", obj["/Limits"])
             if lo is not None and lo < a[0]:
-                a[0] = TextStringObject(lo)
+                a[0] = lo
                 return True
             if hi is not None and hi > a[0]:
-                a[1] = TextStringObject(lo)
+                a[1] = hi
                 return True
             return False
 
-        def _add_in(o: Optional[PdfObject], app: bool = True) -> Optional[PdfObject]:
+        def _add_in(
+            o: Optional[PdfObject], appb: bool = True, app: bool = True
+        ) -> Optional[PdfObject]:
             nonlocal overwrite, writer, key, data
             if o is None:
                 return None
@@ -1611,9 +1620,9 @@ def _add_in(o: Optional[PdfObject], app: bool = True) -> Optional[PdfObject]:
             if "/Names" in o:
                 _l = cast(ArrayObject, o["/Names"])
                 li = o.get("/Limits", [_l[0], _l[-2]])
-                if key < li[0]:
+                if not appb and key < li[0]:
                     return None
-                if not app and _l > li[1]:
+                if not app and key > li[1]:
                     return None
                 i = 0
                 while i < len(_l):
@@ -1632,7 +1641,7 @@ def _add_in(o: Optional[PdfObject], app: bool = True) -> Optional[PdfObject]:
                         _l.insert(i + 1, writer._add_object(data))
                         _update_limits(o, key, None)
                         return _l[i + 1]
-                    i += 1
+                    i += 2
                 if app:
                     _l.append(key)
                     _l.append(writer._add_object(data))
@@ -1642,13 +1651,13 @@ def _add_in(o: Optional[PdfObject], app: bool = True) -> Optional[PdfObject]:
             else:  # kids
                 ar = cast(ArrayObject, o["/Kids"])
                 for x in ar:
-                    r = _add_in(x, x == ar[-1])
+                    r = _add_in(x, x == ar[0], x == ar[-1])
                     if r:
                         _update_limits(o, key, key)
                         return r
                 return None
 
-        o = _add_in(self, True)
+        o = _add_in(self, True, True)
         return o.indirect_reference if o is not None else None
 
 

From 8b99ea02c6bd7a20a6574df98879a2e5bbeb2bb6 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Thu, 5 Oct 2023 23:36:47 +0200
Subject: [PATCH 04/13] new commit

---
 pypdf/_reader.py                  | 4663 +++++++++++++++--------------
 pypdf/_writer.py                  |   40 +-
 pypdf/generic/_data_structures.py |   28 +-
 tests/test_writer.py              | 3735 +++++++++++------------
 4 files changed, 4247 insertions(+), 4219 deletions(-)

diff --git a/pypdf/_reader.py b/pypdf/_reader.py
index 8bd9e2454..f5d0c5ada 100644
--- a/pypdf/_reader.py
+++ b/pypdf/_reader.py
@@ -1,2331 +1,2332 @@
-# Copyright (c) 2006, Mathieu Fenniak
-# Copyright (c) 2007, Ashish Kulkarni <kulkarni.ashish@gmail.com>
-#
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met:
-#
-# * Redistributions of source code must retain the above copyright notice,
-# this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright notice,
-# this list of conditions and the following disclaimer in the documentation
-# and/or other materials provided with the distribution.
-# * The name of the author may not be used to endorse or promote products
-# derived from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-# POSSIBILITY OF SUCH DAMAGE.
-
-import os
-import re
-import struct
-import zlib
-from datetime import datetime
-from io import BytesIO, UnsupportedOperation
-from pathlib import Path
-from typing import (
-    Any,
-    Callable,
-    Dict,
-    Iterable,
-    List,
-    Mapping,
-    Optional,
-    Tuple,
-    Union,
-    cast,
-)
-
-from ._encryption import Encryption, PasswordType
-from ._page import PageObject, _VirtualList
-from ._page_labels import index2label as page_index2page_label
-from ._utils import (
-    StrByteType,
-    StreamType,
-    b_,
-    deprecate_no_replacement,
-    deprecation_no_replacement,
-    deprecation_with_replacement,
-    logger_warning,
-    parse_iso8824_date,
-    read_non_whitespace,
-    read_previous_line,
-    read_until_whitespace,
-    skip_over_comment,
-    skip_over_whitespace,
-)
-from .constants import CatalogAttributes as CA
-from .constants import CatalogDictionary as CD
-from .constants import (
-    CheckboxRadioButtonAttributes,
-    GoToActionArguments,
-)
-from .constants import Core as CO
-from .constants import DocumentInformationAttributes as DI
-from .constants import FieldDictionaryAttributes as FA
-from .constants import PageAttributes as PG
-from .constants import PagesAttributes as PA
-from .constants import TrailerKeys as TK
-from .errors import (
-    EmptyFileError,
-    FileNotDecryptedError,
-    PdfReadError,
-    PdfStreamError,
-    WrongPasswordError,
-)
-from .generic import (
-    ArrayObject,
-    BooleanObject,
-    ContentStream,
-    DecodedStreamObject,
-    Destination,
-    DictionaryObject,
-    EncodedStreamObject,
-    Field,
-    Fit,
-    FloatObject,
-    IndirectObject,
-    NameObject,
-    NameTree,
-    NullObject,
-    NumberObject,
-    PdfObject,
-    TextStringObject,
-    TreeObject,
-    ViewerPreferences,
-    read_object,
-)
-from .types import OutlineType, PagemodeType
-from .xmp import XmpInformation
-
-
-def convert_to_int(d: bytes, size: int) -> Union[int, Tuple[Any, ...]]:
-    if size > 8:
-        raise PdfReadError("invalid size in convert_to_int")
-    d = b"\x00\x00\x00\x00\x00\x00\x00\x00" + d
-    d = d[-8:]
-    return struct.unpack(">q", d)[0]
-
-
-def convertToInt(d: bytes, size: int) -> Union[int, Tuple[Any, ...]]:  # deprecated
-    deprecation_with_replacement("convertToInt", "convert_to_int")
-    return convert_to_int(d, size)
-
-
-class DocumentInformation(DictionaryObject):
-    """
-    A class representing the basic document metadata provided in a PDF File.
-    This class is accessible through
-    :py:class:`PdfReader.metadata<pypdf.PdfReader.metadata>`.
-
-    All text properties of the document metadata have
-    *two* properties, eg. author and author_raw. The non-raw property will
-    always return a ``TextStringObject``, making it ideal for a case where
-    the metadata is being displayed. The raw property can sometimes return
-    a ``ByteStringObject``, if pypdf was unable to decode the string's
-    text encoding; this requires additional safety in the caller and
-    therefore is not as commonly accessed.
-    """
-
-    def __init__(self) -> None:
-        DictionaryObject.__init__(self)
-
-    def _get_text(self, key: str) -> Optional[str]:
-        retval = self.get(key, None)
-        if isinstance(retval, TextStringObject):
-            return retval
-        return None
-
-    def getText(self, key: str) -> Optional[str]:  # deprecated
-        """
-        Use the attributes (e.g. :py:attr:`title` / :py:attr:`author`).
-
-        .. deprecated:: 1.28.0
-        """
-        deprecation_no_replacement("getText", "3.0.0")
-        return self._get_text(key)
-
-    @property
-    def title(self) -> Optional[str]:
-        """
-        Read-only property accessing the document's title.
-
-        Returns a ``TextStringObject`` or ``None`` if the title is not
-        specified.
-        """
-        return (
-            self._get_text(DI.TITLE) or self.get(DI.TITLE).get_object()  # type: ignore
-            if self.get(DI.TITLE)
-            else None
-        )
-
-    @property
-    def title_raw(self) -> Optional[str]:
-        """The "raw" version of title; can return a ``ByteStringObject``."""
-        return self.get(DI.TITLE)
-
-    @property
-    def author(self) -> Optional[str]:
-        """
-        Read-only property accessing the document's author.
-
-        Returns a ``TextStringObject`` or ``None`` if the author is not
-        specified.
-        """
-        return self._get_text(DI.AUTHOR)
-
-    @property
-    def author_raw(self) -> Optional[str]:
-        """The "raw" version of author; can return a ``ByteStringObject``."""
-        return self.get(DI.AUTHOR)
-
-    @property
-    def subject(self) -> Optional[str]:
-        """
-        Read-only property accessing the document's subject.
-
-        Returns a ``TextStringObject`` or ``None`` if the subject is not
-        specified.
-        """
-        return self._get_text(DI.SUBJECT)
-
-    @property
-    def subject_raw(self) -> Optional[str]:
-        """The "raw" version of subject; can return a ``ByteStringObject``."""
-        return self.get(DI.SUBJECT)
-
-    @property
-    def creator(self) -> Optional[str]:
-        """
-        Read-only property accessing the document's creator.
-
-        If the document was converted to PDF from another format, this is the
-        name of the application (e.g. OpenOffice) that created the original
-        document from which it was converted. Returns a ``TextStringObject`` or
-        ``None`` if the creator is not specified.
-        """
-        return self._get_text(DI.CREATOR)
-
-    @property
-    def creator_raw(self) -> Optional[str]:
-        """The "raw" version of creator; can return a ``ByteStringObject``."""
-        return self.get(DI.CREATOR)
-
-    @property
-    def producer(self) -> Optional[str]:
-        """
-        Read-only property accessing the document's producer.
-
-        If the document was converted to PDF from another format, this is the
-        name of the application (for example, OSX Quartz) that converted it to
-        PDF. Returns a ``TextStringObject`` or ``None`` if the producer is not
-        specified.
-        """
-        return self._get_text(DI.PRODUCER)
-
-    @property
-    def producer_raw(self) -> Optional[str]:
-        """The "raw" version of producer; can return a ``ByteStringObject``."""
-        return self.get(DI.PRODUCER)
-
-    @property
-    def creation_date(self) -> Optional[datetime]:
-        """Read-only property accessing the document's creation date."""
-        return parse_iso8824_date(self._get_text(DI.CREATION_DATE))
-
-    @property
-    def creation_date_raw(self) -> Optional[str]:
-        """
-        The "raw" version of creation date; can return a ``ByteStringObject``.
-
-        Typically in the format ``D:YYYYMMDDhhmmss[+Z-]hh'mm`` where the suffix
-        is the offset from UTC.
-        """
-        return self.get(DI.CREATION_DATE)
-
-    @property
-    def modification_date(self) -> Optional[datetime]:
-        """
-        Read-only property accessing the document's modification date.
-
-        The date and time the document was most recently modified.
-        """
-        return parse_iso8824_date(self._get_text(DI.MOD_DATE))
-
-    @property
-    def modification_date_raw(self) -> Optional[str]:
-        """
-        The "raw" version of modification date; can return a
-        ``ByteStringObject``.
-
-        Typically in the format ``D:YYYYMMDDhhmmss[+Z-]hh'mm`` where the suffix
-        is the offset from UTC.
-        """
-        return self.get(DI.MOD_DATE)
-
-
-class PdfReader:
-    """
-    Initialize a PdfReader object.
-
-    This operation can take some time, as the PDF stream's cross-reference
-    tables are read into memory.
-
-    Args:
-        stream: A File object or an object that supports the standard read
-            and seek methods similar to a File object. Could also be a
-            string representing a path to a PDF file.
-        strict: Determines whether user should be warned of all
-            problems and also causes some correctable problems to be fatal.
-            Defaults to ``False``.
-        password: Decrypt PDF file at initialization. If the
-            password is None, the file will not be decrypted.
-            Defaults to ``None``
-    """
-
-    @property
-    def viewer_preferences(self) -> Optional[ViewerPreferences]:
-        """Returns the existing ViewerPreferences as an overloaded dictionary."""
-        o = cast(DictionaryObject, self.trailer["/Root"]).get(
-            CD.VIEWER_PREFERENCES, None
-        )
-        if o is None:
-            return None
-        o = o.get_object()
-        if not isinstance(o, ViewerPreferences):
-            o = ViewerPreferences(o)
-        return o
-
-    def __init__(
-        self,
-        stream: Union[StrByteType, Path],
-        strict: bool = False,
-        password: Union[None, str, bytes] = None,
-    ) -> None:
-        self.strict = strict
-        self.flattened_pages: Optional[List[PageObject]] = None
-        self.resolved_objects: Dict[Tuple[Any, Any], Optional[PdfObject]] = {}
-        self.xref_index = 0
-        self._page_id2num: Optional[
-            Dict[Any, Any]
-        ] = None  # map page indirect_reference number to Page Number
-        if hasattr(stream, "mode") and "b" not in stream.mode:  # type: ignore
-            logger_warning(
-                "PdfReader stream/file object is not in binary mode. "
-                "It may not be read correctly.",
-                __name__,
-            )
-        if isinstance(stream, (str, Path)):
-            with open(stream, "rb") as fh:
-                stream = BytesIO(fh.read())
-        self.read(stream)
-        self.stream = stream
-
-        self._override_encryption = False
-        self._encryption: Optional[Encryption] = None
-        if self.is_encrypted:
-            self._override_encryption = True
-            # Some documents may not have a /ID, use two empty
-            # byte strings instead. Solves
-            # https://github.com/py-pdf/pypdf/issues/608
-            id_entry = self.trailer.get(TK.ID)
-            id1_entry = id_entry[0].get_object().original_bytes if id_entry else b""
-            encrypt_entry = cast(
-                DictionaryObject, self.trailer[TK.ENCRYPT].get_object()
-            )
-            self._encryption = Encryption.read(encrypt_entry, id1_entry)
-
-            # try empty password if no password provided
-            pwd = password if password is not None else b""
-            if (
-                self._encryption.verify(pwd) == PasswordType.NOT_DECRYPTED
-                and password is not None
-            ):
-                # raise if password provided
-                raise WrongPasswordError("Wrong password")
-            self._override_encryption = False
-        elif password is not None:
-            raise PdfReadError("Not encrypted file")
-
-    @property
-    def pdf_header(self) -> str:
-        """
-        The first 8 bytes of the file.
-
-        This is typically something like ``'%PDF-1.6'`` and can be used to
-        detect if the file is actually a PDF file and which version it is.
-        """
-        # TODO: Make this return a bytes object for consistency
-        #       but that needs a deprecation
-        loc = self.stream.tell()
-        self.stream.seek(0, 0)
-        pdf_file_version = self.stream.read(8).decode("utf-8", "backslashreplace")
-        self.stream.seek(loc, 0)  # return to where it was
-        return pdf_file_version
-
-    @property
-    def metadata(self) -> Optional[DocumentInformation]:
-        """
-        Retrieve the PDF file's document information dictionary, if it exists.
-
-        Note that some PDF files use metadata streams instead of docinfo
-        dictionaries, and these metadata streams will not be accessed by this
-        function.
-        """
-        if TK.INFO not in self.trailer:
-            return None
-        obj = self.trailer[TK.INFO]
-        retval = DocumentInformation()
-        if isinstance(obj, type(None)):
-            raise PdfReadError(
-                "trailer not found or does not point to document information directory"
-            )
-        retval.update(obj)  # type: ignore
-        return retval
-
-    def getDocumentInfo(self) -> Optional[DocumentInformation]:  # deprecated
-        """
-        Use the attribute :py:attr:`metadata` instead.
-
-        .. deprecated:: 1.28.0
-        """
-        deprecation_with_replacement("getDocumentInfo", "metadata", "3.0.0")
-        return self.metadata
-
-    @property
-    def documentInfo(self) -> Optional[DocumentInformation]:  # deprecated
-        """
-        Use the attribute :py:attr:`metadata` instead.
-
-        .. deprecated:: 1.28.0
-        """
-        deprecation_with_replacement("documentInfo", "metadata", "3.0.0")
-        return self.metadata
-
-    @property
-    def xmp_metadata(self) -> Optional[XmpInformation]:
-        """XMP (Extensible Metadata Platform) data."""
-        try:
-            self._override_encryption = True
-            return self.trailer[TK.ROOT].xmp_metadata  # type: ignore
-        finally:
-            self._override_encryption = False
-
-    def getXmpMetadata(self) -> Optional[XmpInformation]:  # deprecated
-        """
-        Use the attribute :py:attr:`metadata` instead.
-
-        .. deprecated:: 1.28.0
-        """
-        deprecation_with_replacement("getXmpMetadata", "xmp_metadata", "3.0.0")
-        return self.xmp_metadata
-
-    @property
-    def xmpMetadata(self) -> Optional[XmpInformation]:  # deprecated
-        """
-        Use the attribute :py:attr:`xmp_metadata` instead.
-
-        .. deprecated:: 1.28.0.
-        """
-        deprecation_with_replacement("xmpMetadata", "xmp_metadata", "3.0.0")
-        return self.xmp_metadata
-
-    def _get_num_pages(self) -> int:
-        """
-        Calculate the number of pages in this PDF file.
-
-        Returns:
-            The number of pages of the parsed PDF file
-
-        Raises:
-            PdfReadError: if file is encrypted and restrictions prevent
-                this action.
-        """
-        # Flattened pages will not work on an Encrypted PDF;
-        # the PDF file's page count is used in this case. Otherwise,
-        # the original method (flattened page count) is used.
-        if self.is_encrypted:
-            return self.trailer[TK.ROOT]["/Pages"]["/Count"]  # type: ignore
-        else:
-            if self.flattened_pages is None:
-                self._flatten()
-            return len(self.flattened_pages)  # type: ignore
-
-    def getNumPages(self) -> int:  # deprecated
-        """
-        Use :code:`len(reader.pages)` instead.
-
-        .. deprecated:: 1.28.0
-        """
-        deprecation_with_replacement("reader.getNumPages", "len(reader.pages)", "3.0.0")
-        return self._get_num_pages()
-
-    @property
-    def numPages(self) -> int:  # deprecated
-        """
-        Use :code:`len(reader.pages)` instead.
-
-        .. deprecated:: 1.28.0
-        """
-        deprecation_with_replacement("reader.numPages", "len(reader.pages)", "3.0.0")
-        return self._get_num_pages()
-
-    def getPage(self, pageNumber: int) -> PageObject:  # deprecated
-        """
-        Use :code:`reader.pages[page_number]` instead.
-
-        .. deprecated:: 1.28.0
-        """
-        deprecation_with_replacement(
-            "reader.getPage(pageNumber)", "reader.pages[page_number]", "3.0.0"
-        )
-        return self._get_page(pageNumber)
-
-    def _get_page(self, page_number: int) -> PageObject:
-        """
-        Retrieve a page by number from this PDF file.
-
-        Args:
-            page_number: The page number to retrieve
-                (pages begin at zero)
-
-        Returns:
-            A :class:`PageObject<pypdf._page.PageObject>` instance.
-        """
-        if self.flattened_pages is None:
-            self._flatten()
-        assert self.flattened_pages is not None, "hint for mypy"
-        return self.flattened_pages[page_number]
-
-    @property
-    def namedDestinations(self) -> Dict[str, Any]:  # deprecated
-        """
-        Use :py:attr:`named_destinations` instead.
-
-        .. deprecated:: 1.28.0
-        """
-        deprecation_with_replacement("namedDestinations", "named_destinations", "3.0.0")
-        return self.named_destinations
-
-    @property
-    def named_destinations(self) -> Dict[str, Any]:
-        """
-        A read-only dictionary which maps names to
-        :class:`Destinations<pypdf.generic.Destination>`
-        """
-        return self._get_named_destinations()
-
-    # A select group of relevant field attributes. For the complete list,
-    # see section 8.6.2 of the PDF 1.7 reference.
-
-    def get_fields(
-        self,
-        tree: Optional[TreeObject] = None,
-        retval: Optional[Dict[Any, Any]] = None,
-        fileobj: Optional[Any] = None,
-    ) -> Optional[Dict[str, Any]]:
-        """
-        Extract field data if this PDF contains interactive form fields.
-
-        The *tree* and *retval* parameters are for recursive use.
-
-        Args:
-            tree:
-            retval:
-            fileobj: A file object (usually a text file) to write
-                a report to on all interactive form fields found.
-
-        Returns:
-            A dictionary where each key is a field name, and each
-            value is a :class:`Field<pypdf.generic.Field>` object. By
-            default, the mapping name is used for keys.
-            ``None`` if form data could not be located.
-        """
-        field_attributes = FA.attributes_dict()
-        field_attributes.update(CheckboxRadioButtonAttributes.attributes_dict())
-        if retval is None:
-            retval = {}
-            catalog = cast(DictionaryObject, self.trailer[TK.ROOT])
-            # get the AcroForm tree
-            if CD.ACRO_FORM in catalog:
-                tree = cast(Optional[TreeObject], catalog[CD.ACRO_FORM])
-            else:
-                return None
-        if tree is None:
-            return retval
-        self._check_kids(tree, retval, fileobj)
-        for attr in field_attributes:
-            if attr in tree:
-                # Tree is a field
-                self._build_field(tree, retval, fileobj, field_attributes)
-                break
-
-        if "/Fields" in tree:
-            fields = cast(ArrayObject, tree["/Fields"])
-            for f in fields:
-                field = f.get_object()
-                self._build_field(field, retval, fileobj, field_attributes)
-
-        return retval
-
-    def getFields(
-        self,
-        tree: Optional[TreeObject] = None,
-        retval: Optional[Dict[Any, Any]] = None,
-        fileobj: Optional[Any] = None,
-    ) -> Optional[Dict[str, Any]]:  # deprecated
-        """
-        Use :meth:`get_fields` instead.
-
-        .. deprecated:: 1.28.0
-        """
-        deprecation_with_replacement("getFields", "get_fields", "3.0.0")
-        return self.get_fields(tree, retval, fileobj)
-
-    def _get_qualified_field_name(self, parent: DictionaryObject) -> str:
-        if "/TM" in parent:
-            return cast(str, parent["/TM"])
-        elif "/Parent" in parent:
-            return (
-                self._get_qualified_field_name(
-                    cast(DictionaryObject, parent["/Parent"])
-                )
-                + "."
-                + cast(str, parent["/T"])
-            )
-        else:
-            return cast(str, parent["/T"])
-
-    def _build_field(
-        self,
-        field: Union[TreeObject, DictionaryObject],
-        retval: Dict[Any, Any],
-        fileobj: Any,
-        field_attributes: Any,
-    ) -> None:
-        self._check_kids(field, retval, fileobj)
-        try:
-            key = cast(str, field["/TM"])
-        except KeyError:
-            try:
-                if "/Parent" in field:
-                    key = (
-                        self._get_qualified_field_name(
-                            cast(DictionaryObject, field["/Parent"])
-                        )
-                        + "."
-                    )
-                else:
-                    key = ""
-                key += cast(str, field["/T"])
-            except KeyError:
-                # Ignore no-name field for now
-                return
-        if fileobj:
-            self._write_field(fileobj, field, field_attributes)
-            fileobj.write("\n")
-        retval[key] = Field(field)
-        obj = retval[key].indirect_reference.get_object()  # to get the full object
-        if obj.get(FA.FT, "") == "/Ch":
-            retval[key][NameObject("/_States_")] = obj[NameObject(FA.Opt)]
-        if obj.get(FA.FT, "") == "/Btn" and "/AP" in obj:
-            #  Checkbox
-            retval[key][NameObject("/_States_")] = ArrayObject(
-                list(obj["/AP"]["/N"].keys())
-            )
-            if "/Off" not in retval[key]["/_States_"]:
-                retval[key][NameObject("/_States_")].append(NameObject("/Off"))
-        elif obj.get(FA.FT, "") == "/Btn" and obj.get(FA.Ff, 0) & FA.FfBits.Radio != 0:
-            states = []
-            for k in obj.get(FA.Kids, {}):
-                k = k.get_object()
-                for s in list(k["/AP"]["/N"].keys()):
-                    if s not in states:
-                        states.append(s)
-                retval[key][NameObject("/_States_")] = ArrayObject(states)
-            if (
-                obj.get(FA.Ff, 0) & FA.FfBits.NoToggleToOff != 0
-                and "/Off" in retval[key]["/_States_"]
-            ):
-                del retval[key]["/_States_"][retval[key]["/_States_"].index("/Off")]
-
-    def _check_kids(
-        self, tree: Union[TreeObject, DictionaryObject], retval: Any, fileobj: Any
-    ) -> None:
-        if PA.KIDS in tree:
-            # recurse down the tree
-            for kid in tree[PA.KIDS]:  # type: ignore
-                self.get_fields(kid.get_object(), retval, fileobj)
-
-    def _write_field(self, fileobj: Any, field: Any, field_attributes: Any) -> None:
-        field_attributes_tuple = FA.attributes()
-        field_attributes_tuple = (
-            field_attributes_tuple + CheckboxRadioButtonAttributes.attributes()
-        )
-
-        for attr in field_attributes_tuple:
-            if attr in (
-                FA.Kids,
-                FA.AA,
-            ):
-                continue
-            attr_name = field_attributes[attr]
-            try:
-                if attr == FA.FT:
-                    # Make the field type value more clear
-                    types = {
-                        "/Btn": "Button",
-                        "/Tx": "Text",
-                        "/Ch": "Choice",
-                        "/Sig": "Signature",
-                    }
-                    if field[attr] in types:
-                        fileobj.write(f"{attr_name}: {types[field[attr]]}\n")
-                elif attr == FA.Parent:
-                    # Let's just write the name of the parent
-                    try:
-                        name = field[attr][FA.TM]
-                    except KeyError:
-                        name = field[attr][FA.T]
-                    fileobj.write(f"{attr_name}: {name}\n")
-                else:
-                    fileobj.write(f"{attr_name}: {field[attr]}\n")
-            except KeyError:
-                # Field attribute is N/A or unknown, so don't write anything
-                pass
-
-    def get_form_text_fields(self, full_qualified_name: bool = False) -> Dict[str, Any]:
-        """
-        Retrieve form fields from the document with textual data.
-
-        Args:
-            full_qualified_name: to get full name
-
-        Returns:
-            A dictionary. The key is the name of the form field,
-            the value is the content of the field.
-
-            If the document contains multiple form fields with the same name, the
-            second and following will get the suffix .2, .3, ...
-        """
-
-        def indexed_key(k: str, fields: dict) -> str:
-            if k not in fields:
-                return k
-            else:
-                return (
-                    k
-                    + "."
-                    + str(sum([1 for kk in fields if kk.startswith(k + ".")]) + 2)
-                )
-
-        # Retrieve document form fields
-        formfields = self.get_fields()
-        if formfields is None:
-            return {}
-        ff = {}
-        for field, value in formfields.items():
-            if value.get("/FT") == "/Tx":
-                if full_qualified_name:
-                    ff[field] = value.get("/V")
-                else:
-                    ff[indexed_key(cast(str, value["/T"]), ff)] = value.get("/V")
-        return ff
-
-    def getFormTextFields(self) -> Dict[str, Any]:  # deprecated
-        """
-        Use :meth:`get_form_text_fields` instead.
-
-        .. deprecated:: 1.28.0
-        """
-        deprecation_with_replacement(
-            "getFormTextFields", "get_form_text_fields", "3.0.0"
-        )
-        return self.get_form_text_fields()
-
-    def _get_named_destinations(
-        self,
-        tree: Union[TreeObject, None] = None,
-        retval: Optional[Any] = None,
-    ) -> Dict[str, Any]:
-        """
-        Retrieve the named destinations present in the document.
-
-        Args:
-            tree:
-            retval:
-
-        Returns:
-            A dictionary which maps names to
-            :class:`Destinations<pypdf.generic.Destination>`.
-        """
-        if retval is None:
-            retval = {}
-            catalog = cast(DictionaryObject, self.trailer[TK.ROOT])
-
-            # get the name tree
-            if CA.DESTS in catalog:
-                tree = cast(TreeObject, catalog[CA.DESTS])
-            elif CA.NAMES in catalog:
-                names = cast(DictionaryObject, catalog[CA.NAMES])
-                if CA.DESTS in names:
-                    tree = cast(TreeObject, names[CA.DESTS])
-
-        if tree is None:
-            return retval
-
-        if PA.KIDS in tree:
-            # recurse down the tree
-            for kid in cast(ArrayObject, tree[PA.KIDS]):
-                self._get_named_destinations(kid.get_object(), retval)
-        # TABLE 3.33 Entries in a name tree node dictionary (PDF 1.7 specs)
-        elif CA.NAMES in tree:  # KIDS and NAMES are exclusives (PDF 1.7 specs p 162)
-            names = cast(DictionaryObject, tree[CA.NAMES])
-            i = 0
-            while i < len(names):
-                key = cast(str, names[i].get_object())
-                i += 1
-                if not isinstance(key, str):
-                    continue
-                try:
-                    value = names[i].get_object()
-                except IndexError:
-                    break
-                i += 1
-                if isinstance(value, DictionaryObject) and "/D" in value:
-                    value = value["/D"]
-                dest = self._build_destination(key, value)  # type: ignore
-                if dest is not None:
-                    retval[key] = dest
-        else:  # case where Dests is in root catalog (PDF 1.7 specs, §2 about PDF1.1
-            for k__, v__ in tree.items():
-                val = v__.get_object()
-                if isinstance(val, DictionaryObject):
-                    val = val["/D"].get_object()
-                dest = self._build_destination(k__, val)
-                if dest is not None:
-                    retval[k__] = dest
-        return retval
-
-    def getNamedDestinations(
-        self,
-        tree: Union[TreeObject, None] = None,
-        retval: Optional[Any] = None,
-    ) -> Dict[str, Any]:  # deprecated
-        """
-        Use :py:attr:`named_destinations` instead.
-
-        .. deprecated:: 1.28.0
-        """
-        deprecation_with_replacement(
-            "getNamedDestinations", "named_destinations", "3.0.0"
-        )
-        return self._get_named_destinations(tree, retval)
-
-    @property
-    def outline(self) -> OutlineType:
-        """
-        Read-only property for the outline present in the document.
-
-        (i.e., a collection of 'outline items' which are also known as
-        'bookmarks')
-        """
-        return self._get_outline()
-
-    @property
-    def outlines(self) -> OutlineType:  # deprecated
-        """
-        Use :py:attr:`outline` instead.
-
-        .. deprecated:: 2.9.0
-        """
-        deprecation_with_replacement("outlines", "outline", "3.0.0")
-        return self.outline
-
-    def _get_outline(
-        self, node: Optional[DictionaryObject] = None, outline: Optional[Any] = None
-    ) -> OutlineType:
-        if outline is None:
-            outline = []
-            catalog = cast(DictionaryObject, self.trailer[TK.ROOT])
-
-            # get the outline dictionary and named destinations
-            if CO.OUTLINES in catalog:
-                lines = cast(DictionaryObject, catalog[CO.OUTLINES])
-
-                if isinstance(lines, NullObject):
-                    return outline
-
-                # TABLE 8.3 Entries in the outline dictionary
-                if lines is not None and "/First" in lines:
-                    node = cast(DictionaryObject, lines["/First"])
-            self._namedDests = self._get_named_destinations()
-
-        if node is None:
-            return outline
-
-        # see if there are any more outline items
-        while True:
-            outline_obj = self._build_outline_item(node)
-            if outline_obj:
-                outline.append(outline_obj)
-
-            # check for sub-outline
-            if "/First" in node:
-                sub_outline: List[Any] = []
-                self._get_outline(cast(DictionaryObject, node["/First"]), sub_outline)
-                if sub_outline:
-                    outline.append(sub_outline)
-
-            if "/Next" not in node:
-                break
-            node = cast(DictionaryObject, node["/Next"])
-
-        return outline
-
-    def getOutlines(
-        self, node: Optional[DictionaryObject] = None, outline: Optional[Any] = None
-    ) -> OutlineType:  # deprecated
-        """
-        Use :py:attr:`outline` instead.
-
-        .. deprecated:: 1.28.0
-        """
-        deprecation_with_replacement("getOutlines", "outline", "3.0.0")
-        return self._get_outline(node, outline)
-
-    @property
-    def threads(self) -> Optional[ArrayObject]:
-        """
-        Read-only property for the list of threads.
-
-        See §8.3.2 from PDF 1.7 spec.
-
-        It's an array of dictionaries with "/F" and "/I" properties or
-        None if there are no articles.
-        """
-        catalog = cast(DictionaryObject, self.trailer[TK.ROOT])
-        if CO.THREADS in catalog:
-            return cast("ArrayObject", catalog[CO.THREADS])
-        else:
-            return None
-
-    def _get_page_number_by_indirect(
-        self, indirect_reference: Union[None, int, NullObject, IndirectObject]
-    ) -> int:
-        """
-        Generate _page_id2num.
-
-        Args:
-            indirect_reference:
-
-        Returns:
-            The page number.
-        """
-        if self._page_id2num is None:
-            self._page_id2num = {
-                x.indirect_reference.idnum: i for i, x in enumerate(self.pages)  # type: ignore
-            }
-
-        if indirect_reference is None or isinstance(indirect_reference, NullObject):
-            return -1
-        if isinstance(indirect_reference, int):
-            idnum = indirect_reference
-        else:
-            idnum = indirect_reference.idnum
-        assert self._page_id2num is not None, "hint for mypy"
-        ret = self._page_id2num.get(idnum, -1)
-        return ret
-
-    def get_page_number(self, page: PageObject) -> int:
-        """
-        Retrieve page number of a given PageObject.
-
-        Args:
-            page: The page to get page number. Should be
-                an instance of :class:`PageObject<pypdf._page.PageObject>`
-
-        Returns:
-            The page number or -1 if page is not found
-        """
-        return self._get_page_number_by_indirect(page.indirect_reference)
-
-    def getPageNumber(self, page: PageObject) -> int:  # deprecated
-        """
-        Use :meth:`get_page_number` instead.
-
-        .. deprecated:: 1.28.0
-        """
-        deprecation_with_replacement("getPageNumber", "get_page_number", "3.0.0")
-        return self.get_page_number(page)
-
-    def get_destination_page_number(self, destination: Destination) -> int:
-        """
-        Retrieve page number of a given Destination object.
-
-        Args:
-            destination: The destination to get page number.
-
-        Returns:
-            The page number or -1 if page is not found
-        """
-        return self._get_page_number_by_indirect(destination.page)
-
-    def getDestinationPageNumber(self, destination: Destination) -> int:  # deprecated
-        """
-        Use :meth:`get_destination_page_number` instead.
-
-        .. deprecated:: 1.28.0
-        """
-        deprecation_with_replacement(
-            "getDestinationPageNumber", "get_destination_page_number", "3.0.0"
-        )
-        return self.get_destination_page_number(destination)
-
-    def _build_destination(
-        self,
-        title: str,
-        array: Optional[
-            List[
-                Union[NumberObject, IndirectObject, None, NullObject, DictionaryObject]
-            ]
-        ],
-    ) -> Destination:
-        page, typ = None, None
-        # handle outline items with missing or invalid destination
-        if (
-            isinstance(array, (NullObject, str))
-            or (isinstance(array, ArrayObject) and len(array) == 0)
-            or array is None
-        ):
-            page = NullObject()
-            return Destination(title, page, Fit.fit())
-        else:
-            page, typ = array[0:2]  # type: ignore
-            array = array[2:]
-            try:
-                return Destination(title, page, Fit(fit_type=typ, fit_args=array))  # type: ignore
-            except PdfReadError:
-                logger_warning(f"Unknown destination: {title} {array}", __name__)
-                if self.strict:
-                    raise
-                # create a link to first Page
-                tmp = self.pages[0].indirect_reference
-                indirect_reference = NullObject() if tmp is None else tmp
-                return Destination(title, indirect_reference, Fit.fit())  # type: ignore
-
-    def _build_outline_item(self, node: DictionaryObject) -> Optional[Destination]:
-        dest, title, outline_item = None, None, None
-
-        # title required for valid outline
-        # PDF Reference 1.7: TABLE 8.4 Entries in an outline item dictionary
-        try:
-            title = cast("str", node["/Title"])
-        except KeyError:
-            if self.strict:
-                raise PdfReadError(f"Outline Entry Missing /Title attribute: {node!r}")
-            title = ""  # type: ignore
-
-        if "/A" in node:
-            # Action, PDFv1.7 Section 12.6 (only type GoTo supported)
-            action = cast(DictionaryObject, node["/A"])
-            action_type = cast(NameObject, action[GoToActionArguments.S])
-            if action_type == "/GoTo":
-                dest = action[GoToActionArguments.D]
-        elif "/Dest" in node:
-            # Destination, PDFv1.7 Section 12.3.2
-            dest = node["/Dest"]
-            # if array was referenced in another object, will be a dict w/ key "/D"
-            if isinstance(dest, DictionaryObject) and "/D" in dest:
-                dest = dest["/D"]
-
-        if isinstance(dest, ArrayObject):
-            outline_item = self._build_destination(title, dest)
-        elif isinstance(dest, str):
-            # named destination, addresses NameObject Issue #193
-            # TODO : keep named destination instead of replacing it ?
-            try:
-                outline_item = self._build_destination(
-                    title, self._namedDests[dest].dest_array
-                )
-            except KeyError:
-                # named destination not found in Name Dict
-                outline_item = self._build_destination(title, None)
-        elif dest is None:
-            # outline item not required to have destination or action
-            # PDFv1.7 Table 153
-            outline_item = self._build_destination(title, dest)
-        else:
-            if self.strict:
-                raise PdfReadError(f"Unexpected destination {dest!r}")
-            else:
-                logger_warning(
-                    f"Removed unexpected destination {dest!r} from destination",
-                    __name__,
-                )
-            outline_item = self._build_destination(title, None)  # type: ignore
-
-        # if outline item created, add color, format, and child count if present
-        if outline_item:
-            if "/C" in node:
-                # Color of outline item font in (R, G, B) with values ranging 0.0-1.0
-                outline_item[NameObject("/C")] = ArrayObject(FloatObject(c) for c in node["/C"])  # type: ignore
-            if "/F" in node:
-                # specifies style characteristics bold and/or italic
-                # with 1=italic, 2=bold, 3=both
-                outline_item[NameObject("/F")] = node["/F"]
-            if "/Count" in node:
-                # absolute value = num. visible children
-                # with positive = open/unfolded, negative = closed/folded
-                outline_item[NameObject("/Count")] = node["/Count"]
-            #  if count is 0 we will consider it as open ( in order to have always an is_open to simplify
-            outline_item[NameObject("/%is_open%")] = BooleanObject(
-                node.get("/Count", 0) >= 0
-            )
-        outline_item.node = node
-        try:
-            outline_item.indirect_reference = node.indirect_reference
-        except AttributeError:
-            pass
-        return outline_item
-
-    @property
-    def pages(self) -> List[PageObject]:
-        """Read-only property that emulates a list of :py:class:`Page<pypdf._page.Page>` objects."""
-        return _VirtualList(self._get_num_pages, self._get_page)  # type: ignore
-
-    @property
-    def page_labels(self) -> List[str]:
-        """
-        A list of labels for the pages in this document.
-
-        This property is read-only. The labels are in the order that the pages
-        appear in the document.
-        """
-        return [page_index2page_label(self, i) for i in range(len(self.pages))]
-
-    @property
-    def page_layout(self) -> Optional[str]:
-        """
-        Get the page layout currently being used.
-
-        .. list-table:: Valid ``layout`` values
-           :widths: 50 200
-
-           * - /NoLayout
-             - Layout explicitly not specified
-           * - /SinglePage
-             - Show one page at a time
-           * - /OneColumn
-             - Show one column at a time
-           * - /TwoColumnLeft
-             - Show pages in two columns, odd-numbered pages on the left
-           * - /TwoColumnRight
-             - Show pages in two columns, odd-numbered pages on the right
-           * - /TwoPageLeft
-             - Show two pages at a time, odd-numbered pages on the left
-           * - /TwoPageRight
-             - Show two pages at a time, odd-numbered pages on the right
-        """
-        trailer = cast(DictionaryObject, self.trailer[TK.ROOT])
-        if CD.PAGE_LAYOUT in trailer:
-            return cast(NameObject, trailer[CD.PAGE_LAYOUT])
-        return None
-
-    def getPageLayout(self) -> Optional[str]:  # deprecated
-        """
-        Use :py:attr:`page_layout` instead.
-
-        .. deprecated:: 1.28.0
-        """
-        deprecation_with_replacement("getPageLayout", "page_layout", "3.0.0")
-        return self.page_layout
-
-    @property
-    def pageLayout(self) -> Optional[str]:  # deprecated
-        """
-        Use :py:attr:`page_layout` instead.
-
-        .. deprecated:: 1.28.0
-        """
-        deprecation_with_replacement("pageLayout", "page_layout", "3.0.0")
-        return self.page_layout
-
-    @property
-    def page_mode(self) -> Optional[PagemodeType]:
-        """
-        Get the page mode currently being used.
-
-        .. list-table:: Valid ``mode`` values
-           :widths: 50 200
-
-           * - /UseNone
-             - Do not show outline or thumbnails panels
-           * - /UseOutlines
-             - Show outline (aka bookmarks) panel
-           * - /UseThumbs
-             - Show page thumbnails panel
-           * - /FullScreen
-             - Fullscreen view
-           * - /UseOC
-             - Show Optional Content Group (OCG) panel
-           * - /UseAttachments
-             - Show attachments panel
-        """
-        try:
-            return self.trailer[TK.ROOT]["/PageMode"]  # type: ignore
-        except KeyError:
-            return None
-
-    def getPageMode(self) -> Optional[PagemodeType]:  # deprecated
-        """
-        Use :py:attr:`page_mode` instead.
-
-        .. deprecated:: 1.28.0
-        """
-        deprecation_with_replacement("getPageMode", "page_mode", "3.0.0")
-        return self.page_mode
-
-    @property
-    def pageMode(self) -> Optional[PagemodeType]:  # deprecated
-        """
-        Use :py:attr:`page_mode` instead.
-
-        .. deprecated:: 1.28.0
-        """
-        deprecation_with_replacement("pageMode", "page_mode", "3.0.0")
-        return self.page_mode
-
-    def _flatten(
-        self,
-        pages: Union[None, DictionaryObject, PageObject] = None,
-        inherit: Optional[Dict[str, Any]] = None,
-        indirect_reference: Optional[IndirectObject] = None,
-    ) -> None:
-        inheritable_page_attributes = (
-            NameObject(PG.RESOURCES),
-            NameObject(PG.MEDIABOX),
-            NameObject(PG.CROPBOX),
-            NameObject(PG.ROTATE),
-        )
-        if inherit is None:
-            inherit = {}
-        if pages is None:
-            # Fix issue 327: set flattened_pages attribute only for
-            # decrypted file
-            catalog = self.trailer[TK.ROOT].get_object()
-            pages = catalog["/Pages"].get_object()  # type: ignore
-            self.flattened_pages = []
-
-        if PA.TYPE in pages:
-            t = pages[PA.TYPE]  # type: ignore
-        # if pdf has no type, considered as a page if /Kids is missing
-        elif PA.KIDS not in pages:
-            t = "/Page"
-        else:
-            t = "/Pages"
-
-        if t == "/Pages":
-            for attr in inheritable_page_attributes:
-                if attr in pages:
-                    inherit[attr] = pages[attr]
-            for page in pages[PA.KIDS]:  # type: ignore
-                addt = {}
-                if isinstance(page, IndirectObject):
-                    addt["indirect_reference"] = page
-                obj = page.get_object()
-                if obj:
-                    # damaged file may have invalid child in /Pages
-                    self._flatten(obj, inherit, **addt)
-        elif t == "/Page":
-            for attr_in, value in list(inherit.items()):
-                # if the page has it's own value, it does not inherit the
-                # parent's value:
-                if attr_in not in pages:
-                    pages[attr_in] = value
-            page_obj = PageObject(self, indirect_reference)
-            page_obj.update(pages)
-
-            # TODO: Could flattened_pages be None at this point?
-            self.flattened_pages.append(page_obj)  # type: ignore
-
-    def _get_object_from_stream(
-        self, indirect_reference: IndirectObject
-    ) -> Union[int, PdfObject, str]:
-        # indirect reference to object in object stream
-        # read the entire object stream into memory
-        stmnum, idx = self.xref_objStm[indirect_reference.idnum]
-        obj_stm: EncodedStreamObject = IndirectObject(stmnum, 0, self).get_object()  # type: ignore
-        # This is an xref to a stream, so its type better be a stream
-        assert cast(str, obj_stm["/Type"]) == "/ObjStm"
-        # /N is the number of indirect objects in the stream
-        assert idx < obj_stm["/N"]
-        stream_data = BytesIO(b_(obj_stm.get_data()))
-        for i in range(obj_stm["/N"]):  # type: ignore
-            read_non_whitespace(stream_data)
-            stream_data.seek(-1, 1)
-            objnum = NumberObject.read_from_stream(stream_data)
-            read_non_whitespace(stream_data)
-            stream_data.seek(-1, 1)
-            offset = NumberObject.read_from_stream(stream_data)
-            read_non_whitespace(stream_data)
-            stream_data.seek(-1, 1)
-            if objnum != indirect_reference.idnum:
-                # We're only interested in one object
-                continue
-            if self.strict and idx != i:
-                raise PdfReadError("Object is in wrong index.")
-            stream_data.seek(int(obj_stm["/First"] + offset), 0)  # type: ignore
-
-            # to cope with some case where the 'pointer' is on a white space
-            read_non_whitespace(stream_data)
-            stream_data.seek(-1, 1)
-
-            try:
-                obj = read_object(stream_data, self)
-            except PdfStreamError as exc:
-                # Stream object cannot be read. Normally, a critical error, but
-                # Adobe Reader doesn't complain, so continue (in strict mode?)
-                logger_warning(
-                    f"Invalid stream (index {i}) within object "
-                    f"{indirect_reference.idnum} {indirect_reference.generation}: "
-                    f"{exc}",
-                    __name__,
-                )
-
-                if self.strict:
-                    raise PdfReadError(f"Can't read object stream: {exc}")
-                # Replace with null. Hopefully it's nothing important.
-                obj = NullObject()
-            return obj
-
-        if self.strict:
-            raise PdfReadError("This is a fatal error in strict mode.")
-        return NullObject()
-
-    def _get_indirect_object(self, num: int, gen: int) -> Optional[PdfObject]:
-        """
-        Used to ease development.
-
-        This is equivalent to generic.IndirectObject(num,gen,self).get_object()
-
-        Args:
-            num: The object number of the indirect object.
-            gen: The generation number of the indirect object.
-
-        Returns:
-            A PdfObject
-        """
-        return IndirectObject(num, gen, self).get_object()
-
-    def get_object(
-        self, indirect_reference: Union[int, IndirectObject]
-    ) -> Optional[PdfObject]:
-        if isinstance(indirect_reference, int):
-            indirect_reference = IndirectObject(indirect_reference, 0, self)
-        retval = self.cache_get_indirect_object(
-            indirect_reference.generation, indirect_reference.idnum
-        )
-        if retval is not None:
-            return retval
-        if (
-            indirect_reference.generation == 0
-            and indirect_reference.idnum in self.xref_objStm
-        ):
-            retval = self._get_object_from_stream(indirect_reference)  # type: ignore
-        elif (
-            indirect_reference.generation in self.xref
-            and indirect_reference.idnum in self.xref[indirect_reference.generation]
-        ):
-            if self.xref_free_entry.get(indirect_reference.generation, {}).get(
-                indirect_reference.idnum, False
-            ):
-                return NullObject()
-            start = self.xref[indirect_reference.generation][indirect_reference.idnum]
-            self.stream.seek(start, 0)
-            try:
-                idnum, generation = self.read_object_header(self.stream)
-            except Exception:
-                if hasattr(self.stream, "getbuffer"):
-                    buf = bytes(self.stream.getbuffer())  # type: ignore
-                else:
-                    p = self.stream.tell()
-                    self.stream.seek(0, 0)
-                    buf = self.stream.read(-1)
-                    self.stream.seek(p, 0)
-                m = re.search(
-                    rf"\s{indirect_reference.idnum}\s+{indirect_reference.generation}\s+obj".encode(),
-                    buf,
-                )
-                if m is not None:
-                    logger_warning(
-                        f"Object ID {indirect_reference.idnum},{indirect_reference.generation} ref repaired",
-                        __name__,
-                    )
-                    self.xref[indirect_reference.generation][
-                        indirect_reference.idnum
-                    ] = (m.start(0) + 1)
-                    self.stream.seek(m.start(0) + 1)
-                    idnum, generation = self.read_object_header(self.stream)
-                else:
-                    idnum = -1  # exception will be raised below
-            if idnum != indirect_reference.idnum and self.xref_index:
-                # Xref table probably had bad indexes due to not being zero-indexed
-                if self.strict:
-                    raise PdfReadError(
-                        f"Expected object ID ({indirect_reference.idnum} {indirect_reference.generation}) "
-                        f"does not match actual ({idnum} {generation}); "
-                        "xref table not zero-indexed."
-                    )
-                # xref table is corrected in non-strict mode
-            elif idnum != indirect_reference.idnum and self.strict:
-                # some other problem
-                raise PdfReadError(
-                    f"Expected object ID ({indirect_reference.idnum} "
-                    f"{indirect_reference.generation}) does not match actual "
-                    f"({idnum} {generation})."
-                )
-            if self.strict:
-                assert generation == indirect_reference.generation
-            retval = read_object(self.stream, self)  # type: ignore
-
-            # override encryption is used for the /Encrypt dictionary
-            if not self._override_encryption and self._encryption is not None:
-                # if we don't have the encryption key:
-                if not self._encryption.is_decrypted():
-                    raise FileNotDecryptedError("File has not been decrypted")
-                # otherwise, decrypt here...
-                retval = cast(PdfObject, retval)
-                retval = self._encryption.decrypt_object(
-                    retval, indirect_reference.idnum, indirect_reference.generation
-                )
-        else:
-            if hasattr(self.stream, "getbuffer"):
-                buf = bytes(self.stream.getbuffer())  # type: ignore
-            else:
-                p = self.stream.tell()
-                self.stream.seek(0, 0)
-                buf = self.stream.read(-1)
-                self.stream.seek(p, 0)
-            m = re.search(
-                rf"\s{indirect_reference.idnum}\s+{indirect_reference.generation}\s+obj".encode(),
-                buf,
-            )
-            if m is not None:
-                logger_warning(
-                    f"Object {indirect_reference.idnum} {indirect_reference.generation} found",
-                    __name__,
-                )
-                if indirect_reference.generation not in self.xref:
-                    self.xref[indirect_reference.generation] = {}
-                self.xref[indirect_reference.generation][indirect_reference.idnum] = (
-                    m.start(0) + 1
-                )
-                self.stream.seek(m.end(0) + 1)
-                skip_over_whitespace(self.stream)
-                self.stream.seek(-1, 1)
-                retval = read_object(self.stream, self)  # type: ignore
-
-                # override encryption is used for the /Encrypt dictionary
-                if not self._override_encryption and self._encryption is not None:
-                    # if we don't have the encryption key:
-                    if not self._encryption.is_decrypted():
-                        raise FileNotDecryptedError("File has not been decrypted")
-                    # otherwise, decrypt here...
-                    retval = cast(PdfObject, retval)
-                    retval = self._encryption.decrypt_object(
-                        retval, indirect_reference.idnum, indirect_reference.generation
-                    )
-            else:
-                logger_warning(
-                    f"Object {indirect_reference.idnum} {indirect_reference.generation} not defined.",
-                    __name__,
-                )
-                if self.strict:
-                    raise PdfReadError("Could not find object.")
-        self.cache_indirect_object(
-            indirect_reference.generation, indirect_reference.idnum, retval
-        )
-        return retval
-
-    def getObject(
-        self, indirectReference: IndirectObject
-    ) -> Optional[PdfObject]:  # deprecated
-        """
-        Use :meth:`get_object` instead.
-
-        .. deprecated:: 1.28.0
-        """
-        deprecation_with_replacement("getObject", "get_object", "3.0.0")
-        return self.get_object(indirectReference)
-
-    def read_object_header(self, stream: StreamType) -> Tuple[int, int]:
-        # Should never be necessary to read out whitespace, since the
-        # cross-reference table should put us in the right spot to read the
-        # object header.  In reality... some files have stupid cross reference
-        # tables that are off by whitespace bytes.
-        extra = False
-        skip_over_comment(stream)
-        extra |= skip_over_whitespace(stream)
-        stream.seek(-1, 1)
-        idnum = read_until_whitespace(stream)
-        extra |= skip_over_whitespace(stream)
-        stream.seek(-1, 1)
-        generation = read_until_whitespace(stream)
-        extra |= skip_over_whitespace(stream)
-        stream.seek(-1, 1)
-
-        # although it's not used, it might still be necessary to read
-        _obj = stream.read(3)
-
-        read_non_whitespace(stream)
-        stream.seek(-1, 1)
-        if extra and self.strict:
-            logger_warning(
-                f"Superfluous whitespace found in object header {idnum} {generation}",  # type: ignore
-                __name__,
-            )
-        return int(idnum), int(generation)
-
-    def readObjectHeader(self, stream: StreamType) -> Tuple[int, int]:  # deprecated
-        """
-        Use :meth:`read_object_header` instead.
-
-        .. deprecated:: 1.28.0
-        """
-        deprecation_with_replacement("readObjectHeader", "read_object_header", "3.0.0")
-        return self.read_object_header(stream)
-
-    def cache_get_indirect_object(
-        self, generation: int, idnum: int
-    ) -> Optional[PdfObject]:
-        return self.resolved_objects.get((generation, idnum))
-
-    def cacheGetIndirectObject(
-        self, generation: int, idnum: int
-    ) -> Optional[PdfObject]:  # deprecated
-        """
-        Use :meth:`cache_get_indirect_object` instead.
-
-        .. deprecated:: 1.28.0
-        """
-        deprecation_with_replacement(
-            "cacheGetIndirectObject", "cache_get_indirect_object", "3.0.0"
-        )
-        return self.cache_get_indirect_object(generation, idnum)
-
-    def cache_indirect_object(
-        self, generation: int, idnum: int, obj: Optional[PdfObject]
-    ) -> Optional[PdfObject]:
-        if (generation, idnum) in self.resolved_objects:
-            msg = f"Overwriting cache for {generation} {idnum}"
-            if self.strict:
-                raise PdfReadError(msg)
-            logger_warning(msg, __name__)
-        self.resolved_objects[(generation, idnum)] = obj
-        if obj is not None:
-            obj.indirect_reference = IndirectObject(idnum, generation, self)
-        return obj
-
-    def cacheIndirectObject(
-        self, generation: int, idnum: int, obj: Optional[PdfObject]
-    ) -> Optional[PdfObject]:  # deprecated
-        """
-        Use :meth:`cache_indirect_object` instead.
-
-        .. deprecated:: 1.28.0
-        """
-        deprecation_with_replacement("cacheIndirectObject", "cache_indirect_object")
-        return self.cache_indirect_object(generation, idnum, obj)
-
-    def read(self, stream: StreamType) -> None:
-        self._basic_validation(stream)
-        self._find_eof_marker(stream)
-        startxref = self._find_startxref_pos(stream)
-
-        # check and eventually correct the startxref only in not strict
-        xref_issue_nr = self._get_xref_issues(stream, startxref)
-        if xref_issue_nr != 0:
-            if self.strict and xref_issue_nr:
-                raise PdfReadError("Broken xref table")
-            logger_warning(f"incorrect startxref pointer({xref_issue_nr})", __name__)
-
-        # read all cross reference tables and their trailers
-        self._read_xref_tables_and_trailers(stream, startxref, xref_issue_nr)
-
-        # if not zero-indexed, verify that the table is correct; change it if necessary
-        if self.xref_index and not self.strict:
-            loc = stream.tell()
-            for gen, xref_entry in self.xref.items():
-                if gen == 65535:
-                    continue
-                xref_k = sorted(
-                    xref_entry.keys()
-                )  # must ensure ascendant to prevent damage
-                for id in xref_k:
-                    stream.seek(xref_entry[id], 0)
-                    try:
-                        pid, _pgen = self.read_object_header(stream)
-                    except ValueError:
-                        break
-                    if pid == id - self.xref_index:
-                        # fixing index item per item is required for revised PDF.
-                        self.xref[gen][pid] = self.xref[gen][id]
-                        del self.xref[gen][id]
-                    # if not, then either it's just plain wrong, or the
-                    # non-zero-index is actually correct
-            stream.seek(loc, 0)  # return to where it was
-
-    def _basic_validation(self, stream: StreamType) -> None:
-        """Ensure file is not empty. Read at most 5 bytes."""
-        stream.seek(0, os.SEEK_SET)
-        try:
-            header_byte = stream.read(5)
-        except UnicodeDecodeError:
-            raise UnsupportedOperation("cannot read header")
-        if header_byte == b"":
-            raise EmptyFileError("Cannot read an empty file")
-        elif header_byte != b"%PDF-":
-            if self.strict:
-                raise PdfReadError(
-                    f"PDF starts with '{header_byte.decode('utf8')}', "
-                    "but '%PDF-' expected"
-                )
-            else:
-                logger_warning(f"invalid pdf header: {header_byte}", __name__)
-        stream.seek(0, os.SEEK_END)
-
-    def _find_eof_marker(self, stream: StreamType) -> None:
-        """
-        Jump to the %%EOF marker.
-
-        According to the specs, the %%EOF marker should be at the very end of
-        the file. Hence for standard-compliant PDF documents this function will
-        read only the last part (DEFAULT_BUFFER_SIZE).
-        """
-        HEADER_SIZE = 8  # to parse whole file, Header is e.g. '%PDF-1.6'
-        line = b""
-        while line[:5] != b"%%EOF":
-            if stream.tell() < HEADER_SIZE:
-                if self.strict:
-                    raise PdfReadError("EOF marker not found")
-                else:
-                    logger_warning("EOF marker not found", __name__)
-            line = read_previous_line(stream)
-
-    def _find_startxref_pos(self, stream: StreamType) -> int:
-        """
-        Find startxref entry - the location of the xref table.
-
-        Args:
-            stream:
-
-        Returns:
-            The bytes offset
-        """
-        line = read_previous_line(stream)
-        try:
-            startxref = int(line)
-        except ValueError:
-            # 'startxref' may be on the same line as the location
-            if not line.startswith(b"startxref"):
-                raise PdfReadError("startxref not found")
-            startxref = int(line[9:].strip())
-            logger_warning("startxref on same line as offset", __name__)
-        else:
-            line = read_previous_line(stream)
-            if line[:9] != b"startxref":
-                raise PdfReadError("startxref not found")
-        return startxref
-
-    def _read_standard_xref_table(self, stream: StreamType) -> None:
-        # standard cross-reference table
-        ref = stream.read(3)
-        if ref != b"ref":
-            raise PdfReadError("xref table read error")
-        read_non_whitespace(stream)
-        stream.seek(-1, 1)
-        first_time = True  # check if the first time looking at the xref table
-        while True:
-            num = cast(int, read_object(stream, self))
-            if first_time and num != 0:
-                self.xref_index = num
-                if self.strict:
-                    logger_warning(
-                        "Xref table not zero-indexed. ID numbers for objects will be corrected.",
-                        __name__,
-                    )
-                    # if table not zero indexed, could be due to error from when PDF was created
-                    # which will lead to mismatched indices later on, only warned and corrected if self.strict==True
-            first_time = False
-            read_non_whitespace(stream)
-            stream.seek(-1, 1)
-            size = cast(int, read_object(stream, self))
-            read_non_whitespace(stream)
-            stream.seek(-1, 1)
-            cnt = 0
-            while cnt < size:
-                line = stream.read(20)
-
-                # It's very clear in section 3.4.3 of the PDF spec
-                # that all cross-reference table lines are a fixed
-                # 20 bytes (as of PDF 1.7). However, some files have
-                # 21-byte entries (or more) due to the use of \r\n
-                # (CRLF) EOL's. Detect that case, and adjust the line
-                # until it does not begin with a \r (CR) or \n (LF).
-                while line[0] in b"\x0D\x0A":
-                    stream.seek(-20 + 1, 1)
-                    line = stream.read(20)
-
-                # On the other hand, some malformed PDF files
-                # use a single character EOL without a preceding
-                # space.  Detect that case, and seek the stream
-                # back one character.  (0-9 means we've bled into
-                # the next xref entry, t means we've bled into the
-                # text "trailer"):
-                if line[-1] in b"0123456789t":
-                    stream.seek(-1, 1)
-
-                try:
-                    offset_b, generation_b = line[:16].split(b" ")
-                    entry_type_b = line[17:18]
-
-                    offset, generation = int(offset_b), int(generation_b)
-                except Exception:
-                    # if something wrong occurred
-                    if hasattr(stream, "getbuffer"):
-                        buf = bytes(stream.getbuffer())  # type: ignore
-                    else:
-                        p = stream.tell()
-                        stream.seek(0, 0)
-                        buf = stream.read(-1)
-                        stream.seek(p)
-
-                    f = re.search(f"{num}\\s+(\\d+)\\s+obj".encode(), buf)
-                    if f is None:
-                        logger_warning(
-                            f"entry {num} in Xref table invalid; object not found",
-                            __name__,
-                        )
-                        generation = 65535
-                        offset = -1
-                    else:
-                        logger_warning(
-                            f"entry {num} in Xref table invalid but object found",
-                            __name__,
-                        )
-                        generation = int(f.group(1))
-                        offset = f.start()
-
-                if generation not in self.xref:
-                    self.xref[generation] = {}
-                    self.xref_free_entry[generation] = {}
-                if num in self.xref[generation]:
-                    # It really seems like we should allow the last
-                    # xref table in the file to override previous
-                    # ones. Since we read the file backwards, assume
-                    # any existing key is already set correctly.
-                    pass
-                else:
-                    self.xref[generation][num] = offset
-                    try:
-                        self.xref_free_entry[generation][num] = entry_type_b == b"f"
-                    except Exception:
-                        pass
-                    try:
-                        self.xref_free_entry[65535][num] = entry_type_b == b"f"
-                    except Exception:
-                        pass
-                cnt += 1
-                num += 1
-            read_non_whitespace(stream)
-            stream.seek(-1, 1)
-            trailer_tag = stream.read(7)
-            if trailer_tag != b"trailer":
-                # more xrefs!
-                stream.seek(-7, 1)
-            else:
-                break
-
-    def _read_xref_tables_and_trailers(
-        self, stream: StreamType, startxref: Optional[int], xref_issue_nr: int
-    ) -> None:
-        self.xref: Dict[int, Dict[Any, Any]] = {}
-        self.xref_free_entry: Dict[int, Dict[Any, Any]] = {}
-        self.xref_objStm: Dict[int, Tuple[Any, Any]] = {}
-        self.trailer = DictionaryObject()
-        while startxref is not None:
-            # load the xref table
-            stream.seek(startxref, 0)
-            x = stream.read(1)
-            if x in b"\r\n":
-                x = stream.read(1)
-            if x == b"x":
-                startxref = self._read_xref(stream)
-            elif xref_issue_nr:
-                try:
-                    self._rebuild_xref_table(stream)
-                    break
-                except Exception:
-                    xref_issue_nr = 0
-            elif x.isdigit():
-                try:
-                    xrefstream = self._read_pdf15_xref_stream(stream)
-                except Exception as e:
-                    if TK.ROOT in self.trailer:
-                        logger_warning(
-                            f"Previous trailer can not be read {e.args}",
-                            __name__,
-                        )
-                        break
-                    else:
-                        raise PdfReadError(f"trailer can not be read {e.args}")
-                trailer_keys = TK.ROOT, TK.ENCRYPT, TK.INFO, TK.ID, TK.SIZE
-                for key in trailer_keys:
-                    if key in xrefstream and key not in self.trailer:
-                        self.trailer[NameObject(key)] = xrefstream.raw_get(key)
-                if "/XRefStm" in xrefstream:
-                    p = stream.tell()
-                    stream.seek(cast(int, xrefstream["/XRefStm"]) + 1, 0)
-                    self._read_pdf15_xref_stream(stream)
-                    stream.seek(p, 0)
-                if "/Prev" in xrefstream:
-                    startxref = cast(int, xrefstream["/Prev"])
-                else:
-                    break
-            else:
-                startxref = self._read_xref_other_error(stream, startxref)
-
-    def _read_xref(self, stream: StreamType) -> Optional[int]:
-        self._read_standard_xref_table(stream)
-        read_non_whitespace(stream)
-        stream.seek(-1, 1)
-        new_trailer = cast(Dict[str, Any], read_object(stream, self))
-        for key, value in new_trailer.items():
-            if key not in self.trailer:
-                self.trailer[key] = value
-        if "/XRefStm" in new_trailer:
-            p = stream.tell()
-            stream.seek(cast(int, new_trailer["/XRefStm"]) + 1, 0)
-            try:
-                self._read_pdf15_xref_stream(stream)
-            except Exception:
-                logger_warning(
-                    f"XRef object at {new_trailer['/XRefStm']} can not be read, some object may be missing",
-                    __name__,
-                )
-            stream.seek(p, 0)
-        if "/Prev" in new_trailer:
-            startxref = new_trailer["/Prev"]
-            return startxref
-        else:
-            return None
-
-    def _read_xref_other_error(
-        self, stream: StreamType, startxref: int
-    ) -> Optional[int]:
-        # some PDFs have /Prev=0 in the trailer, instead of no /Prev
-        if startxref == 0:
-            if self.strict:
-                raise PdfReadError(
-                    "/Prev=0 in the trailer (try opening with strict=False)"
-                )
-            logger_warning(
-                "/Prev=0 in the trailer - assuming there is no previous xref table",
-                __name__,
-            )
-            return None
-        # bad xref character at startxref.  Let's see if we can find
-        # the xref table nearby, as we've observed this error with an
-        # off-by-one before.
-        stream.seek(-11, 1)
-        tmp = stream.read(20)
-        xref_loc = tmp.find(b"xref")
-        if xref_loc != -1:
-            startxref -= 10 - xref_loc
-            return startxref
-        # No explicit xref table, try finding a cross-reference stream.
-        stream.seek(startxref, 0)
-        for look in range(25):  # value extended to cope with more linearized files
-            if stream.read(1).isdigit():
-                # This is not a standard PDF, consider adding a warning
-                startxref += look
-                return startxref
-        # no xref table found at specified location
-        if "/Root" in self.trailer and not self.strict:
-            # if Root has been already found, just raise warning
-            logger_warning("Invalid parent xref., rebuild xref", __name__)
-            try:
-                self._rebuild_xref_table(stream)
-                return None
-            except Exception:
-                raise PdfReadError("can not rebuild xref")
-        raise PdfReadError("Could not find xref table at specified location")
-
-    def _read_pdf15_xref_stream(
-        self, stream: StreamType
-    ) -> Union[ContentStream, EncodedStreamObject, DecodedStreamObject]:
-        # PDF 1.5+ Cross-Reference Stream
-        stream.seek(-1, 1)
-        idnum, generation = self.read_object_header(stream)
-        xrefstream = cast(ContentStream, read_object(stream, self))
-        assert cast(str, xrefstream["/Type"]) == "/XRef"
-        self.cache_indirect_object(generation, idnum, xrefstream)
-        stream_data = BytesIO(b_(xrefstream.get_data()))
-        # Index pairs specify the subsections in the dictionary. If
-        # none create one subsection that spans everything.
-        idx_pairs = xrefstream.get("/Index", [0, xrefstream.get("/Size")])
-        entry_sizes = cast(Dict[Any, Any], xrefstream.get("/W"))
-        assert len(entry_sizes) >= 3
-        if self.strict and len(entry_sizes) > 3:
-            raise PdfReadError(f"Too many entry sizes: {entry_sizes}")
-
-        def get_entry(i: int) -> Union[int, Tuple[int, ...]]:
-            # Reads the correct number of bytes for each entry. See the
-            # discussion of the W parameter in PDF spec table 17.
-            if entry_sizes[i] > 0:
-                d = stream_data.read(entry_sizes[i])
-                return convert_to_int(d, entry_sizes[i])
-
-            # PDF Spec Table 17: A value of zero for an element in the
-            # W array indicates...the default value shall be used
-            if i == 0:
-                return 1  # First value defaults to 1
-            else:
-                return 0
-
-        def used_before(num: int, generation: Union[int, Tuple[int, ...]]) -> bool:
-            # We move backwards through the xrefs, don't replace any.
-            return num in self.xref.get(generation, []) or num in self.xref_objStm  # type: ignore
-
-        # Iterate through each subsection
-        self._read_xref_subsections(idx_pairs, get_entry, used_before)
-        return xrefstream
-
-    @staticmethod
-    def _get_xref_issues(stream: StreamType, startxref: int) -> int:
-        """
-        Return an int which indicates an issue. 0 means there is no issue.
-
-        Args:
-            stream:
-            startxref:
-
-        Returns:
-            0 means no issue, other values represent specific issues.
-        """
-        stream.seek(startxref - 1, 0)  # -1 to check character before
-        line = stream.read(1)
-        if line == b"j":
-            line = stream.read(1)
-        if line not in b"\r\n \t":
-            return 1
-        line = stream.read(4)
-        if line != b"xref":
-            # not an xref so check if it is an XREF object
-            line = b""
-            while line in b"0123456789 \t":
-                line = stream.read(1)
-                if line == b"":
-                    return 2
-            line += stream.read(2)  # 1 char already read, +2 to check "obj"
-            if line.lower() != b"obj":
-                return 3
-        return 0
-
-    def _rebuild_xref_table(self, stream: StreamType) -> None:
-        self.xref = {}
-        stream.seek(0, 0)
-        f_ = stream.read(-1)
-
-        for m in re.finditer(rb"[\r\n \t][ \t]*(\d+)[ \t]+(\d+)[ \t]+obj", f_):
-            idnum = int(m.group(1))
-            generation = int(m.group(2))
-            if generation not in self.xref:
-                self.xref[generation] = {}
-            self.xref[generation][idnum] = m.start(1)
-        stream.seek(0, 0)
-        for m in re.finditer(rb"[\r\n \t][ \t]*trailer[\r\n \t]*(<<)", f_):
-            stream.seek(m.start(1), 0)
-            new_trailer = cast(Dict[Any, Any], read_object(stream, self))
-            # Here, we are parsing the file from start to end, the new data have to erase the existing.
-            for key, value in list(new_trailer.items()):
-                self.trailer[key] = value
-
-    def _read_xref_subsections(
-        self,
-        idx_pairs: List[int],
-        get_entry: Callable[[int], Union[int, Tuple[int, ...]]],
-        used_before: Callable[[int, Union[int, Tuple[int, ...]]], bool],
-    ) -> None:
-        for start, size in self._pairs(idx_pairs):
-            # The subsections must increase
-            for num in range(start, start + size):
-                # The first entry is the type
-                xref_type = get_entry(0)
-                # The rest of the elements depend on the xref_type
-                if xref_type == 0:
-                    # linked list of free objects
-                    next_free_object = get_entry(1)  # noqa: F841
-                    next_generation = get_entry(2)  # noqa: F841
-                elif xref_type == 1:
-                    # objects that are in use but are not compressed
-                    byte_offset = get_entry(1)
-                    generation = get_entry(2)
-                    if generation not in self.xref:
-                        self.xref[generation] = {}  # type: ignore
-                    if not used_before(num, generation):
-                        self.xref[generation][num] = byte_offset  # type: ignore
-                elif xref_type == 2:
-                    # compressed objects
-                    objstr_num = get_entry(1)
-                    obstr_idx = get_entry(2)
-                    generation = 0  # PDF spec table 18, generation is 0
-                    if not used_before(num, generation):
-                        self.xref_objStm[num] = (objstr_num, obstr_idx)
-                elif self.strict:
-                    raise PdfReadError(f"Unknown xref type: {xref_type}")
-
-    def _pairs(self, array: List[int]) -> Iterable[Tuple[int, int]]:
-        i = 0
-        while True:
-            yield array[i], array[i + 1]
-            i += 2
-            if (i + 1) >= len(array):
-                break
-
-    def read_next_end_line(
-        self, stream: StreamType, limit_offset: int = 0
-    ) -> bytes:  # deprecated
-        """.. deprecated:: 2.1.0"""
-        deprecate_no_replacement("read_next_end_line", removed_in="4.0.0")
-        line_parts = []
-        while True:
-            # Prevent infinite loops in malformed PDFs
-            if stream.tell() == 0 or stream.tell() == limit_offset:
-                raise PdfReadError("Could not read malformed PDF file")
-            x = stream.read(1)
-            if stream.tell() < 2:
-                raise PdfReadError("EOL marker not found")
-            stream.seek(-2, 1)
-            if x in (b"\n", b"\r"):  # \n = LF; \r = CR
-                crlf = False
-                while x in (b"\n", b"\r"):
-                    x = stream.read(1)
-                    if x in (b"\n", b"\r"):  # account for CR+LF
-                        stream.seek(-1, 1)
-                        crlf = True
-                    if stream.tell() < 2:
-                        raise PdfReadError("EOL marker not found")
-                    stream.seek(-2, 1)
-                stream.seek(
-                    2 if crlf else 1, 1
-                )  # if using CR+LF, go back 2 bytes, else 1
-                break
-            else:
-                line_parts.append(x)
-        line_parts.reverse()
-        return b"".join(line_parts)
-
-    def readNextEndLine(
-        self, stream: StreamType, limit_offset: int = 0
-    ) -> bytes:  # deprecated
-        """.. deprecated:: 1.28.0"""
-        deprecation_no_replacement("readNextEndLine", "3.0.0")
-        return self.read_next_end_line(stream, limit_offset)
-
-    def decrypt(self, password: Union[str, bytes]) -> PasswordType:
-        """
-        When using an encrypted / secured PDF file with the PDF Standard
-        encryption handler, this function will allow the file to be decrypted.
-        It checks the given password against the document's user password and
-        owner password, and then stores the resulting decryption key if either
-        password is correct.
-
-        It does not matter which password was matched.  Both passwords provide
-        the correct decryption key that will allow the document to be used with
-        this library.
-
-        Args:
-            password: The password to match.
-
-        Returns:
-            An indicator if the document was decrypted and weather it was the
-            owner password or the user password.
-        """
-        if not self._encryption:
-            raise PdfReadError("Not encrypted file")
-        # TODO: raise Exception for wrong password
-        return self._encryption.verify(password)
-
-    def decode_permissions(self, permissions_code: int) -> Dict[str, bool]:
-        # Takes the permissions as an integer, returns the allowed access
-        permissions = {}
-        permissions["print"] = permissions_code & (1 << 3 - 1) != 0  # bit 3
-        permissions["modify"] = permissions_code & (1 << 4 - 1) != 0  # bit 4
-        permissions["copy"] = permissions_code & (1 << 5 - 1) != 0  # bit 5
-        permissions["annotations"] = permissions_code & (1 << 6 - 1) != 0  # bit 6
-        permissions["forms"] = permissions_code & (1 << 9 - 1) != 0  # bit 9
-        permissions["accessability"] = permissions_code & (1 << 10 - 1) != 0  # bit 10
-        permissions["assemble"] = permissions_code & (1 << 11 - 1) != 0  # bit 11
-        permissions["print_high_quality"] = (
-            permissions_code & (1 << 12 - 1) != 0
-        )  # bit 12
-        return permissions
-
-    @property
-    def is_encrypted(self) -> bool:
-        """
-        Read-only boolean property showing whether this PDF file is encrypted.
-
-        Note that this property, if true, will remain true even after the
-        :meth:`decrypt()<pypdf.PdfReader.decrypt>` method is called.
-        """
-        return TK.ENCRYPT in self.trailer
-
-    def getIsEncrypted(self) -> bool:  # deprecated
-        """
-        Use :py:attr:`is_encrypted` instead.
-
-        .. deprecated:: 1.28.0
-        """
-        deprecation_with_replacement("getIsEncrypted", "is_encrypted", "3.0.0")
-        return self.is_encrypted
-
-    @property
-    def isEncrypted(self) -> bool:  # deprecated
-        """
-        Use :py:attr:`is_encrypted` instead.
-
-        .. deprecated:: 1.28.0
-        """
-        deprecation_with_replacement("isEncrypted", "is_encrypted", "3.0.0")
-        return self.is_encrypted
-
-    @property
-    def xfa(self) -> Optional[Dict[str, Any]]:
-        tree: Optional[TreeObject] = None
-        retval: Dict[str, Any] = {}
-        catalog = cast(DictionaryObject, self.trailer[TK.ROOT])
-
-        if "/AcroForm" not in catalog or not catalog["/AcroForm"]:
-            return None
-
-        tree = cast(TreeObject, catalog["/AcroForm"])
-
-        if "/XFA" in tree:
-            fields = cast(ArrayObject, tree["/XFA"])
-            i = iter(fields)
-            for f in i:
-                tag = f
-                f = next(i)
-                if isinstance(f, IndirectObject):
-                    field = cast(Optional[EncodedStreamObject], f.get_object())
-                    if field:
-                        es = zlib.decompress(b_(field._data))
-                        retval[tag] = es
-        return retval
-
-    def add_form_topname(self, name: str) -> Optional[DictionaryObject]:
-        """
-        Add a top level form that groups all form fields below it.
-
-        Args:
-            name: text string of the "/T" Attribute of the created object
-
-        Returns:
-            The created object. ``None`` means no object was created.
-        """
-        catalog = cast(DictionaryObject, self.trailer[TK.ROOT])
-
-        if "/AcroForm" not in catalog or not isinstance(
-            catalog["/AcroForm"], DictionaryObject
-        ):
-            return None
-        acroform = cast(DictionaryObject, catalog[NameObject("/AcroForm")])
-        if "/Fields" not in acroform:
-            # TODO: :No error returns but may be extended for XFA Forms
-            return None
-
-        interim = DictionaryObject()
-        interim[NameObject("/T")] = TextStringObject(name)
-        interim[NameObject("/Kids")] = acroform[NameObject("/Fields")]
-        self.cache_indirect_object(
-            0,
-            max([i for (g, i) in self.resolved_objects if g == 0]) + 1,
-            interim,
-        )
-        arr = ArrayObject()
-        arr.append(interim.indirect_reference)
-        acroform[NameObject("/Fields")] = arr
-        for o in cast(ArrayObject, interim["/Kids"]):
-            obj = o.get_object()
-            if "/Parent" in obj:
-                logger_warning(
-                    f"Top Level Form Field {obj.indirect_reference} have a non-expected parent",
-                    __name__,
-                )
-            obj[NameObject("/Parent")] = interim.indirect_reference
-        return interim
-
-    def rename_form_topname(self, name: str) -> Optional[DictionaryObject]:
-        """
-        Rename top level form field that all form fields below it.
-
-        Args:
-            name: text string of the "/T" field of the created object
-
-        Returns:
-            The modified object. ``None`` means no object was modified.
-        """
-        catalog = cast(DictionaryObject, self.trailer[TK.ROOT])
-
-        if "/AcroForm" not in catalog or not isinstance(
-            catalog["/AcroForm"], DictionaryObject
-        ):
-            return None
-        acroform = cast(DictionaryObject, catalog[NameObject("/AcroForm")])
-        if "/Fields" not in acroform:
-            return None
-
-        interim = cast(
-            DictionaryObject,
-            cast(ArrayObject, acroform[NameObject("/Fields")])[0].get_object(),
-        )
-        interim[NameObject("/T")] = TextStringObject(name)
-        return interim
-
-    def _get_embedded_files_root(self) -> Optional[NameTree]:
-        """
-        Returns the EmbeddedFiles root as a NameTree Object
-        if the root does not exists, return None
-        """
-        catalog = cast(DictionaryObject, self.trailer["/Root"])
-        if "/Names" not in catalog:
-            return None
-        ef = cast(DictionaryObject, catalog["/Names"]).get("/EmbeddedFiles", None)
-        if ef is None:
-            return None
-        efo = ef.get_object()
-        # not for reader
-        """
-            if not isinstance(efo,NameTree):
-            if isinstance(ef,IndirectObject):
-                ef.replace_object(efo)
-            else:
-                cast(DictionaryObject,catalog["/Names"])[
-                    NameObject("/EmbeddedFiles")] = NameTree(efo)
-        """
-        return NameTree(efo)
-
-    @property
-    def embedded_files(self) -> Optional[Mapping[str, List[PdfObject]]]:
-        ef = self._get_embedded_files_root()
-        if ef:
-            return ef.list_items()
-        else:
-            return None
-
-    @property
-    def attachments(self) -> Mapping[str, Union[List[bytes], List[Dict[str, bytes]]]]:
-        ef = self._get_embedded_files_root()
-        if ef:
-            d: Dict[str, Union[List[bytes], List[Dict[str, bytes]]]] = {}
-            for k, v in ef.list_items().items():
-                if isinstance(v, list):
-                    if k not in d:
-                        d[k] = []  # type: ignore
-                    for e in v:
-                        e = cast(DictionaryObject, e.get_object())
-                        if "/EF" in e:
-                            d[k].append(e["/EF"]["/F"].get_data())  # type: ignore
-                        elif "/RF" in e:
-                            r = cast(
-                                ArrayObject, cast(DictionaryObject, e["/RF"])["/F"]
-                            )
-                            di: Dict[str, bytes] = {}
-                            i = 0
-                            while i < len(r):
-                                di[cast(str, r[i])] = r[i + 1].get_object().get_data()
-                                i += 2
-                            d[k].append(di)
-            return d
-        else:
-            return {}
-
-    def _list_attachments(self) -> List[str]:
-        """
-        Retrieves the list of filenames of file attachments.
-
-        Returns:
-            list of filenames
-        """
-        ef = self._get_embedded_files_root()
-        if ef:
-            lst = ef.list_keys()
-        else:
-            lst = []
-        """
-        for ip, p in enumerate(self.pages):
-            for a in [_a.get_object()
-                      for _a in p.get("/Annots",[])]:
-                if _a.get_object().get("/Subtype","") != "/FileAttachements":
-                    continue
-                lst.append(f"$page_{ip}.{get_name_from_file_specification(_a)}")
-        """
-        return lst
-
-    def _get_attachment_list(self, name: str) -> List[bytes]:
-        out = self._get_attachments(name)[name]
-        if isinstance(out, list):
-            return out
-        return [out]
-
-    def _get_attachments(
-        self, filename: Optional[str] = None
-    ) -> Dict[str, Union[bytes, List[bytes], Dict[str, bytes]]]:
-        """
-        Retrieves all or selected file attachments of the PDF as a dictionary of file names
-        and the file data as a bytestring.
-
-        Args:
-            filename: If filename is None, then a dictionary of all attachments
-                will be returned, where the key is the filename and the value
-                is the content. Otherwise, a dictionary with just a single key
-                - the filename - and its content will be returned.
-
-        Returns:
-            dictionary of filename -> Union[bytestring or List[ByteString]]
-            if the filename exists multiple times a List of the different version will be provided
-        """
-        ef = self._get_embedded_files_root()
-        if ef is None:
-            return {}
-        if filename is None:
-            return {k: v if len(v) > 1 else v[0] for k, v in self.attachments.items()}  # type: ignore
-        else:
-            lst = ef.list_get(filename)
-            return {
-                filename: [x["/EF"]["/F"].get_data() for x in lst]  # type: ignore
-                if isinstance(lst, list)
-                else lst["/EF"]["/F"].get_data()  # type: ignore
-            }
-
-
-class PdfFileReader(PdfReader):  # deprecated
-    def __init__(self, *args: Any, **kwargs: Any) -> None:
-        deprecation_with_replacement("PdfFileReader", "PdfReader", "3.0.0")
-        if "strict" not in kwargs and len(args) < 2:
-            kwargs["strict"] = True  # maintain the default
-        super().__init__(*args, **kwargs)
+# Copyright (c) 2006, Mathieu Fenniak
+# Copyright (c) 2007, Ashish Kulkarni <kulkarni.ashish@gmail.com>
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+# * The name of the author may not be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+import os
+import re
+import struct
+import zlib
+from datetime import datetime
+from io import BytesIO, UnsupportedOperation
+from pathlib import Path
+from typing import (
+    Any,
+    Callable,
+    Dict,
+    Iterable,
+    List,
+    Mapping,
+    Optional,
+    Tuple,
+    Union,
+    cast,
+)
+
+from ._encryption import Encryption, PasswordType
+from ._page import PageObject, _VirtualList
+from ._page_labels import index2label as page_index2page_label
+from ._utils import (
+    StrByteType,
+    StreamType,
+    b_,
+    deprecate_no_replacement,
+    deprecation_no_replacement,
+    deprecation_with_replacement,
+    logger_warning,
+    parse_iso8824_date,
+    read_non_whitespace,
+    read_previous_line,
+    read_until_whitespace,
+    skip_over_comment,
+    skip_over_whitespace,
+)
+from .constants import CatalogAttributes as CA
+from .constants import CatalogDictionary as CD
+from .constants import (
+    CheckboxRadioButtonAttributes,
+    GoToActionArguments,
+)
+from .constants import Core as CO
+from .constants import DocumentInformationAttributes as DI
+from .constants import FieldDictionaryAttributes as FA
+from .constants import PageAttributes as PG
+from .constants import PagesAttributes as PA
+from .constants import TrailerKeys as TK
+from .errors import (
+    EmptyFileError,
+    FileNotDecryptedError,
+    PdfReadError,
+    PdfStreamError,
+    WrongPasswordError,
+)
+from .generic import (
+    ArrayObject,
+    BooleanObject,
+    ContentStream,
+    DecodedStreamObject,
+    Destination,
+    DictionaryObject,
+    EncodedStreamObject,
+    Field,
+    Fit,
+    FloatObject,
+    IndirectObject,
+    NameObject,
+    NameTree,
+    NullObject,
+    NumberObject,
+    PdfObject,
+    TextStringObject,
+    TreeObject,
+    ViewerPreferences,
+    read_object,
+)
+from .types import OutlineType, PagemodeType
+from .xmp import XmpInformation
+
+
+def convert_to_int(d: bytes, size: int) -> Union[int, Tuple[Any, ...]]:
+    if size > 8:
+        raise PdfReadError("invalid size in convert_to_int")
+    # Left-pad with NUL bytes, keep the last 8 bytes, decode big-endian signed.
+    padded = (bytes(8) + d)[-8:]
+    return int.from_bytes(padded, "big", signed=True)
+
+
+def convertToInt(d: bytes, size: int) -> Union[int, Tuple[Any, ...]]:  # deprecated alias of convert_to_int
+    deprecation_with_replacement("convertToInt", "convert_to_int")
+    return convert_to_int(d, size)
+
+
+class DocumentInformation(DictionaryObject):
+    """
+    A class representing the basic document metadata provided in a PDF File.
+    This class is accessible through
+    :py:class:`PdfReader.metadata<pypdf.PdfReader.metadata>`.
+
+    All text properties of the document metadata have
+    *two* properties, e.g. author and author_raw. The non-raw property will
+    always return a ``TextStringObject``, making it ideal for a case where
+    the metadata is being displayed. The raw property can sometimes return
+    a ``ByteStringObject``, if pypdf was unable to decode the string's
+    text encoding; this requires additional safety in the caller and
+    therefore is not as commonly accessed.
+    """
+
+    def __init__(self) -> None:
+        DictionaryObject.__init__(self)
+
+    def _get_text(self, key: str) -> Optional[str]:
+        retval = self.get(key, None)
+        if isinstance(retval, TextStringObject):
+            return retval
+        return None  # entry missing, or not stored as a TextStringObject
+
+    def getText(self, key: str) -> Optional[str]:  # deprecated
+        """
+        Use the attributes (e.g. :py:attr:`title` / :py:attr:`author`).
+
+        .. deprecated:: 1.28.0
+        """
+        deprecation_no_replacement("getText", "3.0.0")
+        return self._get_text(key)
+
+    @property
+    def title(self) -> Optional[str]:
+        """
+        Read-only property accessing the document's title.
+
+        Returns the ``TextStringObject`` when there is one, else the resolved
+        raw entry; ``None`` if the title is missing or falsy.
+        """
+        return (
+            self._get_text(DI.TITLE) or self.get(DI.TITLE).get_object()  # type: ignore
+            if self.get(DI.TITLE)
+            else None
+        )
+
+    @property
+    def title_raw(self) -> Optional[str]:
+        """The "raw" version of title; can return a ``ByteStringObject``."""
+        return self.get(DI.TITLE)
+
+    @property
+    def author(self) -> Optional[str]:
+        """
+        Read-only property accessing the document's author.
+
+        Returns a ``TextStringObject`` or ``None`` if the author is not
+        specified.
+        """
+        return self._get_text(DI.AUTHOR)
+
+    @property
+    def author_raw(self) -> Optional[str]:
+        """The "raw" version of author; can return a ``ByteStringObject``."""
+        return self.get(DI.AUTHOR)
+
+    @property
+    def subject(self) -> Optional[str]:
+        """
+        Read-only property accessing the document's subject.
+
+        Returns a ``TextStringObject`` or ``None`` if the subject is not
+        specified.
+        """
+        return self._get_text(DI.SUBJECT)
+
+    @property
+    def subject_raw(self) -> Optional[str]:
+        """The "raw" version of subject; can return a ``ByteStringObject``."""
+        return self.get(DI.SUBJECT)
+
+    @property
+    def creator(self) -> Optional[str]:
+        """
+        Read-only property accessing the document's creator.
+
+        If the document was converted to PDF from another format, this is the
+        name of the application (e.g. OpenOffice) that created the original
+        document from which it was converted. Returns a ``TextStringObject`` or
+        ``None`` if the creator is not specified.
+        """
+        return self._get_text(DI.CREATOR)
+
+    @property
+    def creator_raw(self) -> Optional[str]:
+        """The "raw" version of creator; can return a ``ByteStringObject``."""
+        return self.get(DI.CREATOR)
+
+    @property
+    def producer(self) -> Optional[str]:
+        """
+        Read-only property accessing the document's producer.
+
+        If the document was converted to PDF from another format, this is the
+        name of the application (for example, OSX Quartz) that converted it to
+        PDF. Returns a ``TextStringObject`` or ``None`` if the producer is not
+        specified.
+        """
+        return self._get_text(DI.PRODUCER)
+
+    @property
+    def producer_raw(self) -> Optional[str]:
+        """The "raw" version of producer; can return a ``ByteStringObject``."""
+        return self.get(DI.PRODUCER)
+
+    @property
+    def creation_date(self) -> Optional[datetime]:
+        """Read-only property accessing the document's creation date."""
+        return parse_iso8824_date(self._get_text(DI.CREATION_DATE))
+
+    @property
+    def creation_date_raw(self) -> Optional[str]:
+        """
+        The "raw" version of creation date; can return a ``ByteStringObject``.
+
+        Typically in the format ``D:YYYYMMDDhhmmss[+Z-]hh'mm`` where the suffix
+        is the offset from UTC.
+        """
+        return self.get(DI.CREATION_DATE)
+
+    @property
+    def modification_date(self) -> Optional[datetime]:
+        """
+        Read-only property accessing the document's modification date.
+
+        The date and time the document was most recently modified.
+        """
+        return parse_iso8824_date(self._get_text(DI.MOD_DATE))
+
+    @property
+    def modification_date_raw(self) -> Optional[str]:
+        """
+        The "raw" version of modification date; can return a
+        ``ByteStringObject``.
+
+        Typically in the format ``D:YYYYMMDDhhmmss[+Z-]hh'mm`` where the suffix
+        is the offset from UTC.
+        """
+        return self.get(DI.MOD_DATE)
+
+
+class PdfReader:
+    """
+    Initialize a PdfReader object.
+
+    This operation can take some time, as the PDF stream's cross-reference
+    tables are read into memory.
+
+    Args:
+        stream: A File object or an object that supports the standard read
+            and seek methods similar to a File object. Could also be a
+            string representing a path to a PDF file.
+        strict: Determines whether user should be warned of all
+            problems and also causes some correctable problems to be fatal.
+            Defaults to ``False``.
+        password: Decrypt PDF file at initialization. If the
+            password is None, the file will not be decrypted.
+            Defaults to ``None``
+    """
+
+    @property
+    def viewer_preferences(self) -> Optional[ViewerPreferences]:
+        """Return the catalog's viewer preferences as a ViewerPreferences object, or None."""
+        root = cast(DictionaryObject, self.trailer["/Root"])
+        entry = root.get(CD.VIEWER_PREFERENCES, None)
+        if entry is None:
+            return None
+        prefs = entry.get_object()
+        if isinstance(prefs, ViewerPreferences):
+            return prefs
+        # Not yet the typed class: wrap the resolved object.
+        return ViewerPreferences(prefs)
+
+    def __init__(
+        self,
+        stream: Union[StrByteType, Path],
+        strict: bool = False,
+        password: Union[None, str, bytes] = None,
+    ) -> None:
+        self.strict = strict
+        self.flattened_pages: Optional[List[PageObject]] = None
+        self.resolved_objects: Dict[Tuple[Any, Any], Optional[PdfObject]] = {}
+        self.xref_index = 0
+        self._page_id2num: Optional[
+            Dict[Any, Any]
+        ] = None  # maps a page's indirect reference idnum to its page index; built lazily
+        if hasattr(stream, "mode") and "b" not in stream.mode:  # type: ignore
+            logger_warning(
+                "PdfReader stream/file object is not in binary mode. "
+                "It may not be read correctly.",
+                __name__,
+            )
+        if isinstance(stream, (str, Path)):
+            with open(stream, "rb") as fh:
+                stream = BytesIO(fh.read())  # load the whole file so fh can be closed here
+        self.read(stream)
+        self.stream = stream
+
+        self._override_encryption = False
+        self._encryption: Optional[Encryption] = None
+        if self.is_encrypted:
+            self._override_encryption = True
+            # Some documents may not have a /ID, use two empty
+            # byte strings instead. Solves
+            # https://github.com/py-pdf/pypdf/issues/608
+            id_entry = self.trailer.get(TK.ID)
+            id1_entry = id_entry[0].get_object().original_bytes if id_entry else b""
+            encrypt_entry = cast(
+                DictionaryObject, self.trailer[TK.ENCRYPT].get_object()
+            )
+            self._encryption = Encryption.read(encrypt_entry, id1_entry)
+
+            # try empty password if no password provided
+            pwd = password if password is not None else b""
+            if (
+                self._encryption.verify(pwd) == PasswordType.NOT_DECRYPTED
+                and password is not None
+            ):
+                # only a wrong explicit password is an error; a failed empty-password try is not
+                raise WrongPasswordError("Wrong password")
+            self._override_encryption = False
+        elif password is not None:
+            raise PdfReadError("Not encrypted file")  # password given for an unencrypted file
+
+    @property
+    def pdf_header(self) -> str:
+        """
+        The first 8 bytes of the file, decoded as text.
+
+        Typically something like ``'%PDF-1.6'``; useful to check whether the
+        file is a PDF at all and which version it declares.
+        """
+        # TODO: Make this return a bytes object for consistency
+        #       but that needs a deprecation
+        saved_position = self.stream.tell()
+        self.stream.seek(0, 0)
+        header = self.stream.read(8).decode("utf-8", "backslashreplace")
+        self.stream.seek(saved_position, 0)  # restore the caller's position
+        return header
+
+    @property
+    def metadata(self) -> Optional[DocumentInformation]:
+        """
+        Return the document information dictionary from the trailer.
+
+        Returns ``None`` when the trailer has no info entry. Metadata kept in
+        metadata streams is not read by this property.
+        Raises PdfReadError when the info entry is ``None``.
+        """
+        if TK.INFO not in self.trailer:
+            return None
+        info_entry = self.trailer[TK.INFO]
+        if info_entry is None:
+            raise PdfReadError(
+                "trailer not found or does not point to document information directory"
+            )
+        info = DocumentInformation()
+        info.update(info_entry)  # type: ignore
+        return info
+
+    def getDocumentInfo(self) -> Optional[DocumentInformation]:  # deprecated
+        """
+        Deprecated alias; use the attribute :py:attr:`metadata` instead.
+
+        .. deprecated:: 1.28.0
+        """
+        deprecation_with_replacement("getDocumentInfo", "metadata", "3.0.0")
+        return self.metadata
+
+    @property
+    def documentInfo(self) -> Optional[DocumentInformation]:  # deprecated
+        """
+        Deprecated property alias; use the attribute :py:attr:`metadata` instead.
+
+        .. deprecated:: 1.28.0
+        """
+        deprecation_with_replacement("documentInfo", "metadata", "3.0.0")
+        return self.metadata
+
+    @property
+    def xmp_metadata(self) -> Optional[XmpInformation]:
+        """XMP (Extensible Metadata Platform) data, read from the document catalog."""
+        try:
+            self._override_encryption = True
+            return self.trailer[TK.ROOT].xmp_metadata  # type: ignore
+        finally:
+            self._override_encryption = False  # always reset to False, whatever it was before
+
+    def getXmpMetadata(self) -> Optional[XmpInformation]:  # deprecated
+        """
+        Use the attribute :py:attr:`xmp_metadata` instead.
+
+        .. deprecated:: 1.28.0
+        """
+        deprecation_with_replacement("getXmpMetadata", "xmp_metadata", "3.0.0")
+        return self.xmp_metadata
+
+    @property
+    def xmpMetadata(self) -> Optional[XmpInformation]:  # deprecated
+        """
+        Use the attribute :py:attr:`xmp_metadata` instead.
+
+        .. deprecated:: 1.28.0
+        """
+        deprecation_with_replacement("xmpMetadata", "xmp_metadata", "3.0.0")
+        return self.xmp_metadata
+
+    def _get_num_pages(self) -> int:
+        """
+        Count the pages of this PDF file.
+
+        Encrypted documents report the ``/Count`` entry of the page tree
+        root; all other documents report the length of the flattened page
+        list, building that list first when it does not exist yet.
+
+        Returns:
+            The number of pages of the parsed PDF file
+        """
+        if self.is_encrypted:
+            # Flattened pages will not work on an encrypted PDF, so rely on
+            # the count stored in the file.
+            return self.trailer[TK.ROOT]["/Pages"]["/Count"]  # type: ignore
+        # Build the flattened page list on first use.
+        if self.flattened_pages is None:
+            self._flatten()
+        flattened = self.flattened_pages
+        return len(flattened)  # type: ignore
+
+    def getNumPages(self) -> int:  # deprecated
+        """
+        Deprecated; use :code:`len(reader.pages)` instead.
+
+        .. deprecated:: 1.28.0
+        """
+        deprecation_with_replacement("reader.getNumPages", "len(reader.pages)", "3.0.0")
+        return self._get_num_pages()
+
+    @property
+    def numPages(self) -> int:  # deprecated
+        """
+        Deprecated property; use :code:`len(reader.pages)` instead.
+
+        .. deprecated:: 1.28.0
+        """
+        deprecation_with_replacement("reader.numPages", "len(reader.pages)", "3.0.0")
+        return self._get_num_pages()
+
+    def getPage(self, pageNumber: int) -> PageObject:  # deprecated
+        """
+        Deprecated; use :code:`reader.pages[page_number]` instead.
+
+        .. deprecated:: 1.28.0
+        """
+        deprecation_with_replacement(
+            "reader.getPage(pageNumber)", "reader.pages[page_number]", "3.0.0"
+        )
+        return self._get_page(pageNumber)
+
+    def _get_page(self, page_number: int) -> PageObject:
+        """
+        Retrieve a page by number from this PDF file.
+
+        Args:
+            page_number: Zero-based index into the flattened page list,
+                which is built on first use.
+
+        Returns:
+            A :class:`PageObject<pypdf._page.PageObject>` instance.
+        """
+        if self.flattened_pages is None:
+            self._flatten()
+        assert self.flattened_pages is not None, "hint for mypy"
+        return self.flattened_pages[page_number]
+
+    @property
+    def namedDestinations(self) -> Dict[str, Any]:  # deprecated
+        """
+        Deprecated alias; use :py:attr:`named_destinations` instead.
+
+        .. deprecated:: 1.28.0
+        """
+        deprecation_with_replacement("namedDestinations", "named_destinations", "3.0.0")
+        return self.named_destinations
+
+    @property
+    def named_destinations(self) -> Dict[str, Any]:
+        """
+        A read-only dictionary mapping names to
+        :class:`Destinations<pypdf.generic.Destination>`; rebuilt on each access.
+        """
+        return self._get_named_destinations()
+
+    # A select group of relevant field attributes. For the complete list,
+    # see section 8.6.2 of the PDF 1.7 reference.
+
+    def get_fields(
+        self,
+        tree: Optional[TreeObject] = None,
+        retval: Optional[Dict[Any, Any]] = None,
+        fileobj: Optional[Any] = None,
+    ) -> Optional[Dict[str, Any]]:
+        """
+        Extract field data if this PDF contains interactive form fields.
+
+        The *tree* and *retval* parameters are for recursive use.
+
+        Args:
+            tree: Node to scan; ignored when *retval* is ``None``.
+            retval: Result dictionary to fill; created when ``None``.
+            fileobj: A file object (usually a text file) to write
+                a report to on all interactive form fields found.
+
+        Returns:
+            A dictionary where each key is a field name, and each
+            value is a :class:`Field<pypdf.generic.Field>` object. By
+            default, the mapping name is used for keys.
+            ``None`` if form data could not be located.
+        """
+        field_attributes = FA.attributes_dict()
+        field_attributes.update(CheckboxRadioButtonAttributes.attributes_dict())
+        if retval is None:
+            retval = {}
+            catalog = cast(DictionaryObject, self.trailer[TK.ROOT])
+            # top-level call: start from the catalog's AcroForm entry
+            if CD.ACRO_FORM in catalog:
+                tree = cast(Optional[TreeObject], catalog[CD.ACRO_FORM])
+            else:
+                return None
+        if tree is None:
+            return retval
+        self._check_kids(tree, retval, fileobj)
+        for attr in field_attributes:
+            if attr in tree:
+                # Tree is a field
+                self._build_field(tree, retval, fileobj, field_attributes)
+                break
+
+        if "/Fields" in tree:
+            fields = cast(ArrayObject, tree["/Fields"])
+            for f in fields:
+                field = f.get_object()
+                self._build_field(field, retval, fileobj, field_attributes)
+
+        return retval
+
+    def getFields(
+        self,
+        tree: Optional[TreeObject] = None,
+        retval: Optional[Dict[Any, Any]] = None,
+        fileobj: Optional[Any] = None,
+    ) -> Optional[Dict[str, Any]]:  # deprecated
+        """
+        Deprecated alias; use :meth:`get_fields` instead.
+
+        .. deprecated:: 1.28.0
+        """
+        deprecation_with_replacement("getFields", "get_fields", "3.0.0")
+        return self.get_fields(tree, retval, fileobj)
+
+    def _get_qualified_field_name(self, parent: DictionaryObject) -> str:
+        """
+        Build a field name: "/TM" if present, else the dotted "/T" names of the chain.
+        """
+        if "/TM" in parent:
+            return cast(str, parent["/TM"])
+        if "/Parent" not in parent:
+            return cast(str, parent["/T"])
+        # Prefix this node's "/T" with the qualified name of its parent.
+        ancestors = self._get_qualified_field_name(
+            cast(DictionaryObject, parent["/Parent"])
+        )
+        return ancestors + "." + cast(str, parent["/T"])
+
+    def _build_field(
+        self,
+        field: Union[TreeObject, DictionaryObject],
+        retval: Dict[Any, Any],
+        fileobj: Any,
+        field_attributes: Any,
+    ) -> None:
+        """Add *field* to *retval* under its name, with "/_States_" for choice/button fields."""
+        self._check_kids(field, retval, fileobj)
+        try:
+            key = cast(str, field["/TM"])
+        except KeyError:
+            try:
+                if "/Parent" in field:
+                    key = (
+                        self._get_qualified_field_name(
+                            cast(DictionaryObject, field["/Parent"])
+                        )
+                        + "."
+                    )
+                else:
+                    key = ""
+                key += cast(str, field["/T"])
+            except KeyError:
+                # Ignore no-name field for now
+                return
+        if fileobj:
+            self._write_field(fileobj, field, field_attributes)
+            fileobj.write("\n")
+        retval[key] = Field(field)
+        obj = retval[key].indirect_reference.get_object()  # to get the full object
+        if obj.get(FA.FT, "") == "/Ch":
+            retval[key][NameObject("/_States_")] = obj[NameObject(FA.Opt)]
+        if obj.get(FA.FT, "") == "/Btn" and "/AP" in obj:
+            #  Checkbox: states are the normal-appearance names, plus "/Off"
+            ap_states = list(obj["/AP"]["/N"].keys())
+            retval[key][NameObject("/_States_")] = ArrayObject(ap_states)
+            if "/Off" not in retval[key]["/_States_"]:
+                retval[key][NameObject("/_States_")].append(NameObject("/Off"))
+        elif obj.get(FA.FT, "") == "/Btn" and obj.get(FA.Ff, 0) & FA.FfBits.Radio != 0:
+            states = []
+            for k in obj.get(FA.Kids, {}):
+                k = k.get_object()
+                for s in list(k["/AP"]["/N"].keys()):
+                    if s not in states:
+                        states.append(s)
+            # Assigned after the loop so a group without kids still gets an
+            # (empty) "/_States_" entry and the lookup below cannot raise KeyError.
+            retval[key][NameObject("/_States_")] = ArrayObject(states)
+            no_toggle = obj.get(FA.Ff, 0) & FA.FfBits.NoToggleToOff != 0
+            if no_toggle and "/Off" in retval[key]["/_States_"]:
+                del retval[key]["/_States_"][retval[key]["/_States_"].index("/Off")]
+
+    def _check_kids(
+        self, tree: Union[TreeObject, DictionaryObject], retval: Any, fileobj: Any
+    ) -> None:
+        if PA.KIDS in tree:
+            # recurse down the tree: each kid is passed to get_fields with the same retval
+            for kid in tree[PA.KIDS]:  # type: ignore
+                self.get_fields(kid.get_object(), retval, fileobj)
+
+    def _write_field(self, fileobj: Any, field: Any, field_attributes: Any) -> None:
+        field_attributes_tuple = FA.attributes()
+        field_attributes_tuple = (
+            field_attributes_tuple + CheckboxRadioButtonAttributes.attributes()
+        )
+        # Write a "label: value" line to fileobj for the attributes found on the field.
+        for attr in field_attributes_tuple:
+            if attr in (
+                FA.Kids,
+                FA.AA,
+            ):
+                continue
+            attr_name = field_attributes[attr]
+            try:
+                if attr == FA.FT:
+                    # Write a readable label for the field type; unknown types are skipped
+                    types = {
+                        "/Btn": "Button",
+                        "/Tx": "Text",
+                        "/Ch": "Choice",
+                        "/Sig": "Signature",
+                    }
+                    if field[attr] in types:
+                        fileobj.write(f"{attr_name}: {types[field[attr]]}\n")
+                elif attr == FA.Parent:
+                    # Write only the parent's name, preferring FA.TM over FA.T
+                    try:
+                        name = field[attr][FA.TM]
+                    except KeyError:
+                        name = field[attr][FA.T]
+                    fileobj.write(f"{attr_name}: {name}\n")
+                else:
+                    fileobj.write(f"{attr_name}: {field[attr]}\n")
+            except KeyError:
+                # Field attribute is N/A or unknown, so don't write anything
+                pass
+
+    def get_form_text_fields(self, full_qualified_name: bool = False) -> Dict[str, Any]:
+        """
+        Collect the values of the document's text form fields.
+
+        Args:
+            full_qualified_name: When true, key the result by the names
+                reported by :meth:`get_fields`; otherwise by ``/T``.
+
+        Returns:
+            A dictionary mapping each text field (``/FT`` equal to ``/Tx``)
+            to its ``/V`` entry. Empty when there is no form data.
+
+            With ``/T`` keys, repeated names get the suffix .2, .3, ...
+        """
+
+        def indexed_key(k: str, fields: dict) -> str:
+            # The first use of a name keeps it unchanged.
+            if k not in fields:
+                return k
+            # Count the suffixed entries already stored to pick the next index.
+            taken = sum(1 for existing in fields if existing.startswith(k + "."))
+            return k + "." + str(taken + 2)
+
+        # Retrieve document form fields
+        text_values: Dict[str, Any] = {}
+        all_fields = self.get_fields()
+        if all_fields is None:
+            return text_values
+        for qualified, fld in all_fields.items():
+            if fld.get("/FT") != "/Tx":
+                continue
+            if full_qualified_name:
+                name = qualified
+            else:
+                name = indexed_key(cast(str, fld["/T"]), text_values)
+            text_values[name] = fld.get("/V")
+        return text_values
+
+    def getFormTextFields(self) -> Dict[str, Any]:  # deprecated
+        """
+        Use :meth:`get_form_text_fields` instead (called here with its defaults).
+
+        .. deprecated:: 1.28.0
+        """
+        deprecation_with_replacement(
+            "getFormTextFields", "get_form_text_fields", "3.0.0"
+        )
+        return self.get_form_text_fields()
+
+    def _get_named_destinations(
+        self,
+        tree: Union[TreeObject, None] = None,
+        retval: Optional[Any] = None,
+    ) -> Dict[str, Any]:
+        """
+        Retrieve the named destinations present in the document.
+
+        Args:
+            tree: Node to scan; on the top-level call the catalog's entry is used if any.
+            retval: Result dictionary to fill; created when ``None``.
+
+        Returns:
+            A dictionary which maps names to
+            :class:`Destinations<pypdf.generic.Destination>`.
+        """
+        if retval is None:
+            retval = {}
+            catalog = cast(DictionaryObject, self.trailer[TK.ROOT])
+
+            # get the name tree
+            if CA.DESTS in catalog:
+                tree = cast(TreeObject, catalog[CA.DESTS])
+            elif CA.NAMES in catalog:
+                names = cast(DictionaryObject, catalog[CA.NAMES])
+                if CA.DESTS in names:
+                    tree = cast(TreeObject, names[CA.DESTS])
+
+        if tree is None:
+            return retval
+
+        if PA.KIDS in tree:
+            # recurse down the tree
+            for kid in cast(ArrayObject, tree[PA.KIDS]):
+                self._get_named_destinations(kid.get_object(), retval)
+        # TABLE 3.33 Entries in a name tree node dictionary (PDF 1.7 specs)
+        elif CA.NAMES in tree:  # KIDS and NAMES are exclusives (PDF 1.7 specs p 162)
+            names = cast(DictionaryObject, tree[CA.NAMES])
+            i = 0
+            while i < len(names):
+                key = cast(str, names[i].get_object())
+                i += 1
+                if not isinstance(key, str):
+                    continue
+                try:
+                    value = names[i].get_object()
+                except IndexError:
+                    break
+                i += 1
+                if isinstance(value, DictionaryObject) and "/D" in value:
+                    value = value["/D"]
+                dest = self._build_destination(key, value)  # type: ignore
+                if dest is not None:
+                    retval[key] = dest
+        else:  # case where Dests is in root catalog (PDF 1.7 specs, §2 about PDF1.1)
+            for k__, v__ in tree.items():
+                val = v__.get_object()
+                if isinstance(val, DictionaryObject):
+                    val = val["/D"].get_object()
+                dest = self._build_destination(k__, val)
+                if dest is not None:
+                    retval[k__] = dest
+        return retval
+
+    def getNamedDestinations(
+        self,
+        tree: Union[TreeObject, None] = None,
+        retval: Optional[Any] = None,
+    ) -> Dict[str, Any]:  # deprecated
+        """
+        Deprecated; use :py:attr:`named_destinations` instead.
+
+        .. deprecated:: 1.28.0
+        """
+        deprecation_with_replacement(
+            "getNamedDestinations", "named_destinations", "3.0.0"
+        )
+        return self._get_named_destinations(tree, retval)
+
+    @property
+    def outline(self) -> OutlineType:
+        """
+        Read-only property for the outline present in the document.
+
+        (i.e., a collection of 'outline items', also known as 'bookmarks';
+        the children of an item follow it as a nested list)
+        """
+        return self._get_outline()
+
+    @property
+    def outlines(self) -> OutlineType:  # deprecated
+        """
+        Deprecated alias; use :py:attr:`outline` instead.
+
+        .. deprecated:: 2.9.0
+        """
+        deprecation_with_replacement("outlines", "outline", "3.0.0")
+        return self.outline
+
+    def _get_outline(
+        self, node: Optional[DictionaryObject] = None, outline: Optional[Any] = None
+    ) -> OutlineType:
+        if outline is None:
+            outline = []
+            catalog = cast(DictionaryObject, self.trailer[TK.ROOT])
+
+            # get the outline dictionary and named destinations
+            if CO.OUTLINES in catalog:
+                lines = cast(DictionaryObject, catalog[CO.OUTLINES])
+
+                if isinstance(lines, NullObject):
+                    return outline
+
+                # TABLE 8.3 Entries in the outline dictionary
+                if lines is not None and "/First" in lines:
+                    node = cast(DictionaryObject, lines["/First"])
+            self._namedDests = self._get_named_destinations()  # refreshed on each top-level call
+
+        if node is None:
+            return outline
+
+        # walk the sibling chain through "/Next", descending into "/First" children
+        while True:
+            outline_obj = self._build_outline_item(node)
+            if outline_obj:
+                outline.append(outline_obj)
+
+            # check for sub-outline; children are appended as one nested list
+            if "/First" in node:
+                sub_outline: List[Any] = []
+                self._get_outline(cast(DictionaryObject, node["/First"]), sub_outline)
+                if sub_outline:
+                    outline.append(sub_outline)
+
+            if "/Next" not in node:
+                break
+            node = cast(DictionaryObject, node["/Next"])
+
+        return outline
+
+    def getOutlines(
+        self, node: Optional[DictionaryObject] = None, outline: Optional[Any] = None
+    ) -> OutlineType:  # deprecated
+        """
+        Deprecated; use :py:attr:`outline` instead.
+
+        .. deprecated:: 1.28.0
+        """
+        deprecation_with_replacement("getOutlines", "outline", "3.0.0")
+        return self._get_outline(node, outline)
+
+    @property
+    def threads(self) -> Optional[ArrayObject]:
+        """
+        Read-only property for the article threads of the document.
+
+        See §8.3.2 from PDF 1.7 spec.
+
+        Returns the catalog's threads array (dictionaries with "/F" and
+        "/I" properties), or None when the catalog has no such entry.
+        """
+        root = cast(DictionaryObject, self.trailer[TK.ROOT])
+        if CO.THREADS not in root:
+            return None
+        # The entry is returned as stored in the catalog.
+        return cast("ArrayObject", root[CO.THREADS])
+
+    def _get_page_number_by_indirect(
+        self, indirect_reference: Union[None, int, NullObject, IndirectObject]
+    ) -> int:
+        """
+        Map an object number or indirect reference to a page index.
+
+        Args:
+            indirect_reference: Object number, indirect reference, or a null.
+
+        Returns:
+            The zero-based page number, or -1 for a null or unknown reference.
+        """
+        if self._page_id2num is None:
+            self._page_id2num = {
+                x.indirect_reference.idnum: i for i, x in enumerate(self.pages)  # type: ignore
+            }
+
+        if indirect_reference is None or isinstance(indirect_reference, NullObject):
+            return -1
+        idnum = (
+            indirect_reference
+            if isinstance(indirect_reference, int)
+            else indirect_reference.idnum
+        )
+        assert self._page_id2num is not None, "hint for mypy"
+        return self._page_id2num.get(idnum, -1)
+
+    def get_page_number(self, page: PageObject) -> int:
+        """
+        Retrieve page number of a given PageObject.
+
+        Args:
+            page: The page whose number is wanted. Should be
+                an instance of :class:`PageObject<pypdf._page.PageObject>`
+
+        Returns:
+            The zero-based page number, or -1 if the page is not found
+        """
+        return self._get_page_number_by_indirect(page.indirect_reference)
+
+    def getPageNumber(self, page: PageObject) -> int:  # deprecated
+        """
+        Deprecated alias; use :meth:`get_page_number` instead.
+
+        .. deprecated:: 1.28.0
+        """
+        deprecation_with_replacement("getPageNumber", "get_page_number", "3.0.0")
+        return self.get_page_number(page)
+
+    def get_destination_page_number(self, destination: Destination) -> int:
+        """
+        Retrieve page number of a given Destination object.
+
+        Args:
+            destination: The destination whose target page number is wanted.
+
+        Returns:
+            The zero-based page number, or -1 if the page is not found
+        """
+        return self._get_page_number_by_indirect(destination.page)
+
+    def getDestinationPageNumber(self, destination: Destination) -> int:  # deprecated
+        """
+        Deprecated alias: use :meth:`get_destination_page_number` instead.
+
+        .. deprecated:: 1.28.0
+        """
+        deprecation_with_replacement(
+            "getDestinationPageNumber", "get_destination_page_number", "3.0.0"
+        )
+        return self.get_destination_page_number(destination)
+
+    def _build_destination(
+        self,
+        title: str,
+        array: Optional[
+            List[
+                Union[NumberObject, IndirectObject, None, NullObject, DictionaryObject]
+            ]
+        ],
+    ) -> Destination:
+        """Turn a destination array (page, fit type, fit args...) into a Destination."""
+        # outline items with a missing or invalid destination get a null page
+        unusable = (
+            array is None
+            or isinstance(array, (NullObject, str))
+            or (isinstance(array, ArrayObject) and len(array) == 0)
+        )
+        if unusable:
+            return Destination(title, NullObject(), Fit.fit())
+        page, typ = array[0:2]  # type: ignore
+        fit_args = array[2:]  # type: ignore
+        try:
+            return Destination(title, page, Fit(fit_type=typ, fit_args=fit_args))  # type: ignore
+        except PdfReadError:
+            logger_warning(f"Unknown destination: {title} {fit_args}", __name__)
+            if self.strict:
+                raise
+            # fall back to a link to the first page
+            first_ref = self.pages[0].indirect_reference
+            if first_ref is None:
+                first_ref = NullObject()
+            return Destination(title, first_ref, Fit.fit())  # type: ignore
+
+    def _build_outline_item(self, node: DictionaryObject) -> Optional[Destination]:
+        """Build the Destination describing one outline item dictionary (``node``)."""
+        dest, title, outline_item = None, None, None
+        # title required for valid outline
+        # PDF Reference 1.7: TABLE 8.4 Entries in an outline item dictionary
+        try:
+            title = cast("str", node["/Title"])
+        except KeyError:
+            if self.strict:
+                raise PdfReadError(f"Outline Entry Missing /Title attribute: {node!r}")
+            title = ""  # type: ignore
+
+        if "/A" in node:
+            # Action, PDFv1.7 Section 12.6 (only type GoTo supported)
+            action = cast(DictionaryObject, node["/A"])
+            action_type = cast(NameObject, action[GoToActionArguments.S])
+            if action_type == "/GoTo":
+                dest = action[GoToActionArguments.D]
+        elif "/Dest" in node:
+            # Destination, PDFv1.7 Section 12.3.2
+            dest = node["/Dest"]
+            # if array was referenced in another object, will be a dict w/ key "/D"
+            if isinstance(dest, DictionaryObject) and "/D" in dest:
+                dest = dest["/D"]
+
+        if isinstance(dest, ArrayObject):
+            outline_item = self._build_destination(title, dest)
+        elif isinstance(dest, str):
+            # named destination, addresses NameObject Issue #193
+            # TODO : keep named destination instead of replacing it ?
+            try:
+                outline_item = self._build_destination(
+                    title, self._namedDests[dest].dest_array
+                )
+            except KeyError:
+                # named destination not found in Name Dict
+                outline_item = self._build_destination(title, None)
+        elif dest is None:
+            # outline item not required to have destination or action
+            # PDFv1.7 Table 153
+            outline_item = self._build_destination(title, dest)
+        else:
+            if self.strict:
+                raise PdfReadError(f"Unexpected destination {dest!r}")
+            else:
+                logger_warning(
+                    f"Removed unexpected destination {dest!r} from destination",
+                    __name__,
+                )
+            outline_item = self._build_destination(title, None)  # type: ignore
+
+        # if outline item created, add color, format, and child count if present
+        if outline_item:
+            if "/C" in node:
+                # Color of outline item font in (R, G, B) with values ranging 0.0-1.0
+                outline_item[NameObject("/C")] = ArrayObject(FloatObject(c) for c in node["/C"])  # type: ignore
+            if "/F" in node:
+                # specifies style characteristics bold and/or italic
+                # with 1=italic, 2=bold, 3=both
+                outline_item[NameObject("/F")] = node["/F"]
+            if "/Count" in node:
+                # absolute value = num. visible children
+                # with positive = open/unfolded, negative = closed/folded
+                outline_item[NameObject("/Count")] = node["/Count"]
+            # a /Count that is 0 or missing is treated as open, so /%is_open% is always set
+            outline_item[NameObject("/%is_open%")] = BooleanObject(
+                node.get("/Count", 0) >= 0
+            )
+        outline_item.node = node
+        try:
+            outline_item.indirect_reference = node.indirect_reference
+        except AttributeError:
+            pass  # best effort: the reference is simply not copied over
+        return outline_item
+
+    @property
+    def pages(self) -> List[PageObject]:
+        """Read-only property that emulates a list of :py:class:`PageObject<pypdf._page.PageObject>` objects."""
+        return _VirtualList(self._get_num_pages, self._get_page)  # type: ignore
+
+    @property
+    def page_labels(self) -> List[str]:
+        """
+        A list of labels for the pages in this document.
+
+        This property is read-only. The list holds one label per page, in the
+        order that the pages appear in the document.
+        """
+        return [page_index2page_label(self, i) for i in range(len(self.pages))]
+
+    @property
+    def page_layout(self) -> Optional[str]:
+        """
+        Return the page layout name stored in the catalog, or None if absent.
+
+        .. list-table:: Valid ``layout`` values
+           :widths: 50 200
+
+           * - /NoLayout
+             - Layout explicitly not specified
+           * - /SinglePage
+             - Show one page at a time
+           * - /OneColumn
+             - Show one column at a time
+           * - /TwoColumnLeft
+             - Show pages in two columns, odd-numbered pages on the left
+           * - /TwoColumnRight
+             - Show pages in two columns, odd-numbered pages on the right
+           * - /TwoPageLeft
+             - Show two pages at a time, odd-numbered pages on the left
+           * - /TwoPageRight
+             - Show two pages at a time, odd-numbered pages on the right
+        """
+        catalog = cast(DictionaryObject, self.trailer[TK.ROOT])
+        if CD.PAGE_LAYOUT not in catalog:
+            return None
+        return cast(NameObject, catalog[CD.PAGE_LAYOUT])
+
+    def getPageLayout(self) -> Optional[str]:  # deprecated
+        """
+        Deprecated alias: use :py:attr:`page_layout` instead.
+
+        .. deprecated:: 1.28.0
+        """
+        deprecation_with_replacement("getPageLayout", "page_layout", "3.0.0")
+        return self.page_layout
+
+    @property
+    def pageLayout(self) -> Optional[str]:  # deprecated
+        """
+        Deprecated alias: use :py:attr:`page_layout` instead.
+
+        .. deprecated:: 1.28.0
+        """
+        deprecation_with_replacement("pageLayout", "page_layout", "3.0.0")
+        return self.page_layout
+
+    @property
+    def page_mode(self) -> Optional[PagemodeType]:
+        """
+        Get the page mode currently being used, or None if the lookup raises KeyError.
+
+        .. list-table:: Valid ``mode`` values
+           :widths: 50 200
+
+           * - /UseNone
+             - Do not show outline or thumbnails panels
+           * - /UseOutlines
+             - Show outline (aka bookmarks) panel
+           * - /UseThumbs
+             - Show page thumbnails panel
+           * - /FullScreen
+             - Fullscreen view
+           * - /UseOC
+             - Show Optional Content Group (OCG) panel
+           * - /UseAttachments
+             - Show attachments panel
+        """
+        try:
+            return self.trailer[TK.ROOT]["/PageMode"]  # type: ignore
+        except KeyError:
+            return None
+
+    def getPageMode(self) -> Optional[PagemodeType]:  # deprecated
+        """
+        Deprecated alias: use :py:attr:`page_mode` instead.
+
+        .. deprecated:: 1.28.0
+        """
+        deprecation_with_replacement("getPageMode", "page_mode", "3.0.0")
+        return self.page_mode
+
+    @property
+    def pageMode(self) -> Optional[PagemodeType]:  # deprecated
+        """
+        Deprecated alias: use :py:attr:`page_mode` instead.
+
+        .. deprecated:: 1.28.0
+        """
+        deprecation_with_replacement("pageMode", "page_mode", "3.0.0")
+        return self.page_mode
+
+    def _flatten(
+        self,
+        pages: Union[None, DictionaryObject, PageObject] = None,
+        inherit: Optional[Dict[str, Any]] = None,
+        indirect_reference: Optional[IndirectObject] = None,
+    ) -> None:
+        """Walk the page tree below ``pages`` and fill ``self.flattened_pages``."""
+        inheritable_page_attributes = (
+            NameObject(PG.RESOURCES),
+            NameObject(PG.MEDIABOX),
+            NameObject(PG.CROPBOX),
+            NameObject(PG.ROTATE),
+        )
+        # private copy: a /Pages node's values must not leak into sibling subtrees
+        inherit = {} if inherit is None else dict(inherit)
+        if pages is None:
+            # Fix issue 327: set flattened_pages attribute only for
+            # decrypted file
+            catalog = self.trailer[TK.ROOT].get_object()
+            pages = catalog["/Pages"].get_object()  # type: ignore
+            self.flattened_pages = []
+        if PA.TYPE in pages:
+            t = pages[PA.TYPE]  # type: ignore
+        # if pdf has no type, considered as a page if /Kids is missing
+        elif PA.KIDS not in pages:
+            t = "/Page"
+        else:
+            t = "/Pages"
+
+        if t == "/Pages":
+            for attr in inheritable_page_attributes:
+                if attr in pages:
+                    inherit[attr] = pages[attr]
+            for page in pages[PA.KIDS]:  # type: ignore
+                addt = {}
+                if isinstance(page, IndirectObject):
+                    addt["indirect_reference"] = page
+                obj = page.get_object()
+                if obj:
+                    # damaged file may have invalid child in /Pages
+                    self._flatten(obj, inherit, **addt)
+        elif t == "/Page":
+            for attr_in, value in inherit.items():
+                # if the page has its own value, it does not inherit the
+                # parent's value:
+                if attr_in not in pages:
+                    pages[attr_in] = value
+            page_obj = PageObject(self, indirect_reference)
+            page_obj.update(pages)
+
+            # TODO: Could flattened_pages be None at this point?
+            self.flattened_pages.append(page_obj)  # type: ignore
+
+    def _get_object_from_stream(
+        self, indirect_reference: IndirectObject
+    ) -> Union[int, PdfObject, str]:
+        """Read the referenced object out of the object stream that contains it;
+        the entire object stream is read into memory."""
+        stmnum, idx = self.xref_objStm[indirect_reference.idnum]
+        obj_stm: EncodedStreamObject = IndirectObject(stmnum, 0, self).get_object()  # type: ignore
+        # This is an xref to a stream, so its type better be a stream
+        assert cast(str, obj_stm["/Type"]) == "/ObjStm"
+        # /N is the number of indirect objects in the stream
+        assert idx < obj_stm["/N"]
+        stream_data = BytesIO(b_(obj_stm.get_data()))
+        for i in range(obj_stm["/N"]):  # type: ignore
+            read_non_whitespace(stream_data)
+            stream_data.seek(-1, 1)
+            objnum = NumberObject.read_from_stream(stream_data)
+            read_non_whitespace(stream_data)
+            stream_data.seek(-1, 1)
+            offset = NumberObject.read_from_stream(stream_data)
+            read_non_whitespace(stream_data)
+            stream_data.seek(-1, 1)
+            if objnum != indirect_reference.idnum:
+                # We're only interested in one object
+                continue
+            if self.strict and idx != i:
+                raise PdfReadError("Object is in wrong index.")
+            stream_data.seek(int(obj_stm["/First"] + offset), 0)  # type: ignore
+
+            # to cope with some case where the 'pointer' is on a white space
+            read_non_whitespace(stream_data)
+            stream_data.seek(-1, 1)
+
+            try:
+                obj = read_object(stream_data, self)
+            except PdfStreamError as exc:
+                # Stream object cannot be read. Normally, a critical error, but
+                # Adobe Reader doesn't complain, so continue (in strict mode?)
+                logger_warning(
+                    f"Invalid stream (index {i}) within object "
+                    f"{indirect_reference.idnum} {indirect_reference.generation}: "
+                    f"{exc}",
+                    __name__,
+                )
+
+                if self.strict:
+                    raise PdfReadError(f"Can't read object stream: {exc}")
+                # Replace with null. Hopefully it's nothing important.
+                obj = NullObject()
+            return obj
+
+        if self.strict:
+            raise PdfReadError("This is a fatal error in strict mode.")
+        return NullObject()  # no index entry matched the requested object number
+
+    def _get_indirect_object(self, num: int, gen: int) -> Optional[PdfObject]:
+        """
+        Fetch an indirect object by number and generation (development helper).
+
+        This is equivalent to generic.IndirectObject(num,gen,self).get_object()
+
+        Args:
+            num: The object number of the indirect object.
+            gen: The generation number of the indirect object.
+
+        Returns:
+            The object that the (num, gen) reference resolves to.
+        """
+        return IndirectObject(num, gen, self).get_object()
+
+    def get_object(
+        self, indirect_reference: Union[int, IndirectObject]
+    ) -> Optional[PdfObject]:
+        """Resolve an object number or indirect reference to its object, using the cache."""
+        if isinstance(indirect_reference, int):
+            indirect_reference = IndirectObject(indirect_reference, 0, self)
+        retval = self.cache_get_indirect_object(
+            indirect_reference.generation, indirect_reference.idnum
+        )
+        if retval is not None:
+            return retval
+        if (
+            indirect_reference.generation == 0
+            and indirect_reference.idnum in self.xref_objStm
+        ):
+            retval = self._get_object_from_stream(indirect_reference)  # type: ignore
+        elif (
+            indirect_reference.generation in self.xref
+            and indirect_reference.idnum in self.xref[indirect_reference.generation]
+        ):
+            if self.xref_free_entry.get(indirect_reference.generation, {}).get(
+                indirect_reference.idnum, False
+            ):
+                return NullObject()
+            start = self.xref[indirect_reference.generation][indirect_reference.idnum]
+            self.stream.seek(start, 0)
+            try:
+                idnum, generation = self.read_object_header(self.stream)
+            except Exception:
+                if hasattr(self.stream, "getbuffer"):
+                    buf = bytes(self.stream.getbuffer())  # type: ignore
+                else:
+                    p = self.stream.tell()
+                    self.stream.seek(0, 0)
+                    buf = self.stream.read(-1)
+                    self.stream.seek(p, 0)
+                m = re.search(
+                    rf"\s{indirect_reference.idnum}\s+{indirect_reference.generation}\s+obj".encode(),
+                    buf,
+                )
+                if m is not None:
+                    logger_warning(
+                        f"Object ID {indirect_reference.idnum},{indirect_reference.generation} ref repaired",
+                        __name__,
+                    )
+                    offset = m.start(0) + 1  # skip the whitespace matched by the leading \s
+                    self.xref[indirect_reference.generation][indirect_reference.idnum] = offset
+                    self.stream.seek(offset)
+                    idnum, generation = self.read_object_header(self.stream)
+                else:
+                    idnum, generation = -1, -1  # placeholders: strict mode raises below and formats both
+            if idnum != indirect_reference.idnum and self.xref_index:
+                # Xref table probably had bad indexes due to not being zero-indexed
+                if self.strict:
+                    raise PdfReadError(
+                        f"Expected object ID ({indirect_reference.idnum} {indirect_reference.generation}) "
+                        f"does not match actual ({idnum} {generation}); "
+                        "xref table not zero-indexed."
+                    )
+                # xref table is corrected in non-strict mode
+            elif idnum != indirect_reference.idnum and self.strict:
+                # some other problem
+                raise PdfReadError(
+                    f"Expected object ID ({indirect_reference.idnum} "
+                    f"{indirect_reference.generation}) does not match actual "
+                    f"({idnum} {generation})."
+                )
+            if self.strict:
+                assert generation == indirect_reference.generation
+            retval = read_object(self.stream, self)  # type: ignore
+
+            # override encryption is used for the /Encrypt dictionary
+            if not self._override_encryption and self._encryption is not None:
+                # if we don't have the encryption key:
+                if not self._encryption.is_decrypted():
+                    raise FileNotDecryptedError("File has not been decrypted")
+                # otherwise, decrypt here...
+                retval = cast(PdfObject, retval)
+                retval = self._encryption.decrypt_object(
+                    retval, indirect_reference.idnum, indirect_reference.generation
+                )
+        else:
+            if hasattr(self.stream, "getbuffer"):
+                buf = bytes(self.stream.getbuffer())  # type: ignore
+            else:
+                p = self.stream.tell()
+                self.stream.seek(0, 0)
+                buf = self.stream.read(-1)
+                self.stream.seek(p, 0)
+            m = re.search(
+                rf"\s{indirect_reference.idnum}\s+{indirect_reference.generation}\s+obj".encode(),
+                buf,
+            )
+            if m is not None:
+                logger_warning(
+                    f"Object {indirect_reference.idnum} {indirect_reference.generation} found",
+                    __name__,
+                )
+                if indirect_reference.generation not in self.xref:
+                    self.xref[indirect_reference.generation] = {}
+                self.xref[indirect_reference.generation][indirect_reference.idnum] = (
+                    m.start(0) + 1
+                )
+                self.stream.seek(m.end(0) + 1)
+                skip_over_whitespace(self.stream)
+                self.stream.seek(-1, 1)
+                retval = read_object(self.stream, self)  # type: ignore
+
+                # override encryption is used for the /Encrypt dictionary
+                if not self._override_encryption and self._encryption is not None:
+                    # if we don't have the encryption key:
+                    if not self._encryption.is_decrypted():
+                        raise FileNotDecryptedError("File has not been decrypted")
+                    # otherwise, decrypt here...
+                    retval = cast(PdfObject, retval)
+                    retval = self._encryption.decrypt_object(
+                        retval, indirect_reference.idnum, indirect_reference.generation
+                    )
+            else:
+                logger_warning(
+                    f"Object {indirect_reference.idnum} {indirect_reference.generation} not defined.",
+                    __name__,
+                )
+                if self.strict:
+                    raise PdfReadError("Could not find object.")
+        self.cache_indirect_object(
+            indirect_reference.generation, indirect_reference.idnum, retval
+        )
+        return retval
+
+    def getObject(
+        self, indirectReference: IndirectObject
+    ) -> Optional[PdfObject]:  # deprecated
+        """
+        Deprecated alias: use :meth:`get_object` instead.
+
+        .. deprecated:: 1.28.0
+        """
+        deprecation_with_replacement("getObject", "get_object", "3.0.0")
+        return self.get_object(indirectReference)
+
+    def read_object_header(self, stream: StreamType) -> Tuple[int, int]:
+        """Read an object header at the stream position; return (idnum, generation)."""
+        # Skipping whitespace should never be necessary: the cross-reference
+        # table should put us right at the object header.  In reality... some
+        # files have cross reference tables that are off by whitespace bytes.
+        extra = False
+        skip_over_comment(stream)
+        extra |= skip_over_whitespace(stream)
+        stream.seek(-1, 1)
+        idnum = read_until_whitespace(stream)
+        extra |= skip_over_whitespace(stream)
+        stream.seek(-1, 1)
+        generation = read_until_whitespace(stream)
+        extra |= skip_over_whitespace(stream)
+        stream.seek(-1, 1)
+
+        # although it's not used, it might still be necessary to read
+        _obj = stream.read(3)  # presumably the ``obj`` keyword; the bytes are not checked
+
+        read_non_whitespace(stream)
+        stream.seek(-1, 1)
+        if extra and self.strict:
+            logger_warning(
+                f"Superfluous whitespace found in object header {idnum} {generation}",  # type: ignore
+                __name__,
+            )
+        return int(idnum), int(generation)
+
+    def readObjectHeader(self, stream: StreamType) -> Tuple[int, int]:  # deprecated
+        """
+        Deprecated alias: use :meth:`read_object_header` instead.
+
+        .. deprecated:: 1.28.0
+        """
+        deprecation_with_replacement("readObjectHeader", "read_object_header", "3.0.0")
+        return self.read_object_header(stream)
+
+    def cache_get_indirect_object(self, generation: int, idnum: int) -> Optional[PdfObject]:
+        """Look up (generation, idnum) in ``self.resolved_objects``; None if absent."""
+        cache_key = (generation, idnum)
+        return self.resolved_objects.get(cache_key)
+
+    def cacheGetIndirectObject(
+        self, generation: int, idnum: int
+    ) -> Optional[PdfObject]:  # deprecated
+        """
+        Deprecated alias: use :meth:`cache_get_indirect_object` instead.
+
+        .. deprecated:: 1.28.0
+        """
+        deprecation_with_replacement(
+            "cacheGetIndirectObject", "cache_get_indirect_object", "3.0.0"
+        )
+        return self.cache_get_indirect_object(generation, idnum)
+
+    def cache_indirect_object(
+        self, generation: int, idnum: int, obj: Optional[PdfObject]
+    ) -> Optional[PdfObject]:  # stores obj under (generation, idnum) and returns it
+        if (generation, idnum) in self.resolved_objects:
+            msg = f"Overwriting cache for {generation} {idnum}"
+            if self.strict:
+                raise PdfReadError(msg)  # strict mode: overwriting an entry is an error
+            logger_warning(msg, __name__)  # otherwise warn and overwrite
+        self.resolved_objects[(generation, idnum)] = obj
+        if obj is not None:
+            obj.indirect_reference = IndirectObject(idnum, generation, self)  # back-link to this reader
+        return obj
+
+    def cacheIndirectObject(
+        self, generation: int, idnum: int, obj: Optional[PdfObject]
+    ) -> Optional[PdfObject]:  # deprecated
+        """
+        Deprecated alias: use :meth:`cache_indirect_object` instead.
+
+        .. deprecated:: 1.28.0
+        """
+        deprecation_with_replacement("cacheIndirectObject", "cache_indirect_object", "3.0.0")
+        return self.cache_indirect_object(generation, idnum, obj)
+
+    def read(self, stream: StreamType) -> None:
+        """Validate ``stream`` and load its xref tables and trailers into the reader."""
+        self._basic_validation(stream)
+        self._find_eof_marker(stream)
+        startxref = self._find_startxref_pos(stream)
+
+        # check the startxref pointer; a wrong one is tolerated only when not strict
+        xref_issue_nr = self._get_xref_issues(stream, startxref)
+        if xref_issue_nr != 0:
+            if self.strict:
+                raise PdfReadError("Broken xref table")
+            logger_warning(f"incorrect startxref pointer({xref_issue_nr})", __name__)
+
+        # read all cross reference tables and their trailers
+        self._read_xref_tables_and_trailers(stream, startxref, xref_issue_nr)
+
+        # if not zero-indexed, verify that the table is correct; change it if necessary
+        if self.xref_index and not self.strict:
+            loc = stream.tell()
+            for gen, xref_entry in self.xref.items():
+                if gen == 65535:
+                    continue
+                # sorted() snapshots the keys in ascending order: entries are
+                # re-keyed below, which must not disturb the iteration
+                for obj_id in sorted(xref_entry):
+                    stream.seek(xref_entry[obj_id], 0)
+                    try:
+                        pid, _pgen = self.read_object_header(stream)
+                    except ValueError:
+                        break
+                    if pid == obj_id - self.xref_index:
+                        # fixing index item per item is required for revised PDF.
+                        self.xref[gen][pid] = self.xref[gen][obj_id]
+                        del self.xref[gen][obj_id]
+                    # if not, then either it's just plain wrong, or the
+                    # non-zero-index is actually correct
+            stream.seek(loc, 0)  # return to where it was
+
+    def _basic_validation(self, stream: StreamType) -> None:
+        """Check the file is not empty and starts with '%PDF-'. Read at most 5 bytes."""
+        stream.seek(0, os.SEEK_SET)
+        try:
+            header_byte = stream.read(5)
+        except UnicodeDecodeError:
+            raise UnsupportedOperation("cannot read header")
+        if header_byte == b"":
+            raise EmptyFileError("Cannot read an empty file")
+        elif header_byte != b"%PDF-":
+            if self.strict:
+                raise PdfReadError(
+                    f"PDF starts with '{header_byte.decode('utf8', errors='replace')}', "
+                    "but '%PDF-' expected"
+                )
+            else:
+                logger_warning(f"invalid pdf header: {header_byte}", __name__)
+        stream.seek(0, os.SEEK_END)
+
+    def _find_eof_marker(self, stream: StreamType) -> None:
+        """
+        Walk backwards through the stream, line by line, to the %%EOF marker.
+
+        According to the specs, the %%EOF marker should be at the very end of
+        the file. Hence for standard-compliant PDF documents this function will
+        read only the last part (DEFAULT_BUFFER_SIZE).
+        """
+        HEADER_SIZE = 8  # to parse whole file, Header is e.g. '%PDF-1.6'
+        line = b""
+        while not line.startswith(b"%%EOF"):
+            if stream.tell() < HEADER_SIZE:
+                # walked back into the header area without meeting the marker
+                if self.strict:
+                    raise PdfReadError("EOF marker not found")
+                logger_warning("EOF marker not found", __name__)
+            line = read_previous_line(stream)
+
+    def _find_startxref_pos(self, stream: StreamType) -> int:
+        """
+        Read the startxref entry, i.e. the location of the xref table.
+
+        Args:
+            stream: The stream, read backwards line by line from its position.
+
+        Returns:
+            The byte offset given by the startxref entry
+        """
+        offset_line = read_previous_line(stream)
+        try:
+            startxref = int(offset_line)
+        except ValueError:
+            # 'startxref' may be on the same line as the location
+            if not offset_line.startswith(b"startxref"):
+                raise PdfReadError("startxref not found")
+            startxref = int(offset_line[len(b"startxref"):].strip())
+            logger_warning("startxref on same line as offset", __name__)
+        else:
+            keyword_line = read_previous_line(stream)
+            if not keyword_line.startswith(b"startxref"):
+                raise PdfReadError("startxref not found")
+        return startxref
+
+    def _read_standard_xref_table(self, stream: StreamType) -> None:
+        """Parse a standard cross-reference table into self.xref; stream must be at ``ref``."""
+        ref = stream.read(3)
+        if ref != b"ref":
+            raise PdfReadError("xref table read error")
+        read_non_whitespace(stream)
+        stream.seek(-1, 1)
+        first_time = True  # check if the first time looking at the xref table
+        while True:
+            num = cast(int, read_object(stream, self))
+            if first_time and num != 0:
+                self.xref_index = num
+                if self.strict:
+                    logger_warning(
+                        "Xref table not zero-indexed. ID numbers for objects will be corrected.",
+                        __name__,
+                    )
+                    # if table not zero indexed, could be due to error from when PDF was created
+                    # which will lead to mismatched indices later on; the warning is only emitted if self.strict==True
+            first_time = False
+            read_non_whitespace(stream)
+            stream.seek(-1, 1)
+            size = cast(int, read_object(stream, self))
+            read_non_whitespace(stream)
+            stream.seek(-1, 1)
+            cnt = 0
+            while cnt < size:
+                line = stream.read(20)
+
+                # It's very clear in section 3.4.3 of the PDF spec
+                # that all cross-reference table lines are a fixed
+                # 20 bytes (as of PDF 1.7). However, some files have
+                # 21-byte entries (or more) due to the use of \r\n
+                # (CRLF) EOL's. Detect that case, and adjust the line
+                # until it does not begin with a \r (CR) or \n (LF).
+                while line[0] in b"\x0D\x0A":
+                    stream.seek(-20 + 1, 1)
+                    line = stream.read(20)
+
+                # On the other hand, some malformed PDF files
+                # use a single character EOL without a preceding
+                # space.  Detect that case, and seek the stream
+                # back one character.  (0-9 means we've bled into
+                # the next xref entry, t means we've bled into the
+                # text "trailer"):
+                if line[-1] in b"0123456789t":
+                    stream.seek(-1, 1)
+
+                try:
+                    offset_b, generation_b = line[:16].split(b" ")
+                    entry_type_b = line[17:18]
+
+                    offset, generation = int(offset_b), int(generation_b)
+                except Exception:
+                    # the entry could not be parsed: search the whole file for the object
+                    if hasattr(stream, "getbuffer"):
+                        buf = bytes(stream.getbuffer())  # type: ignore
+                    else:
+                        p = stream.tell()
+                        stream.seek(0, 0)
+                        buf = stream.read(-1)
+                        stream.seek(p)
+
+                    f = re.search(f"{num}\\s+(\\d+)\\s+obj".encode(), buf)
+                    if f is None:
+                        logger_warning(
+                            f"entry {num} in Xref table invalid; object not found",
+                            __name__,
+                        )
+                        generation = 65535
+                        offset = -1
+                    else:
+                        logger_warning(
+                            f"entry {num} in Xref table invalid but object found",
+                            __name__,
+                        )
+                        generation = int(f.group(1))
+                        offset = f.start()
+
+                if generation not in self.xref:
+                    self.xref[generation] = {}
+                    self.xref_free_entry[generation] = {}
+                if num in self.xref[generation]:
+                    # It really seems like we should allow the last
+                    # xref table in the file to override previous
+                    # ones. Since we read the file backwards, assume
+                    # any existing key is already set correctly.
+                    pass
+                else:
+                    self.xref[generation][num] = offset
+                    try:
+                        self.xref_free_entry[generation][num] = entry_type_b == b"f"
+                    except Exception:
+                        pass  # best effort, e.g. entry_type_b never assigned because parsing failed
+                    try:
+                        self.xref_free_entry[65535][num] = entry_type_b == b"f"
+                    except Exception:
+                        pass  # best effort, e.g. no entry for generation 65535 exists yet
+                cnt += 1
+                num += 1
+            read_non_whitespace(stream)
+            stream.seek(-1, 1)
+            trailer_tag = stream.read(7)
+            if trailer_tag != b"trailer":
+                # more xrefs!
+                stream.seek(-7, 1)
+            else:
+                break
+
+    def _read_xref_tables_and_trailers(
+        self, stream: StreamType, startxref: Optional[int], xref_issue_nr: int
+    ) -> None:
+        self.xref: Dict[int, Dict[Any, Any]] = {}
+        self.xref_free_entry: Dict[int, Dict[Any, Any]] = {}
+        self.xref_objStm: Dict[int, Tuple[Any, Any]] = {}
+        self.trailer = DictionaryObject()
+        while startxref is not None:
+            # load the xref table
+            stream.seek(startxref, 0)
+            x = stream.read(1)
+            if x in b"\r\n":
+                x = stream.read(1)
+            if x == b"x":
+                startxref = self._read_xref(stream)
+            elif xref_issue_nr:
+                try:
+                    self._rebuild_xref_table(stream)
+                    break
+                except Exception:
+                    xref_issue_nr = 0
+            elif x.isdigit():
+                try:
+                    xrefstream = self._read_pdf15_xref_stream(stream)
+                except Exception as e:
+                    if TK.ROOT in self.trailer:
+                        logger_warning(
+                            f"Previous trailer can not be read {e.args}",
+                            __name__,
+                        )
+                        break
+                    else:
+                        raise PdfReadError(f"trailer can not be read {e.args}")
+                trailer_keys = TK.ROOT, TK.ENCRYPT, TK.INFO, TK.ID, TK.SIZE
+                for key in trailer_keys:
+                    if key in xrefstream and key not in self.trailer:
+                        self.trailer[NameObject(key)] = xrefstream.raw_get(key)
+                if "/XRefStm" in xrefstream:
+                    p = stream.tell()
+                    stream.seek(cast(int, xrefstream["/XRefStm"]) + 1, 0)
+                    self._read_pdf15_xref_stream(stream)
+                    stream.seek(p, 0)
+                if "/Prev" in xrefstream:
+                    startxref = cast(int, xrefstream["/Prev"])
+                else:
+                    break
+            else:
+                startxref = self._read_xref_other_error(stream, startxref)
+
+    def _read_xref(self, stream: StreamType) -> Optional[int]:
+        self._read_standard_xref_table(stream)
+        read_non_whitespace(stream)
+        stream.seek(-1, 1)
+        new_trailer = cast(Dict[str, Any], read_object(stream, self))
+        for key, value in new_trailer.items():
+            if key not in self.trailer:
+                self.trailer[key] = value
+        if "/XRefStm" in new_trailer:
+            p = stream.tell()
+            stream.seek(cast(int, new_trailer["/XRefStm"]) + 1, 0)
+            try:
+                self._read_pdf15_xref_stream(stream)
+            except Exception:
+                logger_warning(
+                    f"XRef object at {new_trailer['/XRefStm']} can not be read, some object may be missing",
+                    __name__,
+                )
+            stream.seek(p, 0)
+        if "/Prev" in new_trailer:
+            startxref = new_trailer["/Prev"]
+            return startxref
+        else:
+            return None
+
+    def _read_xref_other_error(
+        self, stream: StreamType, startxref: int
+    ) -> Optional[int]:
+        # some PDFs have /Prev=0 in the trailer, instead of no /Prev
+        if startxref == 0:
+            if self.strict:
+                raise PdfReadError(
+                    "/Prev=0 in the trailer (try opening with strict=False)"
+                )
+            logger_warning(
+                "/Prev=0 in the trailer - assuming there is no previous xref table",
+                __name__,
+            )
+            return None
+        # bad xref character at startxref.  Let's see if we can find
+        # the xref table nearby, as we've observed this error with an
+        # off-by-one before.
+        stream.seek(-11, 1)
+        tmp = stream.read(20)
+        xref_loc = tmp.find(b"xref")
+        if xref_loc != -1:
+            startxref -= 10 - xref_loc
+            return startxref
+        # No explicit xref table, try finding a cross-reference stream.
+        stream.seek(startxref, 0)
+        for look in range(25):  # value extended to cope with more linearized files
+            if stream.read(1).isdigit():
+                # This is not a standard PDF, consider adding a warning
+                startxref += look
+                return startxref
+        # no xref table found at specified location
+        if "/Root" in self.trailer and not self.strict:
+            # if Root has been already found, just raise warning
+            logger_warning("Invalid parent xref., rebuild xref", __name__)
+            try:
+                self._rebuild_xref_table(stream)
+                return None
+            except Exception:
+                raise PdfReadError("can not rebuild xref")
+        raise PdfReadError("Could not find xref table at specified location")
+
+    def _read_pdf15_xref_stream(
+        self, stream: StreamType
+    ) -> Union[ContentStream, EncodedStreamObject, DecodedStreamObject]:
+        # PDF 1.5+ Cross-Reference Stream
+        stream.seek(-1, 1)
+        idnum, generation = self.read_object_header(stream)
+        xrefstream = cast(ContentStream, read_object(stream, self))
+        assert cast(str, xrefstream["/Type"]) == "/XRef"
+        self.cache_indirect_object(generation, idnum, xrefstream)
+        stream_data = BytesIO(b_(xrefstream.get_data()))
+        # Index pairs specify the subsections in the dictionary. If
+        # none create one subsection that spans everything.
+        idx_pairs = xrefstream.get("/Index", [0, xrefstream.get("/Size")])
+        entry_sizes = cast(Dict[Any, Any], xrefstream.get("/W"))
+        assert len(entry_sizes) >= 3
+        if self.strict and len(entry_sizes) > 3:
+            raise PdfReadError(f"Too many entry sizes: {entry_sizes}")
+
+        def get_entry(i: int) -> Union[int, Tuple[int, ...]]:
+            # Reads the correct number of bytes for each entry. See the
+            # discussion of the W parameter in PDF spec table 17.
+            if entry_sizes[i] > 0:
+                d = stream_data.read(entry_sizes[i])
+                return convert_to_int(d, entry_sizes[i])
+
+            # PDF Spec Table 17: A value of zero for an element in the
+            # W array indicates...the default value shall be used
+            if i == 0:
+                return 1  # First value defaults to 1
+            else:
+                return 0
+
+        def used_before(num: int, generation: Union[int, Tuple[int, ...]]) -> bool:
+            # We move backwards through the xrefs, don't replace any.
+            return num in self.xref.get(generation, []) or num in self.xref_objStm  # type: ignore
+
+        # Iterate through each subsection
+        self._read_xref_subsections(idx_pairs, get_entry, used_before)
+        return xrefstream
+
+    @staticmethod
+    def _get_xref_issues(stream: StreamType, startxref: int) -> int:
+        """
+        Return an int which indicates an issue. 0 means there is no issue.
+
+        Args:
+            stream:
+            startxref:
+
+        Returns:
+            0 means no issue, other values represent specific issues.
+        """
+        stream.seek(startxref - 1, 0)  # -1 to check character before
+        line = stream.read(1)
+        if line == b"j":
+            line = stream.read(1)
+        if line not in b"\r\n \t":
+            return 1
+        line = stream.read(4)
+        if line != b"xref":
+            # not an xref so check if it is an XREF object
+            line = b""
+            while line in b"0123456789 \t":
+                line = stream.read(1)
+                if line == b"":
+                    return 2
+            line += stream.read(2)  # 1 char already read, +2 to check "obj"
+            if line.lower() != b"obj":
+                return 3
+        return 0
+
+    def _rebuild_xref_table(self, stream: StreamType) -> None:
+        self.xref = {}
+        stream.seek(0, 0)
+        f_ = stream.read(-1)
+
+        for m in re.finditer(rb"[\r\n \t][ \t]*(\d+)[ \t]+(\d+)[ \t]+obj", f_):
+            idnum = int(m.group(1))
+            generation = int(m.group(2))
+            if generation not in self.xref:
+                self.xref[generation] = {}
+            self.xref[generation][idnum] = m.start(1)
+        stream.seek(0, 0)
+        for m in re.finditer(rb"[\r\n \t][ \t]*trailer[\r\n \t]*(<<)", f_):
+            stream.seek(m.start(1), 0)
+            new_trailer = cast(Dict[Any, Any], read_object(stream, self))
+            # Here we parse the file from start to end, so newer data has to overwrite the existing entries.
+            for key, value in list(new_trailer.items()):
+                self.trailer[key] = value
+
+    def _read_xref_subsections(
+        self,
+        idx_pairs: List[int],
+        get_entry: Callable[[int], Union[int, Tuple[int, ...]]],
+        used_before: Callable[[int, Union[int, Tuple[int, ...]]], bool],
+    ) -> None:
+        for start, size in self._pairs(idx_pairs):
+            # The subsections must increase
+            for num in range(start, start + size):
+                # The first entry is the type
+                xref_type = get_entry(0)
+                # The rest of the elements depend on the xref_type
+                if xref_type == 0:
+                    # linked list of free objects
+                    next_free_object = get_entry(1)  # noqa: F841
+                    next_generation = get_entry(2)  # noqa: F841
+                elif xref_type == 1:
+                    # objects that are in use but are not compressed
+                    byte_offset = get_entry(1)
+                    generation = get_entry(2)
+                    if generation not in self.xref:
+                        self.xref[generation] = {}  # type: ignore
+                    if not used_before(num, generation):
+                        self.xref[generation][num] = byte_offset  # type: ignore
+                elif xref_type == 2:
+                    # compressed objects
+                    objstr_num = get_entry(1)
+                    obstr_idx = get_entry(2)
+                    generation = 0  # PDF spec table 18, generation is 0
+                    if not used_before(num, generation):
+                        self.xref_objStm[num] = (objstr_num, obstr_idx)
+                elif self.strict:
+                    raise PdfReadError(f"Unknown xref type: {xref_type}")
+
+    def _pairs(self, array: List[int]) -> Iterable[Tuple[int, int]]:
+        i = 0
+        while True:
+            yield array[i], array[i + 1]
+            i += 2
+            if (i + 1) >= len(array):
+                break
+
+    def read_next_end_line(
+        self, stream: StreamType, limit_offset: int = 0
+    ) -> bytes:  # deprecated
+        """.. deprecated:: 2.1.0"""
+        deprecate_no_replacement("read_next_end_line", removed_in="4.0.0")
+        line_parts = []
+        while True:
+            # Prevent infinite loops in malformed PDFs
+            if stream.tell() == 0 or stream.tell() == limit_offset:
+                raise PdfReadError("Could not read malformed PDF file")
+            x = stream.read(1)
+            if stream.tell() < 2:
+                raise PdfReadError("EOL marker not found")
+            stream.seek(-2, 1)
+            if x in (b"\n", b"\r"):  # \n = LF; \r = CR
+                crlf = False
+                while x in (b"\n", b"\r"):
+                    x = stream.read(1)
+                    if x in (b"\n", b"\r"):  # account for CR+LF
+                        stream.seek(-1, 1)
+                        crlf = True
+                    if stream.tell() < 2:
+                        raise PdfReadError("EOL marker not found")
+                    stream.seek(-2, 1)
+                stream.seek(
+                    2 if crlf else 1, 1
+                )  # if using CR+LF, go back 2 bytes, else 1
+                break
+            else:
+                line_parts.append(x)
+        line_parts.reverse()
+        return b"".join(line_parts)
+
+    def readNextEndLine(
+        self, stream: StreamType, limit_offset: int = 0
+    ) -> bytes:  # deprecated
+        """.. deprecated:: 1.28.0"""
+        deprecation_no_replacement("readNextEndLine", "3.0.0")
+        return self.read_next_end_line(stream, limit_offset)
+
+    def decrypt(self, password: Union[str, bytes]) -> PasswordType:
+        """
+        When using an encrypted / secured PDF file with the PDF Standard
+        encryption handler, this function will allow the file to be decrypted.
+        It checks the given password against the document's user password and
+        owner password, and then stores the resulting decryption key if either
+        password is correct.
+
+        It does not matter which password was matched.  Both passwords provide
+        the correct decryption key that will allow the document to be used with
+        this library.
+
+        Args:
+            password: The password to match.
+
+        Returns:
+            An indicator if the document was decrypted and whether it was the
+            owner password or the user password.
+        """
+        if not self._encryption:
+            raise PdfReadError("Not encrypted file")
+        # TODO: raise Exception for wrong password
+        return self._encryption.verify(password)
+
+    def decode_permissions(self, permissions_code: int) -> Dict[str, bool]:
+        # Takes the permissions as an integer, returns the allowed access
+        permissions = {}
+        permissions["print"] = permissions_code & (1 << 3 - 1) != 0  # bit 3
+        permissions["modify"] = permissions_code & (1 << 4 - 1) != 0  # bit 4
+        permissions["copy"] = permissions_code & (1 << 5 - 1) != 0  # bit 5
+        permissions["annotations"] = permissions_code & (1 << 6 - 1) != 0  # bit 6
+        permissions["forms"] = permissions_code & (1 << 9 - 1) != 0  # bit 9
+        permissions["accessability"] = permissions_code & (1 << 10 - 1) != 0  # bit 10
+        permissions["assemble"] = permissions_code & (1 << 11 - 1) != 0  # bit 11
+        permissions["print_high_quality"] = (
+            permissions_code & (1 << 12 - 1) != 0
+        )  # bit 12
+        return permissions
+
+    @property
+    def is_encrypted(self) -> bool:
+        """
+        Read-only boolean property showing whether this PDF file is encrypted.
+
+        Note that this property, if true, will remain true even after the
+        :meth:`decrypt()<pypdf.PdfReader.decrypt>` method is called.
+        """
+        return TK.ENCRYPT in self.trailer
+
+    def getIsEncrypted(self) -> bool:  # deprecated
+        """
+        Use :py:attr:`is_encrypted` instead.
+
+        .. deprecated:: 1.28.0
+        """
+        deprecation_with_replacement("getIsEncrypted", "is_encrypted", "3.0.0")
+        return self.is_encrypted
+
+    @property
+    def isEncrypted(self) -> bool:  # deprecated
+        """
+        Use :py:attr:`is_encrypted` instead.
+
+        .. deprecated:: 1.28.0
+        """
+        deprecation_with_replacement("isEncrypted", "is_encrypted", "3.0.0")
+        return self.is_encrypted
+
+    @property
+    def xfa(self) -> Optional[Dict[str, Any]]:
+        tree: Optional[TreeObject] = None
+        retval: Dict[str, Any] = {}
+        catalog = cast(DictionaryObject, self.trailer[TK.ROOT])
+
+        if "/AcroForm" not in catalog or not catalog["/AcroForm"]:
+            return None
+
+        tree = cast(TreeObject, catalog["/AcroForm"])
+
+        if "/XFA" in tree:
+            fields = cast(ArrayObject, tree["/XFA"])
+            i = iter(fields)
+            for f in i:
+                tag = f
+                f = next(i)
+                if isinstance(f, IndirectObject):
+                    field = cast(Optional[EncodedStreamObject], f.get_object())
+                    if field:
+                        es = zlib.decompress(b_(field._data))
+                        retval[tag] = es
+        return retval
+
+    def add_form_topname(self, name: str) -> Optional[DictionaryObject]:
+        """
+        Add a top level form that groups all form fields below it.
+
+        Args:
+            name: text string of the "/T" Attribute of the created object
+
+        Returns:
+            The created object. ``None`` means no object was created.
+        """
+        catalog = cast(DictionaryObject, self.trailer[TK.ROOT])
+
+        if "/AcroForm" not in catalog or not isinstance(
+            catalog["/AcroForm"], DictionaryObject
+        ):
+            return None
+        acroform = cast(DictionaryObject, catalog[NameObject("/AcroForm")])
+        if "/Fields" not in acroform:
+            # TODO: no error is raised here, but this may be extended for XFA forms
+            return None
+
+        interim = DictionaryObject()
+        interim[NameObject("/T")] = TextStringObject(name)
+        interim[NameObject("/Kids")] = acroform[NameObject("/Fields")]
+        self.cache_indirect_object(
+            0,
+            max([i for (g, i) in self.resolved_objects if g == 0]) + 1,
+            interim,
+        )
+        arr = ArrayObject()
+        arr.append(interim.indirect_reference)
+        acroform[NameObject("/Fields")] = arr
+        for o in cast(ArrayObject, interim["/Kids"]):
+            obj = o.get_object()
+            if "/Parent" in obj:
+                logger_warning(
+                    f"Top Level Form Field {obj.indirect_reference} have a non-expected parent",
+                    __name__,
+                )
+            obj[NameObject("/Parent")] = interim.indirect_reference
+        return interim
+
+    def rename_form_topname(self, name: str) -> Optional[DictionaryObject]:
+        """
+        Rename the top level form field that groups all form fields below it.
+
+        Args:
+            name: text string of the "/T" field of the modified object
+
+        Returns:
+            The modified object. ``None`` means no object was modified.
+        """
+        catalog = cast(DictionaryObject, self.trailer[TK.ROOT])
+
+        if "/AcroForm" not in catalog or not isinstance(
+            catalog["/AcroForm"], DictionaryObject
+        ):
+            return None
+        acroform = cast(DictionaryObject, catalog[NameObject("/AcroForm")])
+        if "/Fields" not in acroform:
+            return None
+
+        interim = cast(
+            DictionaryObject,
+            cast(ArrayObject, acroform[NameObject("/Fields")])[0].get_object(),
+        )
+        interim[NameObject("/T")] = TextStringObject(name)
+        return interim
+
+    def _get_embedded_files_root(self) -> Optional[NameTree]:
+        """
+        Return the EmbeddedFiles root as a NameTree object.
+        If the root does not exist, return None.
+        """
+        catalog = cast(DictionaryObject, self.trailer["/Root"])
+        if "/Names" not in catalog:
+            return None
+        ef = cast(DictionaryObject, catalog["/Names"]).get("/EmbeddedFiles", None)
+        if ef is None:
+            return None
+        efo = ef.get_object()
+        # not for reader
+        """
+            if not isinstance(efo,NameTree):
+            if isinstance(ef,IndirectObject):
+                ef.replace_object(efo)
+            else:
+                cast(DictionaryObject,catalog["/Names"])[
+                    NameObject("/EmbeddedFiles")] = NameTree(efo)
+        """
+        return NameTree(efo)
+
+    @property
+    def embedded_files(self) -> Optional[Mapping[str, List[PdfObject]]]:
+        ef = self._get_embedded_files_root()
+        if ef:
+            return ef.list_items()
+        else:
+            return None
+
+    @property
+    def attachments(self) -> Mapping[str, List[Union[bytes, Dict[str, bytes]]]]:
+        ef = self._get_embedded_files_root()
+        if ef:
+            d: Dict[str, List[Union[bytes, Dict[str, bytes]]]] = {}
+            for k, v in ef.list_items().items():
+                if isinstance(v, list):
+                    if k not in d:
+                        d[k] = []  # type: ignore
+                    for e in v:
+                        e = cast(DictionaryObject, e.get_object())
+                        if "/EF" in e:
+                            d[k].append(e["/EF"]["/F"].get_data())  # type: ignore
+                        elif "/RF" in e:
+                            r = cast(
+                                ArrayObject, cast(DictionaryObject, e["/RF"])["/F"]
+                            )
+                            di: Dict[str, bytes] = {}
+                            i = 0
+                            while i < len(r):
+                                di[cast(str, r[i])] = r[i + 1].get_object().get_data()
+                                i += 2
+                            d[k].append(di)
+            return d
+        else:
+            return {}
+
+    def _list_attachments(self) -> List[str]:
+        """
+        Retrieves the list of filenames of file attachments.
+
+        Returns:
+            list of filenames
+        """
+        ef = self._get_embedded_files_root()
+        if ef:
+            lst = ef.list_keys()
+        else:
+            lst = []
+        """
+        for ip, p in enumerate(self.pages):
+            for a in [_a.get_object()
+                      for _a in p.get("/Annots",[])]:
+                if _a.get_object().get("/Subtype","") != "/FileAttachements":
+                    continue
+                lst.append(f"$page_{ip}.{get_name_from_file_specification(_a)}")
+        """
+        return lst
+
+    def _get_attachment_list(self, name: str) -> List[Union[bytes, Dict[str, bytes]]]:
+        out = self._get_attachments(name)[name]
+        if isinstance(out, list):
+            return out
+        return [out]
+
+    def _get_attachments(
+        self, filename: Optional[str] = None
+    ) -> Dict[str, List[Union[bytes, Dict[str, bytes]]]]:
+        """
+        Retrieves all or selected file attachments of the PDF as a dictionary of file names
+        and the file data as a bytestring.
+
+        Args:
+            filename: If filename is None, then a dictionary of all attachments
+                will be returned, where the key is the filename and the value
+                is the content. Otherwise, a dictionary with just a single key
+                - the filename - and its content will be returned.
+
+        Returns:
+            dictionary of filename -> Union[bytestring, List[bytestring]];
+            if the filename exists multiple times, a list of the different versions is provided
+        """
+        ef = self._get_embedded_files_root()
+        if ef is None:
+            return {}
+        if filename is None:
+            return {k: v if len(v) > 1 else v[0] for k, v in self.attachments.items()}  # type: ignore
+        else:
+            lst = ef.list_get(filename)
+            return {
+                filename: [(x.get_object())["/EF"].get_object(  # type: ignore
+                    )["/F"].get_object().get_data() for x in lst]  # type: ignore
+                if isinstance(lst, list)
+                else (lst.get_object())["/EF"].get_object()["/F"].get_object().get_data()  # type: ignore
+            }
+
+
+class PdfFileReader(PdfReader):  # deprecated
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        deprecation_with_replacement("PdfFileReader", "PdfReader", "3.0.0")
+        if "strict" not in kwargs and len(args) < 2:
+            kwargs["strict"] = True  # maintain the default
+        super().__init__(*args, **kwargs)
diff --git a/pypdf/_writer.py b/pypdf/_writer.py
index 0ea864305..bea4c11ab 100644
--- a/pypdf/_writer.py
+++ b/pypdf/_writer.py
@@ -297,15 +297,13 @@ def _replace_object(
         obj: PdfObject,
     ) -> PdfObject:
         if isinstance(indirect_reference, IndirectObject):
-            assert indirect_reference.pdf == self
+            if indirect_reference.pdf != self:
+                raise ValueError("pdf must be self")
             indirect_reference = indirect_reference.idnum
         gen = self._objects[indirect_reference - 1].indirect_reference.generation  # type: ignore
         self._objects[indirect_reference - 1] = obj
-        return self._objects[indirect_reference - 1]
-        if indirect_reference.pdf != self:
-            raise ValueError("pdf must be self")
         obj.indirect_reference = IndirectObject(indirect_reference, gen, self)
-        return self._objects[indirect_reference.idnum - 1]  # type: ignore
+        return self._objects[indirect_reference - 1]  # type: ignore
 
     def _add_page(
         self,
@@ -744,25 +742,36 @@ def embedded_files(self) -> Optional[Mapping[str, List[PdfObject]]]:
         else:
             return None
 
+    def _list_attachments(self) -> List[str]:
+        ef = self._get_embedded_files_root()
+        if ef:
+            return ef.list_keys()
+        else:
+            return []
+
     @property
-    def attachments(self) -> Mapping[str, Union[List[bytes], List[Dict[str, bytes]]]]:
+    def attachments(self) -> Mapping[str, List[Union[bytes, Dict[str, bytes]]]]:
         ef = self._get_embedded_files_root()
         if ef:
-            d = {}
+            d: Dict[str, List[Union[bytes, Dict[str, bytes]]]] = {}
             for k, v in ef.list_items().items():
                 if isinstance(v, list):
                     if k not in d:
-                        d[k] = []
+                        d[k] = []  # type: ignore
                     for e in v:
-                        e = e.get_object()
+                        e = cast(DictionaryObject, e.get_object())
                         if "/EF" in e:
                             d[k].append(e["/EF"]["/F"].get_data())  # type: ignore
                         elif "/RF" in e:
-                            r = cast(ArrayObject, e["/RF"]["/F"])
+                            r = cast(
+                                ArrayObject, cast(DictionaryObject, e["/RF"])["/F"]
+                            )
                             di = {}
                             i = 0
                             while i < len(r):
-                                di[r[i]] = r[i + 1].get_object().get_data()
+                                di[cast(str, r[i])] = cast(
+                                    bytes, r[i + 1].get_object().get_data()
+                                )
                                 i += 2
                             d[k].append(di)
             return d
@@ -773,9 +782,10 @@ def add_attachment(
         self,
         filename: str,
         data: Union[str, bytes, List[Tuple[str, bytes]]],
+        overwrite: bool = True,
         fname: Optional[str] = None,
         desc: str = "",
-    ) -> DictionaryObject:
+    ) -> Optional[DictionaryObject]:
         """
         Embed a file inside the PDF.
 
@@ -793,6 +803,8 @@ def add_attachment(
         Returns:
             The filespec DictionaryObject
         """
+        if not overwrite and filename in self._list_attachments():
+            return None
         if fname is None:
             st = filename.replace("/", "\\/").replace("\\\\/", "\\/")
             fname = st.encode().decode("ansi", errors="xmlcharreplace")
@@ -862,7 +874,7 @@ def add_attachment(
             filespec[NameObject(FileSpecificationDictionaryEntries.EF)] = ef_entry
 
         nm = self._get_embedded_files_root() or self._create_attachment_root()
-        nm.list_add(filename, self._add_object(filespec))
+        nm.list_add(filename, filespec, overwrite=True)
         return filespec
 
     def addAttachment(self, fname: str, fdata: Union[str, bytes]) -> None:  # deprecated
@@ -872,7 +884,7 @@ def addAttachment(self, fname: str, fdata: Union[str, bytes]) -> None:  # deprec
         .. deprecated:: 1.28.0
         """
         deprecation_with_replacement("addAttachment", "add_attachment", "3.0.0")
-        return self.add_attachment(fname, fdata)
+        self.add_attachment(fname, fdata)
 
     def append_pages_from_reader(
         self,
diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py
index 6e78f2543..a0c56c69f 100644
--- a/pypdf/generic/_data_structures.py
+++ b/pypdf/generic/_data_structures.py
@@ -1457,11 +1457,14 @@ class NameTree(DictionaryObject):
     """
 
     def __init__(self, obj: Optional[PdfObject] = None) -> None:
+        DictionaryObject.__init__(self)
+        if obj is None:
+            self[NameObject("/Names")] = ArrayObject()
+            return
         if not isinstance(obj, DictionaryObject) or all(
             x not in obj for x in ("/Names", "/Kids")
         ):
             raise ValueError("source object is not a valid source object")
-        DictionaryObject.__init__(self)
         obj = cast(DictionaryObject, obj)
         if obj is not None:
             self.update(obj)
@@ -1603,16 +1606,20 @@ def list_add(
 
         def _update_limits(
             obj: DictionaryObject,
-            lo: Optional[TextStringObject],
-            hi: Optional[TextStringObject],
+            lo: Optional[Union[str, TextStringObject]],
+            hi: Optional[Union[str, TextStringObject]],
         ) -> bool:
             if "/Limits" not in obj:
                 return False
             a = cast("ArrayObject", obj["/Limits"])
             if lo is not None and lo < a[0]:
+                if not isinstance(lo, TextStringObject):
+                    lo = TextStringObject(lo)
                 a[0] = lo
                 return True
             if hi is not None and hi > a[0]:
+                if not isinstance(hi, TextStringObject):
+                    hi = TextStringObject(hi)  # normalise so /Limits[1] receives a TextStringObject
                 a[1] = hi
                 return True
             return False
@@ -1626,17 +1633,18 @@ def _add_in(
             o = cast(DictionaryObject, o)
             if "/Names" in o:
                 _l = cast(ArrayObject, o["/Names"])
-                li = o.get("/Limits", [_l[0], _l[-2]])
-                if not appb and key < li[0]:
-                    return None
-                if not app and key > li[1]:
-                    return None
+                if len(_l) > 0:
+                    li = o.get("/Limits", [_l[0], _l[-2]])
+                    if not appb and key < li[0]:
+                        return None
+                    if not app and key > li[1]:
+                        return None
                 i = 0
                 while i < len(_l):
                     if _l[i] == key:
-                        if not overwrite:
-                            continue
                         d = _l[i + 1]
+                        if not overwrite:
+                            return d
                         if isinstance(d, IndirectObject):
                             d.replace_object(data)
                         else:  # pragma: no cover
diff --git a/tests/test_writer.py b/tests/test_writer.py
index cab469903..cc0574459 100644
--- a/tests/test_writer.py
+++ b/tests/test_writer.py
@@ -1,1864 +1,1871 @@
-"""Test the pypdf._writer module."""
-import re
-import shutil
-import subprocess
-from io import BytesIO
-from pathlib import Path
-
-import pytest
-
-from pypdf import (
-    ObjectDeletionFlag,
-    PageObject,
-    PdfMerger,
-    PdfReader,
-    PdfWriter,
-    Transformation,
-)
-from pypdf.errors import DeprecationError, PageSizeNotDefinedError, PyPdfError
-from pypdf.generic import (
-    ArrayObject,
-    ContentStream,
-    DictionaryObject,
-    Fit,
-    IndirectObject,
-    NameObject,
-    NullObject,
-    NumberObject,
-    RectangleObject,
-    StreamObject,
-    TextStringObject,
-)
-
-from . import get_data_from_url, is_sublist
-from .test_images import image_similarity
-
-TESTS_ROOT = Path(__file__).parent.resolve()
-PROJECT_ROOT = TESTS_ROOT.parent
-RESOURCE_ROOT = PROJECT_ROOT / "resources"
-SAMPLE_ROOT = Path(PROJECT_ROOT) / "sample-files"
-GHOSTSCRIPT_BINARY = shutil.which("gs")
-
-
-def test_writer_exception_non_binary(tmp_path, caplog):
-    src = RESOURCE_ROOT / "pdflatex-outline.pdf"
-
-    reader = PdfReader(src)
-    writer = PdfWriter()
-    writer.add_page(reader.pages[0])
-
-    with open(tmp_path / "out.txt", "w") as fp, pytest.raises(TypeError):
-        writer.write_stream(fp)
-    ending = "to write to is not in binary mode. It may not be written to correctly.\n"
-    assert caplog.text.endswith(ending)
-
-
-def test_writer_clone():
-    src = RESOURCE_ROOT / "pdflatex-outline.pdf"
-
-    reader = PdfReader(src)
-    writer = PdfWriter(clone_from=reader)
-    assert len(writer.pages) == 4
-    assert "PageObject" in str(type(writer.pages[0]))
-
-    writer = PdfWriter(clone_from=src)
-    assert len(writer.pages) == 4
-    assert "PageObject" in str(type(writer.pages[0]))
-
-
-def test_writer_clone_bookmarks():
-    # Arrange
-    src = RESOURCE_ROOT / "Seige_of_Vicksburg_Sample_OCR-crazyones-merged.pdf"
-    reader = PdfReader(src)
-    writer = PdfWriter()
-
-    # Act + test cat
-    cat = ""
-
-    def cat1(p) -> None:
-        nonlocal cat
-        cat += p.__repr__()
-
-    writer.clone_document_from_reader(reader, cat1)
-    assert "/Page" in cat
-    assert writer.pages[0].raw_get("/Parent") == writer._pages
-    writer.add_outline_item("Page 1", 0)
-    writer.add_outline_item("Page 2", 1)
-
-    # Assert
-    bytes_stream = BytesIO()
-    writer.write(bytes_stream)
-    bytes_stream.seek(0)
-    reader2 = PdfReader(bytes_stream)
-    assert len(reader2.pages) == len(reader.pages)
-    assert len(reader2.outline) == 2
-
-    # test with append
-    writer = PdfWriter()
-    writer.append(reader)
-    writer.add_outline_item("Page 1", 0)
-    writer.add_outline_item("Page 2", 1)
-
-    # Assert
-    bytes_stream = BytesIO()
-    writer.write(bytes_stream)
-    bytes_stream.seek(0)
-    reader2 = PdfReader(bytes_stream)
-    assert len(reader2.pages) == len(reader.pages)
-    assert len(reader2.outline) == 2
-
-
-def writer_operate(writer: PdfWriter) -> None:
-    """
-    To test the writer that initialized by each of the four usages.
-
-    Args:
-        writer: A PdfWriter object
-    """
-    pdf_path = RESOURCE_ROOT / "crazyones.pdf"
-    pdf_outline_path = RESOURCE_ROOT / "pdflatex-outline.pdf"
-
-    reader = PdfReader(pdf_path)
-    reader_outline = PdfReader(pdf_outline_path)
-
-    page = reader.pages[0]
-    with pytest.raises(PageSizeNotDefinedError) as exc:
-        writer.add_blank_page()
-    assert exc.value.args == ()
-    writer.insert_page(page, 1)
-    writer.insert_page(reader_outline.pages[0], 0)
-    writer.add_outline_item_destination(page)
-    writer.remove_links()
-    writer.add_outline_item_destination(page)
-    oi = writer.add_outline_item(
-        "An outline item", 0, None, (255, 0, 15), True, True, Fit.fit_box_vertically(10)
-    )
-    writer.add_outline_item(
-        "The XYZ fit", 0, oi, (255, 0, 15), True, True, Fit.xyz(left=10, top=20, zoom=3)
-    )
-    writer.add_outline_item(
-        "The XYZ fit no args", 0, oi, (255, 0, 15), True, True, Fit.xyz()
-    )
-    writer.add_outline_item(
-        "The FitH fit", 0, oi, (255, 0, 15), True, True, Fit.fit_horizontally(top=10)
-    )
-    writer.add_outline_item(
-        "The FitV fit", 0, oi, (255, 0, 15), True, True, Fit.fit_vertically(left=10)
-    )
-    writer.add_outline_item(
-        "The FitR fit",
-        0,
-        oi,
-        (255, 0, 15),
-        True,
-        True,
-        Fit.fit_rectangle(left=10, bottom=20, right=30, top=40),
-    )
-    writer.add_outline_item(
-        "The FitB fit", 0, oi, (255, 0, 15), True, True, Fit.fit_box()
-    )
-    writer.add_outline_item(
-        "The FitBH fit",
-        0,
-        oi,
-        (255, 0, 15),
-        True,
-        True,
-        Fit.fit_box_horizontally(top=10),
-    )
-    writer.add_outline_item(
-        "The FitBV fit",
-        0,
-        oi,
-        (255, 0, 15),
-        True,
-        True,
-        Fit.fit_box_vertically(left=10),
-    )
-    writer.add_blank_page()
-    writer.add_uri(2, "https://example.com", RectangleObject([0, 0, 100, 100]))
-    with pytest.warns(
-        DeprecationWarning, match="'pagenum' argument of add_uri is deprecated"
-    ):
-        writer.add_uri(
-            2, "https://example.com", RectangleObject([0, 0, 100, 100]), pagenum=2
-        )
-    with pytest.raises(DeprecationError):
-        writer.add_link(2, 1, RectangleObject([0, 0, 100, 100]))
-    assert writer._get_page_layout() is None
-    writer.page_layout = "broken"
-    assert writer.page_layout == "broken"
-    writer.page_layout = NameObject("/SinglePage")
-    assert writer._get_page_layout() == "/SinglePage"
-    assert writer._get_page_mode() is None
-    writer.set_page_mode("/UseNone")
-    assert writer._get_page_mode() == "/UseNone"
-    writer.set_page_mode(NameObject("/UseOC"))
-    assert writer._get_page_mode() == "/UseOC"
-    writer.insert_blank_page(width=100, height=100)
-    writer.insert_blank_page()  # without parameters
-
-    writer.remove_images()
-
-    writer.add_metadata(reader.metadata)
-    writer.add_metadata({"/Author": "Martin Thoma"})
-    writer.add_metadata({"/MyCustom": 1234})
-
-    writer.add_attachment("foobar.gif", b"foobarcontent")
-
-    # Check that every key in _idnum_hash is correct
-    objects_hash = [o.hash_value() for o in writer._objects]
-    for k, v in writer._idnum_hash.items():
-        assert v.pdf == writer
-        assert k in objects_hash, f"Missing {v}"
-
-
-tmp_path = "dont_commit_writer.pdf"
-
-
-@pytest.mark.parametrize(
-    ("write_data_here", "needs_cleanup"),
-    [
-        ("dont_commit_writer.pdf", True),
-        (Path("dont_commit_writer.pdf"), True),
-        (BytesIO(), False),
-    ],
-)
-def test_writer_operations_by_traditional_usage(write_data_here, needs_cleanup):
-    writer = PdfWriter()
-
-    writer_operate(writer)
-
-    # finally, write "output" to pypdf-output.pdf
-    if needs_cleanup:
-        with open(write_data_here, "wb") as output_stream:
-            writer.write(output_stream)
-    else:
-        output_stream = write_data_here
-        writer.write(output_stream)
-
-    if needs_cleanup:
-        Path(write_data_here).unlink()
-
-
-@pytest.mark.parametrize(
-    ("write_data_here", "needs_cleanup"),
-    [
-        ("dont_commit_writer.pdf", True),
-        (Path("dont_commit_writer.pdf"), True),
-        (BytesIO(), False),
-    ],
-)
-def test_writer_operations_by_semi_traditional_usage(write_data_here, needs_cleanup):
-    with PdfWriter() as writer:
-        writer_operate(writer)
-
-        # finally, write "output" to pypdf-output.pdf
-        if needs_cleanup:
-            with open(write_data_here, "wb") as output_stream:
-                writer.write(output_stream)
-        else:
-            output_stream = write_data_here
-            writer.write(output_stream)
-
-    if needs_cleanup:
-        Path(write_data_here).unlink()
-
-
-@pytest.mark.parametrize(
-    ("write_data_here", "needs_cleanup"),
-    [
-        ("dont_commit_writer.pdf", True),
-        (Path("dont_commit_writer.pdf"), True),
-        (BytesIO(), False),
-    ],
-)
-def test_writer_operations_by_semi_new_traditional_usage(
-    write_data_here, needs_cleanup
-):
-    with PdfWriter() as writer:
-        writer_operate(writer)
-
-        # finally, write "output" to pypdf-output.pdf
-        writer.write(write_data_here)
-
-    if needs_cleanup:
-        Path(write_data_here).unlink()
-
-
-@pytest.mark.parametrize(
-    ("write_data_here", "needs_cleanup"),
-    [
-        ("dont_commit_writer.pdf", True),
-        (Path("dont_commit_writer.pdf"), True),
-        (BytesIO(), False),
-    ],
-)
-def test_writer_operation_by_new_usage(write_data_here, needs_cleanup):
-    # This includes write "output" to pypdf-output.pdf
-    with PdfWriter(write_data_here) as writer:
-        writer_operate(writer)
-
-    if needs_cleanup:
-        Path(write_data_here).unlink()
-
-
-@pytest.mark.parametrize(
-    "input_path",
-    [
-        "side-by-side-subfig.pdf",
-        "reportlab-inline-image.pdf",
-    ],
-)
-def test_remove_images(pdf_file_path, input_path):
-    pdf_path = RESOURCE_ROOT / input_path
-
-    reader = PdfReader(pdf_path)
-    writer = PdfWriter()
-
-    page = reader.pages[0]
-    writer.insert_page(page, 0)
-    writer.remove_images()
-    page_contents_stream = writer.pages[0]["/Contents"]._data
-    assert len(page_contents_stream.strip())
-
-    # finally, write "output" to pypdf-output.pdf
-    with open(pdf_file_path, "wb") as output_stream:
-        writer.write(output_stream)
-
-    with open(pdf_file_path, "rb") as input_stream:
-        reader = PdfReader(input_stream)
-        if input_path == "side-by-side-subfig.pdf":
-            extracted_text = reader.pages[0].extract_text()
-            assert extracted_text
-            assert "Lorem ipsum dolor sit amet" in extracted_text
-
-
-@pytest.mark.enable_socket()
-def test_remove_images_sub_level():
-    """Cf #2035"""
-    url = "https://github.com/py-pdf/pypdf/files/12394781/2210.03142-1.pdf"
-    name = "iss2103.pdf"
-    writer = PdfWriter(clone_from=BytesIO(get_data_from_url(url, name=name)))
-    writer.remove_images()
-    assert (
-        len(
-            [
-                o.get_object()
-                for o in writer.pages[0]["/Resources"]["/XObject"]["/Fm1"][
-                    "/Resources"
-                ]["/XObject"]["/Im1"]["/Resources"]["/XObject"].values()
-                if not isinstance(o.get_object(), NullObject)
-            ]
-        )
-        == 0
-    )
-
-
-@pytest.mark.parametrize(
-    "input_path",
-    [
-        "side-by-side-subfig.pdf",
-        "reportlab-inline-image.pdf",
-    ],
-)
-def test_remove_text(input_path, pdf_file_path):
-    pdf_path = RESOURCE_ROOT / input_path
-
-    reader = PdfReader(pdf_path)
-    writer = PdfWriter()
-
-    page = reader.pages[0]
-    writer.insert_page(page, 0)
-    writer.remove_text()
-
-    # finally, write "output" to pypdf-output.pdf
-    with open(pdf_file_path, "wb") as output_stream:
-        writer.write(output_stream)
-
-
-def test_remove_text_all_operators(pdf_file_path):
-    stream = (
-        b"BT "
-        b"/F0 36 Tf "
-        b"50 706 Td "
-        b"36 TL "
-        b"(The Tj operator) Tj "
-        b'1 2 (The double quote operator) " '
-        b"(The single quote operator) ' "
-        b"ET"
-    )
-    pdf_data = (
-        b"%%PDF-1.7\n"
-        b"1 0 obj << /Count 1 /Kids [5 0 R] /Type /Pages >> endobj\n"
-        b"2 0 obj << >> endobj\n"
-        b"3 0 obj << >> endobj\n"
-        b"4 0 obj << /Length %d >>\n"
-        b"stream\n" + (b"%s\n" % stream) + b"endstream\n"
-        b"endobj\n"
-        b"5 0 obj << /Contents 4 0 R /CropBox [0.0 0.0 2550.0 3508.0]\n"
-        b" /MediaBox [0.0 0.0 2550.0 3508.0] /Parent 1 0 R"
-        b" /Resources << /Font << >> >>"
-        b" /Rotate 0 /Type /Page >> endobj\n"
-        b"6 0 obj << /Pages 1 0 R /Type /Catalog >> endobj\n"
-        b"xref 1 6\n"
-        b"%010d 00000 n\n"
-        b"%010d 00000 n\n"
-        b"%010d 00000 n\n"
-        b"%010d 00000 n\n"
-        b"%010d 00000 n\n"
-        b"%010d 00000 n\n"
-        b"trailer << /Root 6 0 R /Size 6 >>\n"
-        b"startxref\n%d\n"
-        b"%%%%EOF"
-    )
-    startx_correction = -1
-    pdf_data = pdf_data % (
-        len(stream),
-        pdf_data.find(b"1 0 obj") + startx_correction,
-        pdf_data.find(b"2 0 obj") + startx_correction,
-        pdf_data.find(b"3 0 obj") + startx_correction,
-        pdf_data.find(b"4 0 obj") + startx_correction,
-        pdf_data.find(b"5 0 obj") + startx_correction,
-        pdf_data.find(b"6 0 obj") + startx_correction,
-        # startx_correction should be -1 due to double % at the beginning
-        # inducing an error on startxref computation
-        pdf_data.find(b"xref"),
-    )
-    pdf_stream = BytesIO(pdf_data)
-
-    reader = PdfReader(pdf_stream, strict=False)
-    writer = PdfWriter()
-
-    page = reader.pages[0]
-    writer.insert_page(page, 0)
-    writer.remove_text()
-
-    # finally, write "output" to pypdf-output.pdf
-    with open(pdf_file_path, "wb") as output_stream:
-        writer.write(output_stream)
-
-
-def test_write_metadata(pdf_file_path):
-    pdf_path = RESOURCE_ROOT / "crazyones.pdf"
-
-    reader = PdfReader(pdf_path)
-    writer = PdfWriter()
-
-    writer.add_page(reader.pages[0])
-    for page in reader.pages:
-        writer.add_page(page)
-
-    metadata = reader.metadata
-    writer.add_metadata(metadata)
-
-    writer.add_metadata({"/Title": "The Crazy Ones"})
-
-    # finally, write data to pypdf-output.pdf
-    with open(pdf_file_path, "wb") as output_stream:
-        writer.write(output_stream)
-
-    # Check if the title was set
-    reader = PdfReader(pdf_file_path)
-    metadata = reader.metadata
-    assert metadata.get("/Title") == "The Crazy Ones"
-
-
-def test_fill_form(pdf_file_path):
-    reader = PdfReader(RESOURCE_ROOT / "form.pdf")
-    writer = PdfWriter()
-
-    writer.append(reader, [0])
-    writer.append(RESOURCE_ROOT / "crazyones.pdf", [0])
-
-    writer.update_page_form_field_values(
-        writer.pages[0], {"foo": "some filled in text"}, flags=1
-    )
-
-    # check if no fields to fill in the page
-    writer.update_page_form_field_values(
-        writer.pages[1], {"foo": "some filled in text"}, flags=1
-    )
-
-    writer.update_page_form_field_values(
-        writer.pages[0], {"foo": "some filled in text"}
-    )
-
-    # write "output" to pypdf-output.pdf
-    with open(pdf_file_path, "wb") as output_stream:
-        writer.write(output_stream)
-
-
-def test_fill_form_with_qualified():
-    reader = PdfReader(RESOURCE_ROOT / "form.pdf")
-    reader.add_form_topname("top")
-
-    writer = PdfWriter()
-    writer.clone_document_from_reader(reader)
-    writer.add_page(reader.pages[0])
-    writer.update_page_form_field_values(
-        writer.pages[0], {"top.foo": "filling"}, flags=1
-    )
-    b = BytesIO()
-    writer.write(b)
-
-    reader2 = PdfReader(b)
-    fields = reader2.get_fields()
-    assert fields["top.foo"]["/V"] == "filling"
-
-
-@pytest.mark.parametrize(
-    ("use_128bit", "user_password", "owner_password"),
-    [(True, "userpwd", "ownerpwd"), (False, "userpwd", "ownerpwd")],
-)
-def test_encrypt(use_128bit, user_password, owner_password, pdf_file_path):
-    reader = PdfReader(RESOURCE_ROOT / "form.pdf")
-    writer = PdfWriter()
-
-    page = reader.pages[0]
-    orig_text = page.extract_text()
-
-    writer.add_page(page)
-
-    with pytest.raises(ValueError, match="owner_pwd of encrypt is deprecated."):
-        writer.encrypt(
-            owner_pwd=user_password,
-            owner_password=owner_password,
-            user_password=user_password,
-            use_128bit=use_128bit,
-        )
-    with pytest.raises(ValueError, match="'user_pwd' argument is deprecated"):
-        writer.encrypt(
-            owner_password=owner_password,
-            user_password=user_password,
-            user_pwd=user_password,
-            use_128bit=use_128bit,
-        )
-    writer.encrypt(
-        user_password=user_password,
-        owner_password=owner_password,
-        use_128bit=use_128bit,
-    )
-
-    # write "output" to pypdf-output.pdf
-    with open(pdf_file_path, "wb") as output_stream:
-        writer.write(output_stream)
-
-    # Test that the data is not there in clear text
-    with open(pdf_file_path, "rb") as input_stream:
-        data = input_stream.read()
-    assert b"foo" not in data
-
-    # Test the user password (str):
-    reader = PdfReader(pdf_file_path, password="userpwd")
-    new_text = reader.pages[0].extract_text()
-    assert reader.metadata.get("/Producer") == "pypdf"
-    assert new_text == orig_text
-
-    # Test the owner password (str):
-    reader = PdfReader(pdf_file_path, password="ownerpwd")
-    new_text = reader.pages[0].extract_text()
-    assert reader.metadata.get("/Producer") == "pypdf"
-    assert new_text == orig_text
-
-    # Test the user password (bytes):
-    reader = PdfReader(pdf_file_path, password=b"userpwd")
-    new_text = reader.pages[0].extract_text()
-    assert reader.metadata.get("/Producer") == "pypdf"
-    assert new_text == orig_text
-
-    # Test the owner password (stbytesr):
-    reader = PdfReader(pdf_file_path, password=b"ownerpwd")
-    new_text = reader.pages[0].extract_text()
-    assert reader.metadata.get("/Producer") == "pypdf"
-    assert new_text == orig_text
-
-
-def test_add_outline_item(pdf_file_path):
-    reader = PdfReader(RESOURCE_ROOT / "pdflatex-outline.pdf")
-    writer = PdfWriter()
-
-    for page in reader.pages:
-        writer.add_page(page)
-
-    outline_item = writer.add_outline_item(
-        "An outline item",
-        1,
-        None,
-        (255, 0, 15),
-        True,
-        True,
-        Fit.fit(),
-        is_open=False,
-    )
-    _o2a = writer.add_outline_item(
-        "Another", 2, outline_item, None, False, False, Fit.fit()
-    )
-    _o2b = writer.add_outline_item(
-        "Another bis", 2, outline_item, None, False, False, Fit.fit()
-    )
-    outline_item2 = writer.add_outline_item(
-        "An outline item 2",
-        1,
-        None,
-        (255, 0, 15),
-        True,
-        True,
-        Fit.fit(),
-        is_open=True,
-    )
-    _o3a = writer.add_outline_item(
-        "Another 2", 2, outline_item2, None, False, False, Fit.fit()
-    )
-    _o3b = writer.add_outline_item(
-        "Another 2bis", 2, outline_item2, None, False, False, Fit.fit()
-    )
-
-    # write "output" to pypdf-output.pdf
-    with open(pdf_file_path, "w+b") as output_stream:
-        writer.write(output_stream)
-        output_stream.seek(0)
-        reader = PdfReader(output_stream)
-        assert reader.trailer["/Root"]["/Outlines"]["/Count"] == 3
-        assert reader.outline[0]["/Count"] == -2
-        assert reader.outline[0]["/%is_open%"] == False  # noqa
-        assert reader.outline[2]["/Count"] == 2
-        assert reader.outline[2]["/%is_open%"] == True  # noqa
-        assert reader.outline[1][0]["/Count"] == 0
-
-
-def test_add_named_destination(pdf_file_path):
-    reader = PdfReader(RESOURCE_ROOT / "pdflatex-outline.pdf")
-    writer = PdfWriter()
-    assert writer.get_named_dest_root() == []
-
-    for page in reader.pages:
-        writer.add_page(page)
-
-    assert writer.get_named_dest_root() == []
-
-    writer.add_named_destination(TextStringObject("A named dest"), 2)
-    writer.add_named_destination(TextStringObject("A named dest2"), 2)
-
-    with pytest.warns(DeprecationWarning, match="pagenum is deprecated as an argument"):
-        writer.add_named_destination(TextStringObject("A named dest3"), pagenum=2)
-
-    with pytest.raises(ValueError):
-        writer.add_named_destination(
-            TextStringObject("A named dest3"), pagenum=2, page_number=2
-        )
-
-    root = writer.get_named_dest_root()
-    assert root[0] == "A named dest"
-    assert root[1].pdf == writer
-    assert root[1].get_object()["/S"] == NameObject("/GoTo")
-    assert root[1].get_object()["/D"][0] == writer.pages[2].indirect_reference
-    assert root[2] == "A named dest2"
-    assert root[3].pdf == writer
-    assert root[3].get_object()["/S"] == NameObject("/GoTo")
-    assert root[3].get_object()["/D"][0] == writer.pages[2].indirect_reference
-    assert root[4] == "A named dest3"
-
-    # test get_object
-
-    assert writer.get_object(root[1].idnum) == writer.get_object(root[1])
-    with pytest.raises(ValueError) as exc:
-        writer.get_object(reader.pages[0].indirect_reference)
-    assert exc.value.args[0] == "pdf must be self"
-
-    # write "output" to pypdf-output.pdf
-    with open(pdf_file_path, "wb") as output_stream:
-        writer.write(output_stream)
-
-
-def test_add_named_destination_sort_order(pdf_file_path):
-    """
-    Issue #1927 does not appear.
-
-    add_named_destination() maintains the named destination list sort order
-    """
-    writer = PdfWriter()
-
-    assert writer.get_named_dest_root() == []
-
-    writer.add_blank_page(200, 200)
-    writer.add_named_destination("b", 0)
-    # "a" should be moved before "b" on insert
-    writer.add_named_destination("a", 0)
-
-    root = writer.get_named_dest_root()
-
-    assert len(root) == 4
-    assert (
-        root[0] == "a"
-    ), '"a" was not inserted before "b" in the named destination root'
-    assert root[2] == "b"
-
-    # write "output" to pypdf-output.pdf
-    with open(pdf_file_path, "wb") as output_stream:
-        writer.write(output_stream)
-
-
-def test_add_uri(pdf_file_path):
-    reader = PdfReader(RESOURCE_ROOT / "pdflatex-outline.pdf")
-    writer = PdfWriter()
-
-    for page in reader.pages:
-        writer.add_page(page)
-
-    writer.add_uri(
-        1,
-        "http://www.example.com",
-        RectangleObject([0, 0, 100, 100]),
-        border=[1, 2, 3, [4]],
-    )
-    writer.add_uri(
-        2,
-        "https://pypdf.readthedocs.io/en/latest/",
-        RectangleObject([20, 30, 50, 80]),
-        border=[1, 2, 3],
-    )
-    writer.add_uri(
-        3,
-        "https://pypdf.readthedocs.io/en/latest/user/adding-pdf-annotations.html",
-        "[ 200 300 250 350 ]",
-        border=[0, 0, 0],
-    )
-    writer.add_uri(
-        3,
-        "https://pypdf.readthedocs.io/en/latest/user/adding-pdf-annotations.html",
-        [100, 200, 150, 250],
-        border=[0, 0, 0],
-    )
-
-    # write "output" to pypdf-output.pdf
-    with open(pdf_file_path, "wb") as output_stream:
-        writer.write(output_stream)
-
-
-def test_add_link(pdf_file_path):
-    reader = PdfReader(RESOURCE_ROOT / "pdflatex-outline.pdf")
-    writer = PdfWriter()
-
-    for page in reader.pages:
-        writer.add_page(page)
-
-    with pytest.raises(
-        DeprecationError,
-        match=(
-            re.escape(
-                "add_link is deprecated and was removed in pypdf 3.0.0. "
-                "Use add_annotation(pypdf.annotations.Link(...)) instead."
-            )
-        ),
-    ):
-        writer.add_link(
-            1,
-            2,
-            RectangleObject([0, 0, 100, 100]),
-            border=[1, 2, 3, [4]],
-            fit="/Fit",
-        )
-        writer.add_link(
-            2, 3, RectangleObject([20, 30, 50, 80]), [1, 2, 3], "/FitH", None
-        )
-        writer.add_link(
-            3,
-            0,
-            "[ 200 300 250 350 ]",
-            [0, 0, 0],
-            "/XYZ",
-            0,
-            0,
-            2,
-        )
-        writer.add_link(
-            3,
-            0,
-            [100, 200, 150, 250],
-            border=[0, 0, 0],
-        )
-
-    # write "output" to pypdf-output.pdf
-    with open(pdf_file_path, "wb") as output_stream:
-        writer.write(output_stream)
-
-
-def test_io_streams():
-    """This is the example from the docs ("Streaming data")."""
-    filepath = RESOURCE_ROOT / "pdflatex-outline.pdf"
-    with open(filepath, "rb") as fh:
-        bytes_stream = BytesIO(fh.read())
-
-    # Read from bytes stream
-    reader = PdfReader(bytes_stream)
-    assert len(reader.pages) == 4
-
-    # Write to bytes stream
-    writer = PdfWriter()
-    with BytesIO() as output_stream:
-        writer.write(output_stream)
-
-
-def test_regression_issue670(pdf_file_path):
-    filepath = RESOURCE_ROOT / "crazyones.pdf"
-    reader = PdfReader(filepath, strict=False)
-    for _ in range(2):
-        writer = PdfWriter()
-        writer.add_page(reader.pages[0])
-        with open(pdf_file_path, "wb") as f_pdf:
-            writer.write(f_pdf)
-
-
-def test_issue301():
-    """Test with invalid stream length object."""
-    with open(RESOURCE_ROOT / "issue-301.pdf", "rb") as f:
-        reader = PdfReader(f)
-        writer = PdfWriter()
-        writer.append_pages_from_reader(reader)
-        b = BytesIO()
-        writer.write(b)
-
-
-def test_append_pages_from_reader_append():
-    """Use append_pages_from_reader with a callable."""
-    with open(RESOURCE_ROOT / "issue-301.pdf", "rb") as f:
-        reader = PdfReader(f)
-        writer = PdfWriter()
-        writer.append_pages_from_reader(reader, callable)
-        b = BytesIO()
-        writer.write(b)
-
-
-@pytest.mark.enable_socket()
-@pytest.mark.slow()
-@pytest.mark.filterwarnings("ignore::DeprecationWarning")
-def test_sweep_indirect_references_nullobject_exception(pdf_file_path):
-    # TODO: Check this more closely... this looks weird
-    url = "https://corpora.tika.apache.org/base/docs/govdocs1/924/924666.pdf"
-    name = "tika-924666.pdf"
-    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
-    merger = PdfMerger()
-    merger.append(reader)
-    merger.write(pdf_file_path)
-
-
-@pytest.mark.enable_socket()
-@pytest.mark.slow()
-@pytest.mark.parametrize(
-    ("url", "name"),
-    [
-        (
-            "https://corpora.tika.apache.org/base/docs/govdocs1/924/924666.pdf",
-            "test_sweep_indirect_references_nullobject_exception.pdf",
-        ),
-        (
-            "https://corpora.tika.apache.org/base/docs/govdocs1/922/922840.pdf",
-            "test_write_outline_item_on_page_fitv.pdf",
-        ),
-        ("https://github.com/py-pdf/pypdf/files/10715624/test.pdf", "iss1627.pdf"),
-    ],
-)
-@pytest.mark.filterwarnings("ignore::DeprecationWarning")
-def test_some_appends(pdf_file_path, url, name):
-    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
-    # PdfMerger
-    merger = PdfMerger()
-    merger.append(reader)
-    merger.write(pdf_file_path)
-    # PdfWriter
-    merger = PdfWriter()
-    merger.append(reader)
-    merger.write(pdf_file_path)
-
-
-def test_pdf_header():
-    writer = PdfWriter()
-    assert writer.pdf_header == b"%PDF-1.3"
-
-    reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf")
-    writer.add_page(reader.pages[0])
-    assert writer.pdf_header == b"%PDF-1.5"
-
-    writer.pdf_header = b"%PDF-1.6"
-    assert writer.pdf_header == b"%PDF-1.6"
-
-
-def test_write_dict_stream_object(pdf_file_path):
-    stream = (
-        b"BT "
-        b"/F0 36 Tf "
-        b"50 706 Td "
-        b"36 TL "
-        b"(The Tj operator) Tj "
-        b'1 2 (The double quote operator) " '
-        b"(The single quote operator) ' "
-        b"ET"
-    )
-
-    stream_object = StreamObject()
-    stream_object[NameObject("/Type")] = NameObject("/Text")
-    stream_object._data = stream
-
-    writer = PdfWriter()
-
-    page_object = PageObject.create_blank_page(writer, 1000, 1000)
-    # Construct dictionary object (PageObject) with stream object
-    # Writer will replace this stream object with indirect object
-    page_object[NameObject("/Test")] = stream_object
-
-    page_object = writer.add_page(page_object)
-    with open(pdf_file_path, "wb") as fp:
-        writer.write(fp)
-
-    for k, v in page_object.items():
-        if k == "/Test":
-            assert str(v) != str(stream_object)
-            assert isinstance(v, IndirectObject)
-            assert str(v.get_object()) == str(stream_object)
-            break
-    else:
-        pytest.fail("/Test not found")
-
-    # Check that every key in _idnum_hash is correct
-    objects_hash = [o.hash_value() for o in writer._objects]
-    for k, v in writer._idnum_hash.items():
-        assert v.pdf == writer
-        assert k in objects_hash, "Missing %s" % v
-
-
-def test_add_single_annotation(pdf_file_path):
-    pdf_path = RESOURCE_ROOT / "crazyones.pdf"
-    reader = PdfReader(pdf_path)
-    page = reader.pages[0]
-    writer = PdfWriter()
-    writer.add_page(page)
-
-    annot_dict = {
-        "/Type": "/Annot",
-        "/Subtype": "/Text",
-        "/Rect": [270.75, 596.25, 294.75, 620.25],
-        "/Contents": "Note in second paragraph",
-        "/C": [1, 1, 0],
-        "/M": "D:20220406191858+02'00",
-        "/Popup": {
-            "/Type": "/Annot",
-            "/Subtype": "/Popup",
-            "/Rect": [294.75, 446.25, 494.75, 596.25],
-            "/M": "D:20220406191847+02'00",
-        },
-        "/T": "moose",
-    }
-    writer.add_annotation(0, annot_dict)
-
-    # Inspect manually by adding 'assert False' and viewing the PDF
-    with open(pdf_file_path, "wb") as fp:
-        writer.write(fp)
-
-
-def test_deprecation_bookmark_decorator():
-    reader = PdfReader(RESOURCE_ROOT / "outlines-with-invalid-destinations.pdf")
-    page = reader.pages[0]
-    outline_item = reader.outline[0]
-    writer = PdfWriter()
-    writer.add_page(page)
-    with pytest.raises(
-        DeprecationError,
-        match="bookmark is deprecated as an argument. Use outline_item instead",
-    ):
-        writer.add_outline_item_dict(bookmark=outline_item)
-
-
-@pytest.mark.samples()
-def test_colors_in_outline_item(pdf_file_path):
-    reader = PdfReader(SAMPLE_ROOT / "004-pdflatex-4-pages/pdflatex-4-pages.pdf")
-    writer = PdfWriter()
-    writer.clone_document_from_reader(reader)
-    purple_rgb = (0.5019607843137255, 0.0, 0.5019607843137255)
-    writer.add_outline_item("First Outline Item", page_number=2, color="800080")
-    writer.add_outline_item("Second Outline Item", page_number=3, color="#800080")
-    writer.add_outline_item("Third Outline Item", page_number=4, color=purple_rgb)
-
-    with open(pdf_file_path, "wb") as f:
-        writer.write(f)
-
-    reader2 = PdfReader(pdf_file_path)
-    for outline_item in reader2.outline:
-        # convert float to string because of mutability
-        assert ["%.5f" % c for c in outline_item.color] == [
-            "%.5f" % p for p in purple_rgb
-        ]
-
-
-@pytest.mark.samples()
-def test_write_empty_stream():
-    reader = PdfReader(SAMPLE_ROOT / "004-pdflatex-4-pages/pdflatex-4-pages.pdf")
-    writer = PdfWriter()
-    writer.clone_document_from_reader(reader)
-
-    with pytest.raises(ValueError) as exc:
-        writer.write("")
-    assert exc.value.args[0] == "Output(stream=) is empty."
-
-
-def test_startup_dest():
-    pdf_file_writer = PdfWriter()
-    pdf_file_writer.append_pages_from_reader(PdfReader(RESOURCE_ROOT / "issue-604.pdf"))
-
-    assert pdf_file_writer.open_destination is None
-    pdf_file_writer.open_destination = pdf_file_writer.pages[9]
-    # checked also using Acrobrat to verify the good page is opened
-    op = pdf_file_writer._root_object["/OpenAction"]
-    assert op[0] == pdf_file_writer.pages[9].indirect_reference
-    assert op[1] == "/Fit"
-    op = pdf_file_writer.open_destination
-    assert op.raw_get("/Page") == pdf_file_writer.pages[9].indirect_reference
-    assert op["/Type"] == "/Fit"
-    pdf_file_writer.open_destination = op
-    assert pdf_file_writer.open_destination == op
-
-    # irrelevant, just for coverage
-    pdf_file_writer._root_object[NameObject("/OpenAction")][0] = NumberObject(0)
-    pdf_file_writer.open_destination
-    with pytest.raises(Exception) as exc:
-        del pdf_file_writer._root_object[NameObject("/OpenAction")][0]
-        pdf_file_writer.open_destination
-    assert "Invalid Destination" in str(exc.value)
-
-    pdf_file_writer.open_destination = "Test"
-    # checked also using Acrobrat to verify open_destination
-    op = pdf_file_writer._root_object["/OpenAction"]
-    assert isinstance(op, TextStringObject)
-    assert op == "Test"
-    op = pdf_file_writer.open_destination
-    assert isinstance(op, TextStringObject)
-    assert op == "Test"
-
-    # irrelevant, this is just for coverage
-    pdf_file_writer._root_object[NameObject("/OpenAction")] = NumberObject(0)
-    assert pdf_file_writer.open_destination is None
-    pdf_file_writer.open_destination = None
-    assert "/OpenAction" not in pdf_file_writer._root_object
-    pdf_file_writer.open_destination = None
-
-
-@pytest.mark.enable_socket()
-def test_iss471():
-    url = "https://github.com/py-pdf/pypdf/files/9139245/book.pdf"
-    name = "book_471.pdf"
-    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
-
-    writer = PdfWriter()
-    writer.append(reader, excluded_fields=[])
-    assert isinstance(
-        writer.pages[0]["/Annots"][0].get_object()["/Dest"], TextStringObject
-    )
-
-
-@pytest.mark.enable_socket()
-def test_reset_translation():
-    url = "https://corpora.tika.apache.org/base/docs/govdocs1/924/924666.pdf"
-    name = "tika-924666.pdf"
-    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
-    writer = PdfWriter()
-    writer.append(reader, (0, 10))
-    nb = len(writer._objects)
-    writer.append(reader, (0, 10))
-    assert (
-        len(writer._objects) == nb + 11
-    )  # +10 (pages) +1 because of the added outline
-    nb += 1
-    writer.reset_translation(reader)
-    writer.append(reader, (0, 10))
-    assert len(writer._objects) >= nb + 200
-    nb = len(writer._objects)
-    writer.reset_translation(reader.pages[0].indirect_reference)
-    writer.append(reader, (0, 10))
-    assert len(writer._objects) >= nb + 200
-    nb = len(writer._objects)
-    writer.reset_translation()
-    writer.append(reader, (0, 10))
-    assert len(writer._objects) >= nb + 200
-    nb = len(writer.pages)
-    writer.append(reader, [reader.pages[0], reader.pages[0]])
-    assert len(writer.pages) == nb + 2
-
-
-def test_threads_empty():
-    writer = PdfWriter()
-    thr = writer.threads
-    assert isinstance(thr, ArrayObject)
-    assert len(thr) == 0
-    thr2 = writer.threads
-    assert thr == thr2
-
-
-@pytest.mark.enable_socket()
-def test_append_without_annots_and_articles():
-    url = "https://corpora.tika.apache.org/base/docs/govdocs1/924/924666.pdf"
-    name = "tika-924666.pdf"
-    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
-    writer = PdfWriter()
-    writer.append(reader, None, (0, 10), True, ["/B"])
-    writer.reset_translation()
-    writer.append(reader, (0, 10), True, ["/B"])
-    assert writer.threads == []
-    writer = PdfWriter()
-    writer.append(reader, None, (0, 10), True, ["/Annots"])
-    assert "/Annots" not in writer.pages[5]
-    writer = PdfWriter()
-    writer.append(reader, None, (0, 10), True, [])
-    assert "/Annots" in writer.pages[5]
-    assert len(writer.threads) >= 1
-
-
-@pytest.mark.enable_socket()
-def test_append_multiple():
-    url = "https://corpora.tika.apache.org/base/docs/govdocs1/924/924666.pdf"
-    name = "tika-924666.pdf"
-    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
-    writer = PdfWriter()
-    writer.append(
-        reader, [0, 0, 0]
-    )  # to demonstre multiple insertion of same page at once
-    writer.append(reader, [0, 0, 0])  # second pack
-    pages = writer._root_object["/Pages"]["/Kids"]
-    assert pages[0] not in pages[1:]  # page not repeated
-    assert pages[-1] not in pages[0:-1]  # page not repeated
-
-
-@pytest.mark.samples()
-def test_set_page_label(pdf_file_path):
-    src = RESOURCE_ROOT / "GeoBase_NHNC1_Data_Model_UML_EN.pdf"  # File without labels
-    reader = PdfReader(src)
-
-    expected = [
-        "i",
-        "ii",
-        "1",
-        "2",
-        "A",
-        "B",
-        "1",
-        "2",
-        "3",
-        "4",
-        "A",
-        "i",
-        "I",
-        "II",
-        "1",
-        "2",
-        "3",
-        "I",
-        "II",
-    ]
-
-    # Tests full lenght with labels assigned at first and last elements
-    # Tests different labels assigned to consecutive ranges
-    writer = PdfWriter()
-    writer.clone_document_from_reader(reader)
-    writer.set_page_label(0, 1, "/r")
-    writer.set_page_label(4, 5, "/A")
-    writer.set_page_label(10, 10, "/A")
-    writer.set_page_label(11, 11, "/r")
-    writer.set_page_label(12, 13, "/R")
-    writer.set_page_label(17, 18, "/R")
-    writer.write(pdf_file_path)
-    assert PdfReader(pdf_file_path).page_labels == expected
-
-    writer = PdfWriter()  # Same labels, different set order
-    writer.clone_document_from_reader(reader)
-    writer.set_page_label(17, 18, "/R")
-    writer.set_page_label(4, 5, "/A")
-    writer.set_page_label(10, 10, "/A")
-    writer.set_page_label(0, 1, "/r")
-    writer.set_page_label(12, 13, "/R")
-    writer.set_page_label(11, 11, "/r")
-    writer.write(pdf_file_path)
-    assert PdfReader(pdf_file_path).page_labels == expected
-
-    # Tests labels assigned only in the middle
-    # Tests label assigned to a range already containing labled ranges
-    expected = ["1", "2", "i", "ii", "iii", "iv", "v", "1"]
-    writer = PdfWriter()
-    writer.clone_document_from_reader(reader)
-    writer.set_page_label(3, 4, "/a")
-    writer.set_page_label(5, 5, "/A")
-    writer.set_page_label(2, 6, "/r")
-    writer.write(pdf_file_path)
-    assert PdfReader(pdf_file_path).page_labels[: len(expected)] == expected
-
-    # Tests labels assigned inside a previously existing range
-    expected = ["1", "2", "i", "a", "b", "A", "1", "1", "2"]
-    # Ones repeat because user didnt cover the entire original range
-    writer = PdfWriter()
-    writer.clone_document_from_reader(reader)
-    writer.set_page_label(2, 6, "/r")
-    writer.set_page_label(3, 4, "/a")
-    writer.set_page_label(5, 5, "/A")
-    writer.write(pdf_file_path)
-    assert PdfReader(pdf_file_path).page_labels[: len(expected)] == expected
-
-    # Tests invalid user input
-    writer = PdfWriter()
-    writer.clone_document_from_reader(reader)
-    with pytest.raises(
-        ValueError, match="at least one between style and prefix must be given"
-    ):
-        writer.set_page_label(0, 5, start=2)
-    with pytest.raises(
-        ValueError, match="page_index_from must be equal or greater then 0"
-    ):
-        writer.set_page_label(-1, 5, "/r")
-    with pytest.raises(
-        ValueError, match="page_index_to must be equal or greater then page_index_from"
-    ):
-        writer.set_page_label(5, 0, "/r")
-    with pytest.raises(ValueError, match="page_index_to exceeds number of pages"):
-        writer.set_page_label(0, 19, "/r")
-    with pytest.raises(
-        ValueError, match="if given, start must be equal or greater than one"
-    ):
-        writer.set_page_label(0, 5, "/r", start=-1)
-
-    pdf_file_path.unlink()
-
-    src = (
-        SAMPLE_ROOT / "009-pdflatex-geotopo/GeoTopo.pdf"
-    )  # File with pre existing labels
-    reader = PdfReader(src)
-
-    # Tests adding labels to existing ones
-    expected = ["i", "ii", "A", "B", "1"]
-    writer = PdfWriter()
-    writer.clone_document_from_reader(reader)
-    writer.set_page_label(2, 3, "/A")
-    writer.write(pdf_file_path)
-    assert PdfReader(pdf_file_path).page_labels[: len(expected)] == expected
-
-    # Tests replacing existing lables
-    expected = ["A", "B", "1", "1", "2"]
-    writer = PdfWriter()
-    writer.clone_document_from_reader(reader)
-    writer.set_page_label(0, 1, "/A")
-    writer.write(pdf_file_path)
-    assert PdfReader(pdf_file_path).page_labels[: len(expected)] == expected
-
-    pdf_file_path.unlink()
-
-    # Tests prefix and start.
-    src = RESOURCE_ROOT / "issue-604.pdf"  # File without page labels
-    reader = PdfReader(src)
-    writer = PdfWriter()
-    writer.clone_document_from_reader(reader)
-
-    writer.set_page_label(0, 0, prefix="FRONT")
-    writer.set_page_label(1, 2, "/D", start=2)
-    writer.set_page_label(3, 6, prefix="UPDATES")
-    writer.set_page_label(7, 10, "/D", prefix="THYR-")
-    writer.set_page_label(11, 21, "/D", prefix="PAP-")
-    writer.set_page_label(22, 30, "/D", prefix="FOLL-")
-    writer.set_page_label(31, 39, "/D", prefix="HURT-")
-    writer.write(pdf_file_path)
-
-
-@pytest.mark.enable_socket()
-def test_iss1601():
-    url = "https://github.com/py-pdf/pypdf/files/10579503/badges-38.pdf"
-    name = "badge-38.pdf"
-    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
-    original_cs_operations = ContentStream(
-        reader.pages[0].get_contents(), reader
-    ).operations
-    writer = PdfWriter()
-    page_1 = writer.add_blank_page(
-        reader.pages[0].mediabox[2], reader.pages[0].mediabox[3]
-    )
-    page_1.merge_transformed_page(reader.pages[0], Transformation())
-    page_1_cs_operations = page_1.get_contents().operations
-    assert is_sublist(original_cs_operations, page_1_cs_operations)
-    page_1 = writer.add_blank_page(
-        reader.pages[0].mediabox[2], reader.pages[0].mediabox[3]
-    )
-    page_1.merge_page(reader.pages[0])
-    page_1_cs_operations = page_1.get_contents().operations
-    assert is_sublist(original_cs_operations, page_1_cs_operations)
-
-
-def test_attachments():
-    writer = PdfWriter()
-    writer.add_blank_page(100, 100)
-    b = BytesIO()
-    writer.write(b)
-    b.seek(0)
-    reader = PdfReader(b)
-    b = None
-    assert reader.attachments == {}
-    assert reader._list_attachments() == []
-    assert reader._get_attachments() == {}
-    to_add = [
-        ("foobar.txt", b"foobarcontent"),
-        ("foobar2.txt", b"foobarcontent2"),
-        ("foobar2.txt", b"2nd_foobarcontent"),
-    ]
-    for name, content in to_add:
-        writer.add_attachment(name, content)
-
-    b = BytesIO()
-    writer.write(b)
-    b.seek(0)
-    reader = PdfReader(b)
-    b = None
-    assert sorted(reader.attachments.keys()) == sorted({name for name, _ in to_add})
-    assert reader.attachments == {
-        "foobar.txt": [b"foobarcontent"],
-        "foobar2.txt": [b"foobarcontent2", b"2nd_foobarcontent"],
-    }
-    assert reader._list_attachments() == [name for name, _ in to_add]
-
-    # We've added the same key twice - hence only 2 and not 3:
-    att = reader._get_attachments()
-    assert len(att) == 2  # we have 2 keys, but 3 attachments!
-
-    # The content for foobar.txt is clear and just a single value:
-    assert att["foobar.txt"] == b"foobarcontent"
-
-    # The content for foobar2.txt is a list!
-    att = reader._get_attachments("foobar2.txt")
-    assert len(att) == 1
-    assert att["foobar2.txt"] == [b"foobarcontent2", b"2nd_foobarcontent"]
-
-    # Let's do both cases with the public interface:
-    assert reader.attachments["foobar.txt"][0] == b"foobarcontent"
-    assert reader.attachments["foobar2.txt"][0] == b"foobarcontent2"
-    assert reader.attachments["foobar2.txt"][1] == b"2nd_foobarcontent"
-
-
-@pytest.mark.enable_socket()
-def test_iss1614():
-    # test of an annotation(link) directly stored in the /Annots in the page
-    url = "https://github.com/py-pdf/pypdf/files/10669995/broke.pdf"
-    name = "iss1614.pdf"
-    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
-    writer = PdfWriter()
-    writer.append(reader)
-    # test for 2nd error case reported in #1614
-    url = "https://github.com/py-pdf/pypdf/files/10696390/broken.pdf"
-    name = "iss1614.2.pdf"
-    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
-    writer.append(reader)
-
-
-@pytest.mark.enable_socket()
-def test_new_removes():
-    # test of an annotation(link) directly stored in the /Annots in the page
-    url = "https://github.com/py-pdf/pypdf/files/10807951/tt.pdf"
-    name = "iss1650.pdf"
-    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
-
-    writer = PdfWriter()
-    writer.clone_document_from_reader(reader)
-    writer.remove_images()
-    b = BytesIO()
-    writer.write(b)
-    bb = bytes(b.getbuffer())
-    assert b"/Im0 Do" not in bb
-    assert b"/Fm0 Do" in bb
-    assert b" TJ" in bb
-
-    writer = PdfWriter()
-    writer.clone_document_from_reader(reader)
-    writer.remove_text()
-    b = BytesIO()
-    writer.write(b)
-    bb = bytes(b.getbuffer())
-    assert b"/Im0" in bb
-    assert b"Chap" not in bb
-    assert b" TJ" not in bb
-
-    url = "https://github.com/py-pdf/pypdf/files/10832029/tt2.pdf"
-    name = "GeoBaseWithComments.pdf"
-    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
-    writer.append(reader)
-    writer.remove_objects_from_page(writer.pages[0], [ObjectDeletionFlag.LINKS])
-    assert "/Links" not in [
-        a.get_object()["/Subtype"] for a in writer.pages[0]["/Annots"]
-    ]
-    writer.remove_objects_from_page(writer.pages[0], ObjectDeletionFlag.ATTACHMENTS)
-    assert "/FileAttachment" not in [
-        a.get_object()["/Subtype"] for a in writer.pages[0]["/Annots"]
-    ]
-
-    writer.pages[0]["/Annots"].append(
-        DictionaryObject({NameObject("/Subtype"): TextStringObject("/3D")})
-    )
-    assert "/3D" in [a.get_object()["/Subtype"] for a in writer.pages[0]["/Annots"]]
-    writer.remove_objects_from_page(writer.pages[0], ObjectDeletionFlag.OBJECTS_3D)
-    assert "/3D" not in [a.get_object()["/Subtype"] for a in writer.pages[0]["/Annots"]]
-
-    writer.remove_links()
-    assert len(writer.pages[0]["/Annots"]) == 0
-    assert len(writer.pages[3]["/Annots"]) == 0
-
-    writer.remove_annotations("/Text")
-
-
-@pytest.mark.enable_socket()
-def test_late_iss1654():
-    url = "https://github.com/py-pdf/pypdf/files/10935632/bid1.pdf"
-    name = "bid1.pdf"
-    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
-    writer = PdfWriter()
-    writer.clone_document_from_reader(reader)
-    for p in writer.pages:
-        p.compress_content_streams()
-    b = BytesIO()
-    writer.write(b)
-
-
-@pytest.mark.enable_socket()
-def test_iss1723():
-    # test of an annotation(link) directly stored in the /Annots in the page
-    url = "https://github.com/py-pdf/pypdf/files/11015242/inputFile.pdf"
-    name = "iss1723.pdf"
-    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
-    writer = PdfWriter()
-    writer.append(reader, (3, 5))
-
-
-@pytest.mark.enable_socket()
-def test_iss1767():
-    # test with a pdf which is buggy because the object 389,0 exists 3 times:
-    # twice to define catalog and one as an XObject inducing a loop when
-    # cloning
-    url = "https://github.com/py-pdf/pypdf/files/11138472/test.pdf"
-    name = "iss1723.pdf"
-    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
-    PdfWriter(clone_from=reader)
-
-
-@pytest.mark.enable_socket()
-def test_named_dest_page_number():
-    """
-    Closes iss471
-    tests appending with named destinations as integers
-    """
-    url = "https://github.com/py-pdf/pypdf/files/10704333/central.pdf"
-    name = "central.pdf"
-    writer = PdfWriter()
-    writer.add_blank_page(100, 100)
-    writer.append(BytesIO(get_data_from_url(url, name=name)), pages=[0, 1, 2])
-    assert len(writer._root_object["/Names"]["/Dests"]["/Names"]) == 2
-    assert writer._root_object["/Names"]["/Dests"]["/Names"][-1][0] == (1 + 1)
-    writer.append(BytesIO(get_data_from_url(url, name=name)))
-    assert len(writer._root_object["/Names"]["/Dests"]["/Names"]) == 6
-    writer2 = PdfWriter()
-    writer2.add_blank_page(100, 100)
-    dest = writer2.add_named_destination("toto", 0)
-    dest.get_object()[NameObject("/D")][0] = NullObject()
-    b = BytesIO()
-    writer2.write(b)
-    b.seek(0)
-    writer.append(b)
-    assert len(writer._root_object["/Names"]["/Dests"]["/Names"]) == 6
-
-
-@pytest.mark.parametrize(
-    ("write_data_here", "needs_cleanup"),
-    [
-        (
-            "dont_commit_writer.pdf",
-            True,
-        )
-    ],
-)
-def test_update_form_fields(write_data_here, needs_cleanup):
-    writer = PdfWriter(clone_from=RESOURCE_ROOT / "FormTestFromOo.pdf")
-    writer.update_page_form_field_values(
-        writer.pages[0],
-        {
-            "CheckBox1": "/Yes",
-            "Text1": "mon Text1",
-            "Text2": "ligne1\nligne2",
-            "RadioGroup1": "/2",
-            "RdoS1": "/",
-            "Combo1": "!!monCombo!!",
-            "Liste1": "Liste2",
-            "Liste2": ["Lst1", "Lst3"],
-            "DropList1": "DropListe3",
-        },
-        auto_regenerate=False,
-    )
-    del writer.pages[0]["/Annots"][1].get_object()["/AP"]["/N"]
-    writer.update_page_form_field_values(
-        writer.pages[0],
-        {"Text1": "my Text1", "Text2": "ligne1\nligne2\nligne3"},
-        auto_regenerate=False,
-    )
-
-    writer.write("dont_commit_writer.pdf")
-    reader = PdfReader("dont_commit_writer.pdf")
-    flds = reader.get_fields()
-    assert flds["CheckBox1"]["/V"] == "/Yes"
-    assert flds["CheckBox1"].indirect_reference.get_object()["/AS"] == "/Yes"
-    assert (
-        b"(my Text1)"
-        in flds["Text1"].indirect_reference.get_object()["/AP"]["/N"].get_data()
-    )
-    assert flds["Text2"]["/V"] == "ligne1\nligne2\nligne3"
-    assert (
-        b"(ligne3)"
-        in flds["Text2"].indirect_reference.get_object()["/AP"]["/N"].get_data()
-    )
-    assert flds["RadioGroup1"]["/V"] == "/2"
-    assert flds["RadioGroup1"]["/Kids"][0].get_object()["/AS"] == "/Off"
-    assert flds["RadioGroup1"]["/Kids"][1].get_object()["/AS"] == "/2"
-    assert all(x in flds["Liste2"]["/V"] for x in ["Lst1", "Lst3"])
-
-    assert all(x in flds["CheckBox1"]["/_States_"] for x in ["/Off", "/Yes"])
-    assert all(x in flds["RadioGroup1"]["/_States_"] for x in ["/1", "/2", "/3"])
-    assert all(x in flds["Liste1"]["/_States_"] for x in ["Liste1", "Liste2", "Liste3"])
-
-    if needs_cleanup:
-        Path(write_data_here).unlink()
-
-
-@pytest.mark.enable_socket()
-def test_iss1862():
-    # The file here has "/B" entry to define the font in a object below the page
-    # The excluded field shall be considered only at first level (page) and not
-    # below
-    url = "https://github.com/py-pdf/pypdf/files/11708801/intro.pdf"
-    name = "iss1862.pdf"
-    writer = PdfWriter()
-    writer.append(BytesIO(get_data_from_url(url, name=name)))
-    # check that "/B" is in the font
-    writer.pages[0]["/Resources"]["/Font"]["/F1"]["/CharProcs"]["/B"].get_data()
-
-
-def test_empty_objects_before_cloning():
-    pdf_path = RESOURCE_ROOT / "crazyones.pdf"
-    reader = PdfReader(pdf_path)
-    writer = PdfWriter(clone_from=reader)
-    nb_obj_reader = len(reader.xref_objStm) + sum(
-        len(reader.xref[i]) for i in reader.xref
-    )
-    nb_obj_reader -= 1  # for trailer
-    nb_obj_reader -= len(
-        {x: 1 for x, y in reader.xref_objStm.values()}
-    )  # to remove object streams
-    assert len(writer._objects) == nb_obj_reader
-
-
-@pytest.mark.enable_socket()
-def test_watermark():
-    url = "https://github.com/py-pdf/pypdf/files/11985889/bg.pdf"
-    name = "bgwatermark.pdf"
-    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
-    url = "https://github.com/py-pdf/pypdf/files/11985888/source.pdf"
-    name = "srcwatermark.pdf"
-    writer = PdfWriter(clone_from=BytesIO(get_data_from_url(url, name=name)))
-    for p in writer.pages:
-        p.merge_page(reader.pages[0], over=False)
-
-    assert isinstance(p["/Contents"], ArrayObject)
-    assert isinstance(p["/Contents"][0], IndirectObject)
-
-    b = BytesIO()
-    writer.write(b)
-    assert len(b.getvalue()) < 2.1 * 1024 * 1024
-
-
-@pytest.mark.enable_socket()
-@pytest.mark.timeout(4)
-def test_watermarking_speed():
-    url = "https://github.com/py-pdf/pypdf/files/11985889/bg.pdf"
-    name = "bgwatermark.pdf"
-    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
-    url = "https://arxiv.org/pdf/2201.00214.pdf"
-    name = "2201.00214.pdf"
-    writer = PdfWriter(clone_from=BytesIO(get_data_from_url(url, name=name)))
-    for p in writer.pages:
-        p.merge_page(reader.pages[0], over=False)
-    out_pdf_bytesio = BytesIO()
-    writer.write(out_pdf_bytesio)
-    pdf_size_in_mib = len(out_pdf_bytesio.getvalue()) / 1024 / 1024
-    assert pdf_size_in_mib < 20
-
-
-@pytest.mark.enable_socket()
-@pytest.mark.skipif(GHOSTSCRIPT_BINARY is None, reason="Requires Ghostscript")
-def test_watermark_rendering(tmp_path):
-    """Ensure the visual appearance of watermarking stays correct."""
-    url = "https://github.com/py-pdf/pypdf/files/11985889/bg.pdf"
-    name = "bgwatermark.pdf"
-    watermark = PdfReader(BytesIO(get_data_from_url(url, name=name))).pages[0]
-    url = "https://github.com/py-pdf/pypdf/files/11985888/source.pdf"
-    name = "srcwatermark.pdf"
-    page = PdfReader(BytesIO(get_data_from_url(url, name=name))).pages[0]
-    writer = PdfWriter()
-    page.merge_page(watermark, over=False)
-    writer.add_page(page)
-
-    target_png_path = tmp_path / "target.png"
-    url = "https://github.com/py-pdf/pypdf/assets/96178532/d5c72d0e-7047-4504-bbf6-bc591c80d7c0"
-    name = "dstwatermark.png"
-    target_png_path.write_bytes(get_data_from_url(url, name=name))
-
-    pdf_path = tmp_path / "out.pdf"
-    png_path = tmp_path / "out.png"
-    writer.write(pdf_path)
-
-    # False positive: https://github.com/PyCQA/bandit/issues/333
-    subprocess.run(
-        [  # noqa: S603
-            GHOSTSCRIPT_BINARY,
-            "-sDEVICE=pngalpha",
-            "-o",
-            png_path,
-            pdf_path,
-        ]
-    )
-    assert png_path.is_file()
-    assert image_similarity(png_path, target_png_path) >= 0.95
-
-
-@pytest.mark.skipif(GHOSTSCRIPT_BINARY is None, reason="Requires Ghostscript")
-def test_watermarking_reportlab_rendering(tmp_path):
-    """
-    This test is showing a rotated+mirrored watermark in pypdf==3.15.4.
-
-    Replacing the generate_base with e.g. the crazyones did not show the issue.
-    """
-    base_path = SAMPLE_ROOT / "022-pdfkit/pdfkit.pdf"
-    watermark_path = SAMPLE_ROOT / "013-reportlab-overlay/reportlab-overlay.pdf"
-
-    reader = PdfReader(base_path)
-    base_page = reader.pages[0]
-    watermark = PdfReader(watermark_path).pages[0]
-
-    writer = PdfWriter()
-    base_page.merge_page(watermark)
-    writer.add_page(base_page)
-
-    target_png_path = RESOURCE_ROOT / "test_watermarking_reportlab_rendering.png"
-    pdf_path = tmp_path / "out.pdf"
-    png_path = tmp_path / "test_watermarking_reportlab_rendering.png"
-
-    writer.write(pdf_path)
-    # False positive: https://github.com/PyCQA/bandit/issues/333
-    subprocess.run(
-        [  # noqa: S603
-            GHOSTSCRIPT_BINARY,
-            "-r120",
-            "-sDEVICE=pngalpha",
-            "-o",
-            png_path,
-            pdf_path,
-        ]
-    )
-    assert png_path.is_file()
-    assert image_similarity(png_path, target_png_path) >= 0.999
-
-
-@pytest.mark.enable_socket()
-def test_da_missing_in_annot():
-    url = "https://github.com/py-pdf/pypdf/files/12136285/Building.Division.Permit.Application.pdf"
-    name = "BuildingDivisionPermitApplication.pdf"
-    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
-    writer = PdfWriter(clone_from=reader)
-    writer.update_page_form_field_values(
-        writer.pages[0], {"PCN-1": "0"}, auto_regenerate=False
-    )
-    b = BytesIO()
-    writer.write(b)
-    reader = PdfReader(BytesIO(b.getvalue()))
-    ff = reader.get_fields()
-    # check for autosize processing
-    assert (
-        b"0 Tf"
-        not in ff["PCN-1"].indirect_reference.get_object()["/AP"]["/N"].get_data()
-    )
-    f2 = writer.get_object(ff["PCN-2"].indirect_reference.idnum)
-    f2[NameObject("/Parent")] = writer.get_object(
-        ff["PCN-1"].indirect_reference.idnum
-    ).indirect_reference
-    writer.update_page_form_field_values(
-        writer.pages[0], {"PCN-2": "1"}, auto_regenerate=False
-    )
-
-
-def test_missing_fields(pdf_file_path):
-    reader = PdfReader(RESOURCE_ROOT / "form.pdf")
-
-    writer = PdfWriter()
-    writer.add_page(reader.pages[0])
-
-    with pytest.raises(PyPdfError) as exc:
-        writer.update_page_form_field_values(
-            writer.pages[0], {"foo": "some filled in text"}, flags=1
-        )
-    assert exc.value.args[0] == "No /AcroForm dictionary in PdfWriter Object"
-
-    writer = PdfWriter()
-    writer.append(reader, [0])
-    del writer._root_object["/AcroForm"]["/Fields"]
-    with pytest.raises(PyPdfError) as exc:
-        writer.update_page_form_field_values(
-            writer.pages[0], {"foo": "some filled in text"}, flags=1
-        )
-    assert exc.value.args[0] == "No /Fields dictionary in Pdf in PdfWriter Object"
-
-
-def test_missing_info():
-    reader = PdfReader(RESOURCE_ROOT / "missing_info.pdf")
-
-    writer = PdfWriter(clone_from=reader)
-    assert len(writer.pages) == len(reader.pages)
-
-
-@pytest.mark.enable_socket()
-def test_germanfields():
-    """Cf #2035"""
-    url = "https://github.com/py-pdf/pypdf/files/12194195/test.pdf"
-    name = "germanfields.pdf"
-    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
-    writer = PdfWriter(clone_from=reader)
-    form_fields = {"Text Box 1": "test æ ø å"}
-    writer.update_page_form_field_values(
-        writer.pages[0], form_fields, auto_regenerate=False
-    )
-    bytes_stream = BytesIO()
-    writer.write(bytes_stream)
-    bytes_stream.seek(0)
-    reader2 = PdfReader(bytes_stream)
-    assert (
-        b"test \xe6 \xf8 \xe5"
-        in reader2.get_fields()["Text Box 1"]
-        .indirect_reference.get_object()["/AP"]["/N"]
-        .get_data()
-    )
-
-
-@pytest.mark.enable_socket()
-def test_no_t_in_articles():
-    """Cf #2078"""
-    url = "https://github.com/py-pdf/pypdf/files/12311735/bad.pdf"
-    name = "iss2078.pdf"
-    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
-    writer = PdfWriter()
-    writer.append(reader)
-
-
-@pytest.mark.enable_socket()
-def test_no_i_in_articles():
-    """Cf #2089"""
-    url = "https://github.com/py-pdf/pypdf/files/12352793/kim2002.pdf"
-    name = "iss2089.pdf"
-    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
-    writer = PdfWriter()
-    writer.append(reader)
-
-
-@pytest.mark.enable_socket()
-def test_damaged_pdf_length_returning_none():
-    """
-    Cf #140
-    https://github.com/py-pdf/pypdf/issues/140#issuecomment-1685380549
-    """
-    url = "https://github.com/py-pdf/pypdf/files/12168578/bad_pdf_example.pdf"
-    name = "iss140_bad_pdf.pdf"
-    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
-    writer = PdfWriter()
-    writer.append(reader)
-
-
-@pytest.mark.enable_socket()
-def test_viewerpreferences():
-    """Add Tests for ViewerPreferences"""
-    url = "https://github.com/py-pdf/pypdf/files/9175966/2015._pb_decode_pg0.pdf"
-    name = "2015._pb_decode_pg0.pdf"
-    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
-    v = reader.viewer_preferences
-    assert v.center_window == True  # noqa: E712
-    writer = PdfWriter(clone_from=reader)
-    v = writer.viewer_preferences
-    assert v.center_window == True  # noqa: E712
-    v.center_window = False
-    assert (
-        writer._root_object["/ViewerPreferences"]["/CenterWindow"]
-        == False  # noqa: E712
-    )
-    assert v.print_area == "/CropBox"
-    with pytest.raises(ValueError):
-        v.non_fullscreen_pagemode = "toto"
-    with pytest.raises(ValueError):
-        v.non_fullscreen_pagemode = "/toto"
-    v.non_fullscreen_pagemode = "/UseOutlines"
-    assert (
-        writer._root_object["/ViewerPreferences"]["/NonFullScreenPageMode"]
-        == "/UseOutlines"
-    )
-    writer = PdfWriter(clone_from=reader)
-    v = writer.viewer_preferences
-    assert v.center_window == True  # noqa: E712
-    v.center_window = False
-    assert (
-        writer._root_object["/ViewerPreferences"]["/CenterWindow"]
-        == False  # noqa: E712
-    )
-
-    writer = PdfWriter(clone_from=reader)
-    writer._root_object[NameObject("/ViewerPreferences")] = writer._add_object(
-        writer._root_object["/ViewerPreferences"]
-    )
-    v = writer.viewer_preferences
-    v.center_window = False
-    assert (
-        writer._root_object["/ViewerPreferences"]["/CenterWindow"]
-        == False  # noqa: E712
-    )
-    v.num_copies = 1
-    assert v.num_copies == 1
-    assert v.print_pagerange is None
-    with pytest.raises(ValueError):
-        v.print_pagerange = "toto"
-    v.print_pagerange = ArrayObject()
-    assert len(v.print_pagerange) == 0
-
-    writer.create_viewer_preferences()
-    assert len(writer._root_object["/ViewerPreferences"]) == 0
-    writer.viewer_preferences.direction = "/R2L"
-    assert len(writer._root_object["/ViewerPreferences"]) == 1
-
-    del reader.trailer["/Root"]["/ViewerPreferences"]
-    assert reader.viewer_preferences is None
-    writer = PdfWriter(clone_from=reader)
-    assert writer.viewer_preferences is None
-
-
-def test_extra_spaces_in_da_text(caplog):
-    writer = PdfWriter(clone_from=RESOURCE_ROOT / "form.pdf")
-    t = writer.pages[0]["/Annots"][0].get_object()["/DA"]
-    t = t.replace("/Helv", "/Helv   ")
-    writer.pages[0]["/Annots"][0].get_object()[NameObject("/DA")] = TextStringObject(t)
-    writer.update_page_form_field_values(
-        writer.pages[0], {"foo": "abcd"}, auto_regenerate=False
-    )
-    t = writer.pages[0]["/Annots"][0].get_object()["/AP"]["/N"].get_data()
-    assert "Font dictionary for  not found." not in caplog.text
-    assert b"/Helv" in t
-    assert b"(abcd)" in t
-
-
-@pytest.mark.enable_socket()
-def test_object_contains_indirect_reference_to_self():
-    url = "https://github.com/py-pdf/pypdf/files/12389243/testbook.pdf"
-    name = "iss2102.pdf"
-    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
-    writer = PdfWriter()
-    width, height = 595, 841
-    outpage = writer.add_blank_page(width, height)
-    outpage.merge_page(reader.pages[6])
-    writer.append(reader)
+"""Test the pypdf._writer module."""
+import re
+import shutil
+import subprocess
+from io import BytesIO
+from pathlib import Path
+
+import pytest
+
+from pypdf import (
+    ObjectDeletionFlag,
+    PageObject,
+    PdfMerger,
+    PdfReader,
+    PdfWriter,
+    Transformation,
+)
+from pypdf.errors import DeprecationError, PageSizeNotDefinedError, PyPdfError
+from pypdf.generic import (
+    ArrayObject,
+    ContentStream,
+    DictionaryObject,
+    Fit,
+    IndirectObject,
+    NameObject,
+    NullObject,
+    NumberObject,
+    RectangleObject,
+    StreamObject,
+    TextStringObject,
+)
+
+from . import get_data_from_url, is_sublist
+from .test_images import image_similarity
+
+TESTS_ROOT = Path(__file__).parent.resolve()
+PROJECT_ROOT = TESTS_ROOT.parent
+RESOURCE_ROOT = PROJECT_ROOT / "resources"
+SAMPLE_ROOT = Path(PROJECT_ROOT) / "sample-files"
+GHOSTSCRIPT_BINARY = shutil.which("gs")
+
+
+def test_writer_exception_non_binary(tmp_path, caplog):
+    src = RESOURCE_ROOT / "pdflatex-outline.pdf"
+
+    reader = PdfReader(src)
+    writer = PdfWriter()
+    writer.add_page(reader.pages[0])
+
+    with open(tmp_path / "out.txt", "w") as fp, pytest.raises(TypeError):
+        writer.write_stream(fp)
+    ending = "to write to is not in binary mode. It may not be written to correctly.\n"
+    assert caplog.text.endswith(ending)
+
+
+def test_writer_clone():
+    src = RESOURCE_ROOT / "pdflatex-outline.pdf"
+
+    reader = PdfReader(src)
+    writer = PdfWriter(clone_from=reader)
+    assert len(writer.pages) == 4
+    assert "PageObject" in str(type(writer.pages[0]))
+
+    writer = PdfWriter(clone_from=src)
+    assert len(writer.pages) == 4
+    assert "PageObject" in str(type(writer.pages[0]))
+
+
+def test_writer_clone_bookmarks():
+    # Arrange
+    src = RESOURCE_ROOT / "Seige_of_Vicksburg_Sample_OCR-crazyones-merged.pdf"
+    reader = PdfReader(src)
+    writer = PdfWriter()
+
+    # Act + test cat
+    cat = ""
+
+    def cat1(p) -> None:
+        nonlocal cat
+        cat += p.__repr__()
+
+    writer.clone_document_from_reader(reader, cat1)
+    assert "/Page" in cat
+    assert writer.pages[0].raw_get("/Parent") == writer._pages
+    writer.add_outline_item("Page 1", 0)
+    writer.add_outline_item("Page 2", 1)
+
+    # Assert
+    bytes_stream = BytesIO()
+    writer.write(bytes_stream)
+    bytes_stream.seek(0)
+    reader2 = PdfReader(bytes_stream)
+    assert len(reader2.pages) == len(reader.pages)
+    assert len(reader2.outline) == 2
+
+    # test with append
+    writer = PdfWriter()
+    writer.append(reader)
+    writer.add_outline_item("Page 1", 0)
+    writer.add_outline_item("Page 2", 1)
+
+    # Assert
+    bytes_stream = BytesIO()
+    writer.write(bytes_stream)
+    bytes_stream.seek(0)
+    reader2 = PdfReader(bytes_stream)
+    assert len(reader2.pages) == len(reader.pages)
+    assert len(reader2.outline) == 2
+
+
+def writer_operate(writer: PdfWriter) -> None:
+    """
+    Test a writer that was initialized by each of the four usages.
+
+    Args:
+        writer: A PdfWriter object
+    """
+    pdf_path = RESOURCE_ROOT / "crazyones.pdf"
+    pdf_outline_path = RESOURCE_ROOT / "pdflatex-outline.pdf"
+
+    reader = PdfReader(pdf_path)
+    reader_outline = PdfReader(pdf_outline_path)
+
+    page = reader.pages[0]
+    with pytest.raises(PageSizeNotDefinedError) as exc:
+        writer.add_blank_page()
+    assert exc.value.args == ()
+    writer.insert_page(page, 1)
+    writer.insert_page(reader_outline.pages[0], 0)
+    writer.add_outline_item_destination(page)
+    writer.remove_links()
+    writer.add_outline_item_destination(page)
+    oi = writer.add_outline_item(
+        "An outline item", 0, None, (255, 0, 15), True, True, Fit.fit_box_vertically(10)
+    )
+    writer.add_outline_item(
+        "The XYZ fit", 0, oi, (255, 0, 15), True, True, Fit.xyz(left=10, top=20, zoom=3)
+    )
+    writer.add_outline_item(
+        "The XYZ fit no args", 0, oi, (255, 0, 15), True, True, Fit.xyz()
+    )
+    writer.add_outline_item(
+        "The FitH fit", 0, oi, (255, 0, 15), True, True, Fit.fit_horizontally(top=10)
+    )
+    writer.add_outline_item(
+        "The FitV fit", 0, oi, (255, 0, 15), True, True, Fit.fit_vertically(left=10)
+    )
+    writer.add_outline_item(
+        "The FitR fit",
+        0,
+        oi,
+        (255, 0, 15),
+        True,
+        True,
+        Fit.fit_rectangle(left=10, bottom=20, right=30, top=40),
+    )
+    writer.add_outline_item(
+        "The FitB fit", 0, oi, (255, 0, 15), True, True, Fit.fit_box()
+    )
+    writer.add_outline_item(
+        "The FitBH fit",
+        0,
+        oi,
+        (255, 0, 15),
+        True,
+        True,
+        Fit.fit_box_horizontally(top=10),
+    )
+    writer.add_outline_item(
+        "The FitBV fit",
+        0,
+        oi,
+        (255, 0, 15),
+        True,
+        True,
+        Fit.fit_box_vertically(left=10),
+    )
+    writer.add_blank_page()
+    writer.add_uri(2, "https://example.com", RectangleObject([0, 0, 100, 100]))
+    with pytest.warns(
+        DeprecationWarning, match="'pagenum' argument of add_uri is deprecated"
+    ):
+        writer.add_uri(
+            2, "https://example.com", RectangleObject([0, 0, 100, 100]), pagenum=2
+        )
+    with pytest.raises(DeprecationError):
+        writer.add_link(2, 1, RectangleObject([0, 0, 100, 100]))
+    assert writer._get_page_layout() is None
+    writer.page_layout = "broken"
+    assert writer.page_layout == "broken"
+    writer.page_layout = NameObject("/SinglePage")
+    assert writer._get_page_layout() == "/SinglePage"
+    assert writer._get_page_mode() is None
+    writer.set_page_mode("/UseNone")
+    assert writer._get_page_mode() == "/UseNone"
+    writer.set_page_mode(NameObject("/UseOC"))
+    assert writer._get_page_mode() == "/UseOC"
+    writer.insert_blank_page(width=100, height=100)
+    writer.insert_blank_page()  # without parameters
+
+    writer.remove_images()
+
+    writer.add_metadata(reader.metadata)
+    writer.add_metadata({"/Author": "Martin Thoma"})
+    writer.add_metadata({"/MyCustom": 1234})
+
+    writer.add_attachment("foobar.gif", b"foobarcontent")
+
+    # Check that every key in _idnum_hash is correct
+    objects_hash = [o.hash_value() for o in writer._objects]
+    for k, v in writer._idnum_hash.items():
+        assert v.pdf == writer
+        assert k in objects_hash, f"Missing {v}"
+
+
+tmp_path = "dont_commit_writer.pdf"
+
+
+@pytest.mark.parametrize(
+    ("write_data_here", "needs_cleanup"),
+    [
+        ("dont_commit_writer.pdf", True),
+        (Path("dont_commit_writer.pdf"), True),
+        (BytesIO(), False),
+    ],
+)
+def test_writer_operations_by_traditional_usage(write_data_here, needs_cleanup):
+    writer = PdfWriter()
+
+    writer_operate(writer)
+
+    # finally, write "output" to pypdf-output.pdf
+    if needs_cleanup:
+        with open(write_data_here, "wb") as output_stream:
+            writer.write(output_stream)
+    else:
+        output_stream = write_data_here
+        writer.write(output_stream)
+
+    if needs_cleanup:
+        Path(write_data_here).unlink()
+
+
+@pytest.mark.parametrize(
+    ("write_data_here", "needs_cleanup"),
+    [
+        ("dont_commit_writer.pdf", True),
+        (Path("dont_commit_writer.pdf"), True),
+        (BytesIO(), False),
+    ],
+)
+def test_writer_operations_by_semi_traditional_usage(write_data_here, needs_cleanup):
+    with PdfWriter() as writer:
+        writer_operate(writer)
+
+        # finally, write "output" to pypdf-output.pdf
+        if needs_cleanup:
+            with open(write_data_here, "wb") as output_stream:
+                writer.write(output_stream)
+        else:
+            output_stream = write_data_here
+            writer.write(output_stream)
+
+    if needs_cleanup:
+        Path(write_data_here).unlink()
+
+
+@pytest.mark.parametrize(
+    ("write_data_here", "needs_cleanup"),
+    [
+        ("dont_commit_writer.pdf", True),
+        (Path("dont_commit_writer.pdf"), True),
+        (BytesIO(), False),
+    ],
+)
+def test_writer_operations_by_semi_new_traditional_usage(
+    write_data_here, needs_cleanup
+):
+    with PdfWriter() as writer:
+        writer_operate(writer)
+
+        # finally, write "output" to pypdf-output.pdf
+        writer.write(write_data_here)
+
+    if needs_cleanup:
+        Path(write_data_here).unlink()
+
+
+@pytest.mark.parametrize(
+    ("write_data_here", "needs_cleanup"),
+    [
+        ("dont_commit_writer.pdf", True),
+        (Path("dont_commit_writer.pdf"), True),
+        (BytesIO(), False),
+    ],
+)
+def test_writer_operation_by_new_usage(write_data_here, needs_cleanup):
+    # This includes write "output" to pypdf-output.pdf
+    with PdfWriter(write_data_here) as writer:
+        writer_operate(writer)
+
+    if needs_cleanup:
+        Path(write_data_here).unlink()
+
+
+@pytest.mark.parametrize(
+    "input_path",
+    [
+        "side-by-side-subfig.pdf",
+        "reportlab-inline-image.pdf",
+    ],
+)
+def test_remove_images(pdf_file_path, input_path):
+    pdf_path = RESOURCE_ROOT / input_path
+
+    reader = PdfReader(pdf_path)
+    writer = PdfWriter()
+
+    page = reader.pages[0]
+    writer.insert_page(page, 0)
+    writer.remove_images()
+    page_contents_stream = writer.pages[0]["/Contents"]._data
+    assert len(page_contents_stream.strip())
+
+    # finally, write "output" to pypdf-output.pdf
+    with open(pdf_file_path, "wb") as output_stream:
+        writer.write(output_stream)
+
+    with open(pdf_file_path, "rb") as input_stream:
+        reader = PdfReader(input_stream)
+        if input_path == "side-by-side-subfig.pdf":
+            extracted_text = reader.pages[0].extract_text()
+            assert extracted_text
+            assert "Lorem ipsum dolor sit amet" in extracted_text
+
+
+@pytest.mark.enable_socket()
+def test_remove_images_sub_level():
+    """Cf #2035"""
+    url = "https://github.com/py-pdf/pypdf/files/12394781/2210.03142-1.pdf"
+    name = "iss2103.pdf"
+    writer = PdfWriter(clone_from=BytesIO(get_data_from_url(url, name=name)))
+    writer.remove_images()
+    assert (
+        len(
+            [
+                o.get_object()
+                for o in writer.pages[0]["/Resources"]["/XObject"]["/Fm1"][
+                    "/Resources"
+                ]["/XObject"]["/Im1"]["/Resources"]["/XObject"].values()
+                if not isinstance(o.get_object(), NullObject)
+            ]
+        )
+        == 0
+    )
+
+
+@pytest.mark.parametrize(
+    "input_path",
+    [
+        "side-by-side-subfig.pdf",
+        "reportlab-inline-image.pdf",
+    ],
+)
+def test_remove_text(input_path, pdf_file_path):
+    pdf_path = RESOURCE_ROOT / input_path
+
+    reader = PdfReader(pdf_path)
+    writer = PdfWriter()
+
+    page = reader.pages[0]
+    writer.insert_page(page, 0)
+    writer.remove_text()
+
+    # finally, write "output" to pypdf-output.pdf
+    with open(pdf_file_path, "wb") as output_stream:
+        writer.write(output_stream)
+
+
+def test_remove_text_all_operators(pdf_file_path):
+    stream = (
+        b"BT "
+        b"/F0 36 Tf "
+        b"50 706 Td "
+        b"36 TL "
+        b"(The Tj operator) Tj "
+        b'1 2 (The double quote operator) " '
+        b"(The single quote operator) ' "
+        b"ET"
+    )
+    pdf_data = (
+        b"%%PDF-1.7\n"
+        b"1 0 obj << /Count 1 /Kids [5 0 R] /Type /Pages >> endobj\n"
+        b"2 0 obj << >> endobj\n"
+        b"3 0 obj << >> endobj\n"
+        b"4 0 obj << /Length %d >>\n"
+        b"stream\n" + (b"%s\n" % stream) + b"endstream\n"
+        b"endobj\n"
+        b"5 0 obj << /Contents 4 0 R /CropBox [0.0 0.0 2550.0 3508.0]\n"
+        b" /MediaBox [0.0 0.0 2550.0 3508.0] /Parent 1 0 R"
+        b" /Resources << /Font << >> >>"
+        b" /Rotate 0 /Type /Page >> endobj\n"
+        b"6 0 obj << /Pages 1 0 R /Type /Catalog >> endobj\n"
+        b"xref 1 6\n"
+        b"%010d 00000 n\n"
+        b"%010d 00000 n\n"
+        b"%010d 00000 n\n"
+        b"%010d 00000 n\n"
+        b"%010d 00000 n\n"
+        b"%010d 00000 n\n"
+        b"trailer << /Root 6 0 R /Size 6 >>\n"
+        b"startxref\n%d\n"
+        b"%%%%EOF"
+    )
+    startx_correction = -1
+    pdf_data = pdf_data % (
+        len(stream),
+        pdf_data.find(b"1 0 obj") + startx_correction,
+        pdf_data.find(b"2 0 obj") + startx_correction,
+        pdf_data.find(b"3 0 obj") + startx_correction,
+        pdf_data.find(b"4 0 obj") + startx_correction,
+        pdf_data.find(b"5 0 obj") + startx_correction,
+        pdf_data.find(b"6 0 obj") + startx_correction,
+        # startx_correction should be -1 due to double % at the beginning
+        # inducing an error on startxref computation
+        pdf_data.find(b"xref"),
+    )
+    pdf_stream = BytesIO(pdf_data)
+
+    reader = PdfReader(pdf_stream, strict=False)
+    writer = PdfWriter()
+
+    page = reader.pages[0]
+    writer.insert_page(page, 0)
+    writer.remove_text()
+
+    # finally, write "output" to pypdf-output.pdf
+    with open(pdf_file_path, "wb") as output_stream:
+        writer.write(output_stream)
+
+
+def test_write_metadata(pdf_file_path):
+    pdf_path = RESOURCE_ROOT / "crazyones.pdf"
+
+    reader = PdfReader(pdf_path)
+    writer = PdfWriter()
+
+    writer.add_page(reader.pages[0])
+    for page in reader.pages:
+        writer.add_page(page)
+
+    metadata = reader.metadata
+    writer.add_metadata(metadata)
+
+    writer.add_metadata({"/Title": "The Crazy Ones"})
+
+    # finally, write data to pypdf-output.pdf
+    with open(pdf_file_path, "wb") as output_stream:
+        writer.write(output_stream)
+
+    # Check if the title was set
+    reader = PdfReader(pdf_file_path)
+    metadata = reader.metadata
+    assert metadata.get("/Title") == "The Crazy Ones"
+
+
+def test_fill_form(pdf_file_path):
+    reader = PdfReader(RESOURCE_ROOT / "form.pdf")
+    writer = PdfWriter()
+
+    writer.append(reader, [0])
+    writer.append(RESOURCE_ROOT / "crazyones.pdf", [0])
+
+    writer.update_page_form_field_values(
+        writer.pages[0], {"foo": "some filled in text"}, flags=1
+    )
+
+    # check if no fields to fill in the page
+    writer.update_page_form_field_values(
+        writer.pages[1], {"foo": "some filled in text"}, flags=1
+    )
+
+    writer.update_page_form_field_values(
+        writer.pages[0], {"foo": "some filled in text"}
+    )
+
+    # write "output" to pypdf-output.pdf
+    with open(pdf_file_path, "wb") as output_stream:
+        writer.write(output_stream)
+
+
+def test_fill_form_with_qualified():
+    reader = PdfReader(RESOURCE_ROOT / "form.pdf")
+    reader.add_form_topname("top")
+
+    writer = PdfWriter()
+    writer.clone_document_from_reader(reader)
+    writer.add_page(reader.pages[0])
+    writer.update_page_form_field_values(
+        writer.pages[0], {"top.foo": "filling"}, flags=1
+    )
+    b = BytesIO()
+    writer.write(b)
+
+    reader2 = PdfReader(b)
+    fields = reader2.get_fields()
+    assert fields["top.foo"]["/V"] == "filling"
+
+
+@pytest.mark.parametrize(
+    ("use_128bit", "user_password", "owner_password"),
+    [(True, "userpwd", "ownerpwd"), (False, "userpwd", "ownerpwd")],
+)
+def test_encrypt(use_128bit, user_password, owner_password, pdf_file_path):
+    reader = PdfReader(RESOURCE_ROOT / "form.pdf")
+    writer = PdfWriter()
+
+    page = reader.pages[0]
+    orig_text = page.extract_text()
+
+    writer.add_page(page)
+
+    with pytest.raises(ValueError, match="owner_pwd of encrypt is deprecated."):
+        writer.encrypt(
+            owner_pwd=user_password,
+            owner_password=owner_password,
+            user_password=user_password,
+            use_128bit=use_128bit,
+        )
+    with pytest.raises(ValueError, match="'user_pwd' argument is deprecated"):
+        writer.encrypt(
+            owner_password=owner_password,
+            user_password=user_password,
+            user_pwd=user_password,
+            use_128bit=use_128bit,
+        )
+    writer.encrypt(
+        user_password=user_password,
+        owner_password=owner_password,
+        use_128bit=use_128bit,
+    )
+
+    # write "output" to pypdf-output.pdf
+    with open(pdf_file_path, "wb") as output_stream:
+        writer.write(output_stream)
+
+    # Test that the data is not there in clear text
+    with open(pdf_file_path, "rb") as input_stream:
+        data = input_stream.read()
+    assert b"foo" not in data
+
+    # Test the user password (str):
+    reader = PdfReader(pdf_file_path, password="userpwd")
+    new_text = reader.pages[0].extract_text()
+    assert reader.metadata.get("/Producer") == "pypdf"
+    assert new_text == orig_text
+
+    # Test the owner password (str):
+    reader = PdfReader(pdf_file_path, password="ownerpwd")
+    new_text = reader.pages[0].extract_text()
+    assert reader.metadata.get("/Producer") == "pypdf"
+    assert new_text == orig_text
+
+    # Test the user password (bytes):
+    reader = PdfReader(pdf_file_path, password=b"userpwd")
+    new_text = reader.pages[0].extract_text()
+    assert reader.metadata.get("/Producer") == "pypdf"
+    assert new_text == orig_text
+
+    # Test the owner password (bytes):
+    reader = PdfReader(pdf_file_path, password=b"ownerpwd")
+    new_text = reader.pages[0].extract_text()
+    assert reader.metadata.get("/Producer") == "pypdf"
+    assert new_text == orig_text
+
+
+def test_add_outline_item(pdf_file_path):
+    reader = PdfReader(RESOURCE_ROOT / "pdflatex-outline.pdf")
+    writer = PdfWriter()
+
+    for page in reader.pages:
+        writer.add_page(page)
+
+    outline_item = writer.add_outline_item(
+        "An outline item",
+        1,
+        None,
+        (255, 0, 15),
+        True,
+        True,
+        Fit.fit(),
+        is_open=False,
+    )
+    _o2a = writer.add_outline_item(
+        "Another", 2, outline_item, None, False, False, Fit.fit()
+    )
+    _o2b = writer.add_outline_item(
+        "Another bis", 2, outline_item, None, False, False, Fit.fit()
+    )
+    outline_item2 = writer.add_outline_item(
+        "An outline item 2",
+        1,
+        None,
+        (255, 0, 15),
+        True,
+        True,
+        Fit.fit(),
+        is_open=True,
+    )
+    _o3a = writer.add_outline_item(
+        "Another 2", 2, outline_item2, None, False, False, Fit.fit()
+    )
+    _o3b = writer.add_outline_item(
+        "Another 2bis", 2, outline_item2, None, False, False, Fit.fit()
+    )
+
+    # write "output" to pypdf-output.pdf
+    with open(pdf_file_path, "w+b") as output_stream:
+        writer.write(output_stream)
+        output_stream.seek(0)
+        reader = PdfReader(output_stream)
+        assert reader.trailer["/Root"]["/Outlines"]["/Count"] == 3
+        assert reader.outline[0]["/Count"] == -2
+        assert reader.outline[0]["/%is_open%"] == False  # noqa
+        assert reader.outline[2]["/Count"] == 2
+        assert reader.outline[2]["/%is_open%"] == True  # noqa
+        assert reader.outline[1][0]["/Count"] == 0
+
+
+def test_add_named_destination(pdf_file_path):
+    reader = PdfReader(RESOURCE_ROOT / "pdflatex-outline.pdf")
+    writer = PdfWriter()
+    assert writer.get_named_dest_root() == []
+
+    for page in reader.pages:
+        writer.add_page(page)
+
+    assert writer.get_named_dest_root() == []
+
+    writer.add_named_destination(TextStringObject("A named dest"), 2)
+    writer.add_named_destination(TextStringObject("A named dest2"), 2)
+
+    with pytest.warns(DeprecationWarning, match="pagenum is deprecated as an argument"):
+        writer.add_named_destination(TextStringObject("A named dest3"), pagenum=2)
+
+    with pytest.raises(ValueError):
+        writer.add_named_destination(
+            TextStringObject("A named dest3"), pagenum=2, page_number=2
+        )
+
+    root = writer.get_named_dest_root()
+    assert root[0] == "A named dest"
+    assert root[1].pdf == writer
+    assert root[1].get_object()["/S"] == NameObject("/GoTo")
+    assert root[1].get_object()["/D"][0] == writer.pages[2].indirect_reference
+    assert root[2] == "A named dest2"
+    assert root[3].pdf == writer
+    assert root[3].get_object()["/S"] == NameObject("/GoTo")
+    assert root[3].get_object()["/D"][0] == writer.pages[2].indirect_reference
+    assert root[4] == "A named dest3"
+
+    # test get_object
+
+    assert writer.get_object(root[1].idnum) == writer.get_object(root[1])
+    with pytest.raises(ValueError) as exc:
+        writer.get_object(reader.pages[0].indirect_reference)
+    assert exc.value.args[0] == "pdf must be self"
+
+    # write "output" to pypdf-output.pdf
+    with open(pdf_file_path, "wb") as output_stream:
+        writer.write(output_stream)
+
+
+def test_add_named_destination_sort_order(pdf_file_path):
+    """
+    Issue #1927 does not appear.
+
+    add_named_destination() maintains the named destination list sort order
+    """
+    writer = PdfWriter()
+
+    assert writer.get_named_dest_root() == []
+
+    writer.add_blank_page(200, 200)
+    writer.add_named_destination("b", 0)
+    # "a" should be moved before "b" on insert
+    writer.add_named_destination("a", 0)
+
+    root = writer.get_named_dest_root()
+
+    assert len(root) == 4
+    assert (
+        root[0] == "a"
+    ), '"a" was not inserted before "b" in the named destination root'
+    assert root[2] == "b"
+
+    # write "output" to pypdf-output.pdf
+    with open(pdf_file_path, "wb") as output_stream:
+        writer.write(output_stream)
+
+
+def test_add_uri(pdf_file_path):
+    reader = PdfReader(RESOURCE_ROOT / "pdflatex-outline.pdf")
+    writer = PdfWriter()
+
+    for page in reader.pages:
+        writer.add_page(page)
+
+    writer.add_uri(
+        1,
+        "http://www.example.com",
+        RectangleObject([0, 0, 100, 100]),
+        border=[1, 2, 3, [4]],
+    )
+    writer.add_uri(
+        2,
+        "https://pypdf.readthedocs.io/en/latest/",
+        RectangleObject([20, 30, 50, 80]),
+        border=[1, 2, 3],
+    )
+    writer.add_uri(
+        3,
+        "https://pypdf.readthedocs.io/en/latest/user/adding-pdf-annotations.html",
+        "[ 200 300 250 350 ]",
+        border=[0, 0, 0],
+    )
+    writer.add_uri(
+        3,
+        "https://pypdf.readthedocs.io/en/latest/user/adding-pdf-annotations.html",
+        [100, 200, 150, 250],
+        border=[0, 0, 0],
+    )
+
+    # write "output" to pypdf-output.pdf
+    with open(pdf_file_path, "wb") as output_stream:
+        writer.write(output_stream)
+
+
+def test_add_link(pdf_file_path):
+    reader = PdfReader(RESOURCE_ROOT / "pdflatex-outline.pdf")
+    writer = PdfWriter()
+
+    for page in reader.pages:
+        writer.add_page(page)
+
+    with pytest.raises(
+        DeprecationError,
+        match=(
+            re.escape(
+                "add_link is deprecated and was removed in pypdf 3.0.0. "
+                "Use add_annotation(pypdf.annotations.Link(...)) instead."
+            )
+        ),
+    ):
+        writer.add_link(
+            1,
+            2,
+            RectangleObject([0, 0, 100, 100]),
+            border=[1, 2, 3, [4]],
+            fit="/Fit",
+        )
+        writer.add_link(
+            2, 3, RectangleObject([20, 30, 50, 80]), [1, 2, 3], "/FitH", None
+        )
+        writer.add_link(
+            3,
+            0,
+            "[ 200 300 250 350 ]",
+            [0, 0, 0],
+            "/XYZ",
+            0,
+            0,
+            2,
+        )
+        writer.add_link(
+            3,
+            0,
+            [100, 200, 150, 250],
+            border=[0, 0, 0],
+        )
+
+    # write "output" to pypdf-output.pdf
+    with open(pdf_file_path, "wb") as output_stream:
+        writer.write(output_stream)
+
+
+def test_io_streams():
+    """This is the example from the docs ("Streaming data")."""
+    filepath = RESOURCE_ROOT / "pdflatex-outline.pdf"
+    with open(filepath, "rb") as fh:
+        bytes_stream = BytesIO(fh.read())
+
+    # Read from bytes stream
+    reader = PdfReader(bytes_stream)
+    assert len(reader.pages) == 4
+
+    # Write to bytes stream
+    writer = PdfWriter()
+    with BytesIO() as output_stream:
+        writer.write(output_stream)
+
+
+def test_regression_issue670(pdf_file_path):
+    filepath = RESOURCE_ROOT / "crazyones.pdf"
+    reader = PdfReader(filepath, strict=False)
+    for _ in range(2):
+        writer = PdfWriter()
+        writer.add_page(reader.pages[0])
+        with open(pdf_file_path, "wb") as f_pdf:
+            writer.write(f_pdf)
+
+
+def test_issue301():
+    """Test with invalid stream length object."""
+    with open(RESOURCE_ROOT / "issue-301.pdf", "rb") as f:
+        reader = PdfReader(f)
+        writer = PdfWriter()
+        writer.append_pages_from_reader(reader)
+        b = BytesIO()
+        writer.write(b)
+
+
+def test_append_pages_from_reader_append():
+    """Use append_pages_from_reader with a callable."""
+    with open(RESOURCE_ROOT / "issue-301.pdf", "rb") as f:
+        reader = PdfReader(f)
+        writer = PdfWriter()
+        writer.append_pages_from_reader(reader, callable)
+        b = BytesIO()
+        writer.write(b)
+
+
+@pytest.mark.enable_socket()
+@pytest.mark.slow()
+@pytest.mark.filterwarnings("ignore::DeprecationWarning")
+def test_sweep_indirect_references_nullobject_exception(pdf_file_path):
+    # TODO: Check this more closely... this looks weird
+    url = "https://corpora.tika.apache.org/base/docs/govdocs1/924/924666.pdf"
+    name = "tika-924666.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+    merger = PdfMerger()
+    merger.append(reader)
+    merger.write(pdf_file_path)
+
+
+@pytest.mark.enable_socket()
+@pytest.mark.slow()
+@pytest.mark.parametrize(
+    ("url", "name"),
+    [
+        (
+            "https://corpora.tika.apache.org/base/docs/govdocs1/924/924666.pdf",
+            "test_sweep_indirect_references_nullobject_exception.pdf",
+        ),
+        (
+            "https://corpora.tika.apache.org/base/docs/govdocs1/922/922840.pdf",
+            "test_write_outline_item_on_page_fitv.pdf",
+        ),
+        ("https://github.com/py-pdf/pypdf/files/10715624/test.pdf", "iss1627.pdf"),
+    ],
+)
+@pytest.mark.filterwarnings("ignore::DeprecationWarning")
+def test_some_appends(pdf_file_path, url, name):
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+    # PdfMerger
+    merger = PdfMerger()
+    merger.append(reader)
+    merger.write(pdf_file_path)
+    # PdfWriter
+    merger = PdfWriter()
+    merger.append(reader)
+    merger.write(pdf_file_path)
+
+
+def test_pdf_header():
+    writer = PdfWriter()
+    assert writer.pdf_header == b"%PDF-1.3"
+
+    reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf")
+    writer.add_page(reader.pages[0])
+    assert writer.pdf_header == b"%PDF-1.5"
+
+    writer.pdf_header = b"%PDF-1.6"
+    assert writer.pdf_header == b"%PDF-1.6"
+
+
+def test_write_dict_stream_object(pdf_file_path):
+    stream = (
+        b"BT "
+        b"/F0 36 Tf "
+        b"50 706 Td "
+        b"36 TL "
+        b"(The Tj operator) Tj "
+        b'1 2 (The double quote operator) " '
+        b"(The single quote operator) ' "
+        b"ET"
+    )
+
+    stream_object = StreamObject()
+    stream_object[NameObject("/Type")] = NameObject("/Text")
+    stream_object._data = stream
+
+    writer = PdfWriter()
+
+    page_object = PageObject.create_blank_page(writer, 1000, 1000)
+    # Construct dictionary object (PageObject) with stream object
+    # Writer will replace this stream object with indirect object
+    page_object[NameObject("/Test")] = stream_object
+
+    page_object = writer.add_page(page_object)
+    with open(pdf_file_path, "wb") as fp:
+        writer.write(fp)
+
+    for k, v in page_object.items():
+        if k == "/Test":
+            assert str(v) != str(stream_object)
+            assert isinstance(v, IndirectObject)
+            assert str(v.get_object()) == str(stream_object)
+            break
+    else:
+        pytest.fail("/Test not found")
+
+    # Check that every key in _idnum_hash is correct
+    objects_hash = [o.hash_value() for o in writer._objects]
+    for k, v in writer._idnum_hash.items():
+        assert v.pdf == writer
+        assert k in objects_hash, f"Missing {v}"
+
+
+def test_add_single_annotation(pdf_file_path):
+    pdf_path = RESOURCE_ROOT / "crazyones.pdf"
+    reader = PdfReader(pdf_path)
+    page = reader.pages[0]
+    writer = PdfWriter()
+    writer.add_page(page)
+
+    annot_dict = {
+        "/Type": "/Annot",
+        "/Subtype": "/Text",
+        "/Rect": [270.75, 596.25, 294.75, 620.25],
+        "/Contents": "Note in second paragraph",
+        "/C": [1, 1, 0],
+        "/M": "D:20220406191858+02'00",
+        "/Popup": {
+            "/Type": "/Annot",
+            "/Subtype": "/Popup",
+            "/Rect": [294.75, 446.25, 494.75, 596.25],
+            "/M": "D:20220406191847+02'00",
+        },
+        "/T": "moose",
+    }
+    writer.add_annotation(0, annot_dict)
+
+    # Inspect manually by adding 'assert False' and viewing the PDF
+    with open(pdf_file_path, "wb") as fp:
+        writer.write(fp)
+
+
+def test_deprecation_bookmark_decorator():
+    reader = PdfReader(RESOURCE_ROOT / "outlines-with-invalid-destinations.pdf")
+    page = reader.pages[0]
+    outline_item = reader.outline[0]
+    writer = PdfWriter()
+    writer.add_page(page)
+    with pytest.raises(
+        DeprecationError,
+        match="bookmark is deprecated as an argument. Use outline_item instead",
+    ):
+        writer.add_outline_item_dict(bookmark=outline_item)
+
+
+@pytest.mark.samples()
+def test_colors_in_outline_item(pdf_file_path):
+    reader = PdfReader(SAMPLE_ROOT / "004-pdflatex-4-pages/pdflatex-4-pages.pdf")
+    writer = PdfWriter()
+    writer.clone_document_from_reader(reader)
+    purple_rgb = (0.5019607843137255, 0.0, 0.5019607843137255)
+    writer.add_outline_item("First Outline Item", page_number=2, color="800080")
+    writer.add_outline_item("Second Outline Item", page_number=3, color="#800080")
+    writer.add_outline_item("Third Outline Item", page_number=4, color=purple_rgb)
+
+    with open(pdf_file_path, "wb") as f:
+        writer.write(f)
+
+    reader2 = PdfReader(pdf_file_path)
+    for outline_item in reader2.outline:
+        # convert float to string because of mutability
+        assert ["%.5f" % c for c in outline_item.color] == [
+            "%.5f" % p for p in purple_rgb
+        ]
+
+
+@pytest.mark.samples()
+def test_write_empty_stream():
+    reader = PdfReader(SAMPLE_ROOT / "004-pdflatex-4-pages/pdflatex-4-pages.pdf")
+    writer = PdfWriter()
+    writer.clone_document_from_reader(reader)
+
+    with pytest.raises(ValueError) as exc:
+        writer.write("")
+    assert exc.value.args[0] == "Output(stream=) is empty."
+
+
+def test_startup_dest():
+    pdf_file_writer = PdfWriter()
+    pdf_file_writer.append_pages_from_reader(PdfReader(RESOURCE_ROOT / "issue-604.pdf"))
+
+    assert pdf_file_writer.open_destination is None
+    pdf_file_writer.open_destination = pdf_file_writer.pages[9]
+    # also checked using Acrobat to verify that the correct page is opened
+    op = pdf_file_writer._root_object["/OpenAction"]
+    assert op[0] == pdf_file_writer.pages[9].indirect_reference
+    assert op[1] == "/Fit"
+    op = pdf_file_writer.open_destination
+    assert op.raw_get("/Page") == pdf_file_writer.pages[9].indirect_reference
+    assert op["/Type"] == "/Fit"
+    pdf_file_writer.open_destination = op
+    assert pdf_file_writer.open_destination == op
+
+    # irrelevant, just for coverage
+    pdf_file_writer._root_object[NameObject("/OpenAction")][0] = NumberObject(0)
+    pdf_file_writer.open_destination
+    with pytest.raises(Exception) as exc:
+        del pdf_file_writer._root_object[NameObject("/OpenAction")][0]
+        pdf_file_writer.open_destination
+    assert "Invalid Destination" in str(exc.value)
+
+    pdf_file_writer.open_destination = "Test"
+    # also checked with Acrobat to verify open_destination
+    op = pdf_file_writer._root_object["/OpenAction"]
+    assert isinstance(op, TextStringObject)
+    assert op == "Test"
+    op = pdf_file_writer.open_destination
+    assert isinstance(op, TextStringObject)
+    assert op == "Test"
+
+    # irrelevant, this is just for coverage
+    pdf_file_writer._root_object[NameObject("/OpenAction")] = NumberObject(0)
+    assert pdf_file_writer.open_destination is None
+    pdf_file_writer.open_destination = None
+    assert "/OpenAction" not in pdf_file_writer._root_object
+    pdf_file_writer.open_destination = None
+
+
+@pytest.mark.enable_socket()
+def test_iss471():
+    url = "https://github.com/py-pdf/pypdf/files/9139245/book.pdf"
+    name = "book_471.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+
+    writer = PdfWriter()
+    writer.append(reader, excluded_fields=[])
+    assert isinstance(
+        writer.pages[0]["/Annots"][0].get_object()["/Dest"], TextStringObject
+    )
+
+
+@pytest.mark.enable_socket()
+def test_reset_translation():
+    url = "https://corpora.tika.apache.org/base/docs/govdocs1/924/924666.pdf"
+    name = "tika-924666.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+    writer = PdfWriter()
+    writer.append(reader, (0, 10))
+    nb = len(writer._objects)
+    writer.append(reader, (0, 10))
+    assert (
+        len(writer._objects) == nb + 11
+    )  # +10 (pages) +1 because of the added outline
+    nb += 1
+    writer.reset_translation(reader)
+    writer.append(reader, (0, 10))
+    assert len(writer._objects) >= nb + 200
+    nb = len(writer._objects)
+    writer.reset_translation(reader.pages[0].indirect_reference)
+    writer.append(reader, (0, 10))
+    assert len(writer._objects) >= nb + 200
+    nb = len(writer._objects)
+    writer.reset_translation()
+    writer.append(reader, (0, 10))
+    assert len(writer._objects) >= nb + 200
+    nb = len(writer.pages)
+    writer.append(reader, [reader.pages[0], reader.pages[0]])
+    assert len(writer.pages) == nb + 2
+
+
+def test_threads_empty():
+    writer = PdfWriter()
+    thr = writer.threads
+    assert isinstance(thr, ArrayObject)
+    assert len(thr) == 0
+    thr2 = writer.threads
+    assert thr == thr2
+
+
+@pytest.mark.enable_socket()
+def test_append_without_annots_and_articles():
+    url = "https://corpora.tika.apache.org/base/docs/govdocs1/924/924666.pdf"
+    name = "tika-924666.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+    writer = PdfWriter()
+    writer.append(reader, None, (0, 10), True, ["/B"])
+    writer.reset_translation()
+    writer.append(reader, (0, 10), True, ["/B"])
+    assert writer.threads == []
+    writer = PdfWriter()
+    writer.append(reader, None, (0, 10), True, ["/Annots"])
+    assert "/Annots" not in writer.pages[5]
+    writer = PdfWriter()
+    writer.append(reader, None, (0, 10), True, [])
+    assert "/Annots" in writer.pages[5]
+    assert len(writer.threads) >= 1
+
+
+@pytest.mark.enable_socket()
+def test_append_multiple():
+    url = "https://corpora.tika.apache.org/base/docs/govdocs1/924/924666.pdf"
+    name = "tika-924666.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+    writer = PdfWriter()
+    writer.append(
+        reader, [0, 0, 0]
+    )  # to demonstrate multiple insertions of the same page at once
+    writer.append(reader, [0, 0, 0])  # second pack
+    pages = writer._root_object["/Pages"]["/Kids"]
+    assert pages[0] not in pages[1:]  # page not repeated
+    assert pages[-1] not in pages[0:-1]  # page not repeated
+
+
+@pytest.mark.samples()
+def test_set_page_label(pdf_file_path):
+    src = RESOURCE_ROOT / "GeoBase_NHNC1_Data_Model_UML_EN.pdf"  # File without labels
+    reader = PdfReader(src)
+
+    expected = [
+        "i",
+        "ii",
+        "1",
+        "2",
+        "A",
+        "B",
+        "1",
+        "2",
+        "3",
+        "4",
+        "A",
+        "i",
+        "I",
+        "II",
+        "1",
+        "2",
+        "3",
+        "I",
+        "II",
+    ]
+
+    # Tests full length with labels assigned at first and last elements
+    # Tests different labels assigned to consecutive ranges
+    writer = PdfWriter()
+    writer.clone_document_from_reader(reader)
+    writer.set_page_label(0, 1, "/r")
+    writer.set_page_label(4, 5, "/A")
+    writer.set_page_label(10, 10, "/A")
+    writer.set_page_label(11, 11, "/r")
+    writer.set_page_label(12, 13, "/R")
+    writer.set_page_label(17, 18, "/R")
+    writer.write(pdf_file_path)
+    assert PdfReader(pdf_file_path).page_labels == expected
+
+    writer = PdfWriter()  # Same labels, different set order
+    writer.clone_document_from_reader(reader)
+    writer.set_page_label(17, 18, "/R")
+    writer.set_page_label(4, 5, "/A")
+    writer.set_page_label(10, 10, "/A")
+    writer.set_page_label(0, 1, "/r")
+    writer.set_page_label(12, 13, "/R")
+    writer.set_page_label(11, 11, "/r")
+    writer.write(pdf_file_path)
+    assert PdfReader(pdf_file_path).page_labels == expected
+
+    # Tests labels assigned only in the middle
+    # Tests label assigned to a range already containing labeled ranges
+    expected = ["1", "2", "i", "ii", "iii", "iv", "v", "1"]
+    writer = PdfWriter()
+    writer.clone_document_from_reader(reader)
+    writer.set_page_label(3, 4, "/a")
+    writer.set_page_label(5, 5, "/A")
+    writer.set_page_label(2, 6, "/r")
+    writer.write(pdf_file_path)
+    assert PdfReader(pdf_file_path).page_labels[: len(expected)] == expected
+
+    # Tests labels assigned inside a previously existing range
+    expected = ["1", "2", "i", "a", "b", "A", "1", "1", "2"]
+    # Ones repeat because the user didn't cover the entire original range
+    writer = PdfWriter()
+    writer.clone_document_from_reader(reader)
+    writer.set_page_label(2, 6, "/r")
+    writer.set_page_label(3, 4, "/a")
+    writer.set_page_label(5, 5, "/A")
+    writer.write(pdf_file_path)
+    assert PdfReader(pdf_file_path).page_labels[: len(expected)] == expected
+
+    # Tests invalid user input
+    writer = PdfWriter()
+    writer.clone_document_from_reader(reader)
+    with pytest.raises(
+        ValueError, match="at least one between style and prefix must be given"
+    ):
+        writer.set_page_label(0, 5, start=2)
+    with pytest.raises(
+        ValueError, match="page_index_from must be equal or greater then 0"
+    ):
+        writer.set_page_label(-1, 5, "/r")
+    with pytest.raises(
+        ValueError, match="page_index_to must be equal or greater then page_index_from"
+    ):
+        writer.set_page_label(5, 0, "/r")
+    with pytest.raises(ValueError, match="page_index_to exceeds number of pages"):
+        writer.set_page_label(0, 19, "/r")
+    with pytest.raises(
+        ValueError, match="if given, start must be equal or greater than one"
+    ):
+        writer.set_page_label(0, 5, "/r", start=-1)
+
+    pdf_file_path.unlink()
+
+    src = (
+        SAMPLE_ROOT / "009-pdflatex-geotopo/GeoTopo.pdf"
+    )  # File with pre existing labels
+    reader = PdfReader(src)
+
+    # Tests adding labels to existing ones
+    expected = ["i", "ii", "A", "B", "1"]
+    writer = PdfWriter()
+    writer.clone_document_from_reader(reader)
+    writer.set_page_label(2, 3, "/A")
+    writer.write(pdf_file_path)
+    assert PdfReader(pdf_file_path).page_labels[: len(expected)] == expected
+
+    # Tests replacing existing labels
+    expected = ["A", "B", "1", "1", "2"]
+    writer = PdfWriter()
+    writer.clone_document_from_reader(reader)
+    writer.set_page_label(0, 1, "/A")
+    writer.write(pdf_file_path)
+    assert PdfReader(pdf_file_path).page_labels[: len(expected)] == expected
+
+    pdf_file_path.unlink()
+
+    # Tests prefix and start.
+    src = RESOURCE_ROOT / "issue-604.pdf"  # File without page labels
+    reader = PdfReader(src)
+    writer = PdfWriter()
+    writer.clone_document_from_reader(reader)
+
+    writer.set_page_label(0, 0, prefix="FRONT")
+    writer.set_page_label(1, 2, "/D", start=2)
+    writer.set_page_label(3, 6, prefix="UPDATES")
+    writer.set_page_label(7, 10, "/D", prefix="THYR-")
+    writer.set_page_label(11, 21, "/D", prefix="PAP-")
+    writer.set_page_label(22, 30, "/D", prefix="FOLL-")
+    writer.set_page_label(31, 39, "/D", prefix="HURT-")
+    writer.write(pdf_file_path)
+
+
+@pytest.mark.enable_socket()
+def test_iss1601():
+    url = "https://github.com/py-pdf/pypdf/files/10579503/badges-38.pdf"
+    name = "badge-38.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+    original_cs_operations = ContentStream(
+        reader.pages[0].get_contents(), reader
+    ).operations
+    writer = PdfWriter()
+    page_1 = writer.add_blank_page(
+        reader.pages[0].mediabox[2], reader.pages[0].mediabox[3]
+    )
+    page_1.merge_transformed_page(reader.pages[0], Transformation())
+    page_1_cs_operations = page_1.get_contents().operations
+    assert is_sublist(original_cs_operations, page_1_cs_operations)
+    page_1 = writer.add_blank_page(
+        reader.pages[0].mediabox[2], reader.pages[0].mediabox[3]
+    )
+    page_1.merge_page(reader.pages[0])
+    page_1_cs_operations = page_1.get_contents().operations
+    assert is_sublist(original_cs_operations, page_1_cs_operations)
+
+
+def test_attachments():
+    writer = PdfWriter()
+    writer.add_blank_page(100, 100)
+    b = BytesIO()
+    writer.write(b)
+    b.seek(0)
+    reader = PdfReader(b)
+    b = None
+    assert reader.attachments == {}
+    assert reader._list_attachments() == []
+    assert reader._get_attachments() == {}
+    to_add = [
+        ("foobar.txt", b"foobarcontent"),
+        ("foobar2.txt", b"foobarcontent2"),
+        ("foobar2.txt", b"2nd_foobarcontent"),
+    ]
+    for name, content in to_add:
+        writer.add_attachment(name, content)
+
+    b = BytesIO()
+    writer.write(b)
+    b.seek(0)
+    reader = PdfReader(b)
+    b = None
+    assert sorted(reader.attachments.keys()) == sorted({name for name, _ in to_add})
+    assert reader.attachments == {
+        "foobar.txt": [b"foobarcontent"],
+        "foobar2.txt": [b"2nd_foobarcontent"],
+    }
+    writer.add_attachment("foobar2.txt", b"overwrite_ignored", overwrite=False)
+    assert reader.attachments == {
+        "foobar.txt": [b"foobarcontent"],
+        "foobar2.txt": [b"2nd_foobarcontent"],
+    }
+    _l = list({name for name, _ in to_add})
+    _l.sort()
+    assert reader._list_attachments() == _l
+
+    # We've added the same key twice - hence only 2 and not 3:
+    att = reader._get_attachments()
+    assert len(att) == 2  # we have 2 keys, but 3 attachments!
+
+    # The content for foobar.txt is clear and just a single value:
+    assert att["foobar.txt"] == b"foobarcontent"
+
+    # The content for foobar2.txt is a list!
+    att = reader._get_attachments("foobar2.txt")
+    assert len(att) == 1
+    assert att["foobar2.txt"] == [b"2nd_foobarcontent"]
+
+    # Let's do both cases with the public interface:
+    assert reader.attachments["foobar.txt"][0] == b"foobarcontent"
+    assert reader.attachments["foobar2.txt"][0] == b"2nd_foobarcontent"
+    assert len(reader.attachments["foobar2.txt"]) == 1
+
+
+@pytest.mark.enable_socket()
+def test_iss1614():
+    # test of an annotation(link) directly stored in the /Annots in the page
+    url = "https://github.com/py-pdf/pypdf/files/10669995/broke.pdf"
+    name = "iss1614.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+    writer = PdfWriter()
+    writer.append(reader)
+    # test for 2nd error case reported in #1614
+    url = "https://github.com/py-pdf/pypdf/files/10696390/broken.pdf"
+    name = "iss1614.2.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+    writer.append(reader)
+
+
+@pytest.mark.enable_socket()
+def test_new_removes():
+    # test of an annotation(link) directly stored in the /Annots in the page
+    url = "https://github.com/py-pdf/pypdf/files/10807951/tt.pdf"
+    name = "iss1650.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+
+    writer = PdfWriter()
+    writer.clone_document_from_reader(reader)
+    writer.remove_images()
+    b = BytesIO()
+    writer.write(b)
+    bb = bytes(b.getbuffer())
+    assert b"/Im0 Do" not in bb
+    assert b"/Fm0 Do" in bb
+    assert b" TJ" in bb
+
+    writer = PdfWriter()
+    writer.clone_document_from_reader(reader)
+    writer.remove_text()
+    b = BytesIO()
+    writer.write(b)
+    bb = bytes(b.getbuffer())
+    assert b"/Im0" in bb
+    assert b"Chap" not in bb
+    assert b" TJ" not in bb
+
+    url = "https://github.com/py-pdf/pypdf/files/10832029/tt2.pdf"
+    name = "GeoBaseWithComments.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+    writer.append(reader)
+    writer.remove_objects_from_page(writer.pages[0], [ObjectDeletionFlag.LINKS])
+    assert "/Links" not in [
+        a.get_object()["/Subtype"] for a in writer.pages[0]["/Annots"]
+    ]
+    writer.remove_objects_from_page(writer.pages[0], ObjectDeletionFlag.ATTACHMENTS)
+    assert "/FileAttachment" not in [
+        a.get_object()["/Subtype"] for a in writer.pages[0]["/Annots"]
+    ]
+
+    writer.pages[0]["/Annots"].append(
+        DictionaryObject({NameObject("/Subtype"): TextStringObject("/3D")})
+    )
+    assert "/3D" in [a.get_object()["/Subtype"] for a in writer.pages[0]["/Annots"]]
+    writer.remove_objects_from_page(writer.pages[0], ObjectDeletionFlag.OBJECTS_3D)
+    assert "/3D" not in [a.get_object()["/Subtype"] for a in writer.pages[0]["/Annots"]]
+
+    writer.remove_links()
+    assert len(writer.pages[0]["/Annots"]) == 0
+    assert len(writer.pages[3]["/Annots"]) == 0
+
+    writer.remove_annotations("/Text")
+
+
+@pytest.mark.enable_socket()
+def test_late_iss1654():
+    url = "https://github.com/py-pdf/pypdf/files/10935632/bid1.pdf"
+    name = "bid1.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+    writer = PdfWriter()
+    writer.clone_document_from_reader(reader)
+    for p in writer.pages:
+        p.compress_content_streams()
+    b = BytesIO()
+    writer.write(b)
+
+
+@pytest.mark.enable_socket()
+def test_iss1723():
+    # test of an annotation(link) directly stored in the /Annots in the page
+    url = "https://github.com/py-pdf/pypdf/files/11015242/inputFile.pdf"
+    name = "iss1723.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+    writer = PdfWriter()
+    writer.append(reader, (3, 5))
+
+
+@pytest.mark.enable_socket()
+def test_iss1767():
+    # test with a pdf which is buggy because the object 389,0 exists 3 times:
+    # twice to define catalog and one as an XObject inducing a loop when
+    # cloning
+    url = "https://github.com/py-pdf/pypdf/files/11138472/test.pdf"
+    name = "iss1723.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+    PdfWriter(clone_from=reader)
+
+
+@pytest.mark.enable_socket()
+def test_named_dest_page_number():
+    """
+    Closes iss471
+    tests appending with named destinations as integers
+    """
+    url = "https://github.com/py-pdf/pypdf/files/10704333/central.pdf"
+    name = "central.pdf"
+    writer = PdfWriter()
+    writer.add_blank_page(100, 100)
+    writer.append(BytesIO(get_data_from_url(url, name=name)), pages=[0, 1, 2])
+    assert len(writer._root_object["/Names"]["/Dests"]["/Names"]) == 2
+    assert writer._root_object["/Names"]["/Dests"]["/Names"][-1][0] == (1 + 1)
+    writer.append(BytesIO(get_data_from_url(url, name=name)))
+    assert len(writer._root_object["/Names"]["/Dests"]["/Names"]) == 6
+    writer2 = PdfWriter()
+    writer2.add_blank_page(100, 100)
+    dest = writer2.add_named_destination("toto", 0)
+    dest.get_object()[NameObject("/D")][0] = NullObject()
+    b = BytesIO()
+    writer2.write(b)
+    b.seek(0)
+    writer.append(b)
+    assert len(writer._root_object["/Names"]["/Dests"]["/Names"]) == 6
+
+
+@pytest.mark.parametrize(
+    ("write_data_here", "needs_cleanup"),
+    [
+        (
+            "dont_commit_writer.pdf",
+            True,
+        )
+    ],
+)
+def test_update_form_fields(write_data_here, needs_cleanup):
+    writer = PdfWriter(clone_from=RESOURCE_ROOT / "FormTestFromOo.pdf")
+    writer.update_page_form_field_values(
+        writer.pages[0],
+        {
+            "CheckBox1": "/Yes",
+            "Text1": "mon Text1",
+            "Text2": "ligne1\nligne2",
+            "RadioGroup1": "/2",
+            "RdoS1": "/",
+            "Combo1": "!!monCombo!!",
+            "Liste1": "Liste2",
+            "Liste2": ["Lst1", "Lst3"],
+            "DropList1": "DropListe3",
+        },
+        auto_regenerate=False,
+    )
+    del writer.pages[0]["/Annots"][1].get_object()["/AP"]["/N"]
+    writer.update_page_form_field_values(
+        writer.pages[0],
+        {"Text1": "my Text1", "Text2": "ligne1\nligne2\nligne3"},
+        auto_regenerate=False,
+    )
+
+    writer.write("dont_commit_writer.pdf")
+    reader = PdfReader("dont_commit_writer.pdf")
+    flds = reader.get_fields()
+    assert flds["CheckBox1"]["/V"] == "/Yes"
+    assert flds["CheckBox1"].indirect_reference.get_object()["/AS"] == "/Yes"
+    assert (
+        b"(my Text1)"
+        in flds["Text1"].indirect_reference.get_object()["/AP"]["/N"].get_data()
+    )
+    assert flds["Text2"]["/V"] == "ligne1\nligne2\nligne3"
+    assert (
+        b"(ligne3)"
+        in flds["Text2"].indirect_reference.get_object()["/AP"]["/N"].get_data()
+    )
+    assert flds["RadioGroup1"]["/V"] == "/2"
+    assert flds["RadioGroup1"]["/Kids"][0].get_object()["/AS"] == "/Off"
+    assert flds["RadioGroup1"]["/Kids"][1].get_object()["/AS"] == "/2"
+    assert all(x in flds["Liste2"]["/V"] for x in ["Lst1", "Lst3"])
+
+    assert all(x in flds["CheckBox1"]["/_States_"] for x in ["/Off", "/Yes"])
+    assert all(x in flds["RadioGroup1"]["/_States_"] for x in ["/1", "/2", "/3"])
+    assert all(x in flds["Liste1"]["/_States_"] for x in ["Liste1", "Liste2", "Liste3"])
+
+    if needs_cleanup:
+        Path(write_data_here).unlink()
+
+
+@pytest.mark.enable_socket()
+def test_iss1862():
+    # The file here has "/B" entry to define the font in an object below the page
+    # The excluded field shall be considered only at first level (page) and not
+    # below
+    url = "https://github.com/py-pdf/pypdf/files/11708801/intro.pdf"
+    name = "iss1862.pdf"
+    writer = PdfWriter()
+    writer.append(BytesIO(get_data_from_url(url, name=name)))
+    # check that "/B" is in the font
+    writer.pages[0]["/Resources"]["/Font"]["/F1"]["/CharProcs"]["/B"].get_data()
+
+
+def test_empty_objects_before_cloning():
+    pdf_path = RESOURCE_ROOT / "crazyones.pdf"
+    reader = PdfReader(pdf_path)
+    writer = PdfWriter(clone_from=reader)
+    nb_obj_reader = len(reader.xref_objStm) + sum(
+        len(reader.xref[i]) for i in reader.xref
+    )
+    nb_obj_reader -= 1  # for trailer
+    nb_obj_reader -= len(
+        {x: 1 for x, y in reader.xref_objStm.values()}
+    )  # to remove object streams
+    assert len(writer._objects) == nb_obj_reader
+
+
+@pytest.mark.enable_socket()
+def test_watermark():
+    url = "https://github.com/py-pdf/pypdf/files/11985889/bg.pdf"
+    name = "bgwatermark.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+    url = "https://github.com/py-pdf/pypdf/files/11985888/source.pdf"
+    name = "srcwatermark.pdf"
+    writer = PdfWriter(clone_from=BytesIO(get_data_from_url(url, name=name)))
+    for p in writer.pages:
+        p.merge_page(reader.pages[0], over=False)
+
+    assert isinstance(p["/Contents"], ArrayObject)
+    assert isinstance(p["/Contents"][0], IndirectObject)
+
+    b = BytesIO()
+    writer.write(b)
+    assert len(b.getvalue()) < 2.1 * 1024 * 1024
+
+
+@pytest.mark.enable_socket()
+@pytest.mark.timeout(4)
+def test_watermarking_speed():
+    url = "https://github.com/py-pdf/pypdf/files/11985889/bg.pdf"
+    name = "bgwatermark.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+    url = "https://arxiv.org/pdf/2201.00214.pdf"
+    name = "2201.00214.pdf"
+    writer = PdfWriter(clone_from=BytesIO(get_data_from_url(url, name=name)))
+    for p in writer.pages:
+        p.merge_page(reader.pages[0], over=False)
+    out_pdf_bytesio = BytesIO()
+    writer.write(out_pdf_bytesio)
+    pdf_size_in_mib = len(out_pdf_bytesio.getvalue()) / 1024 / 1024
+    assert pdf_size_in_mib < 20
+
+
+@pytest.mark.enable_socket()
+@pytest.mark.skipif(GHOSTSCRIPT_BINARY is None, reason="Requires Ghostscript")
+def test_watermark_rendering(tmp_path):
+    """Ensure the visual appearance of watermarking stays correct."""
+    url = "https://github.com/py-pdf/pypdf/files/11985889/bg.pdf"
+    name = "bgwatermark.pdf"
+    watermark = PdfReader(BytesIO(get_data_from_url(url, name=name))).pages[0]
+    url = "https://github.com/py-pdf/pypdf/files/11985888/source.pdf"
+    name = "srcwatermark.pdf"
+    page = PdfReader(BytesIO(get_data_from_url(url, name=name))).pages[0]
+    writer = PdfWriter()
+    page.merge_page(watermark, over=False)
+    writer.add_page(page)
+
+    target_png_path = tmp_path / "target.png"
+    url = "https://github.com/py-pdf/pypdf/assets/96178532/d5c72d0e-7047-4504-bbf6-bc591c80d7c0"
+    name = "dstwatermark.png"
+    target_png_path.write_bytes(get_data_from_url(url, name=name))
+
+    pdf_path = tmp_path / "out.pdf"
+    png_path = tmp_path / "out.png"
+    writer.write(pdf_path)
+
+    # False positive: https://github.com/PyCQA/bandit/issues/333
+    subprocess.run(
+        [  # noqa: S603
+            GHOSTSCRIPT_BINARY,
+            "-sDEVICE=pngalpha",
+            "-o",
+            png_path,
+            pdf_path,
+        ]
+    )
+    assert png_path.is_file()
+    assert image_similarity(png_path, target_png_path) >= 0.95
+
+
+@pytest.mark.skipif(GHOSTSCRIPT_BINARY is None, reason="Requires Ghostscript")
+def test_watermarking_reportlab_rendering(tmp_path):
+    """
+    This test is showing a rotated+mirrored watermark in pypdf==3.15.4.
+
+    Replacing the generate_base with e.g. the crazyones did not show the issue.
+    """
+    base_path = SAMPLE_ROOT / "022-pdfkit/pdfkit.pdf"
+    watermark_path = SAMPLE_ROOT / "013-reportlab-overlay/reportlab-overlay.pdf"
+
+    reader = PdfReader(base_path)
+    base_page = reader.pages[0]
+    watermark = PdfReader(watermark_path).pages[0]
+
+    writer = PdfWriter()
+    base_page.merge_page(watermark)
+    writer.add_page(base_page)
+
+    target_png_path = RESOURCE_ROOT / "test_watermarking_reportlab_rendering.png"
+    pdf_path = tmp_path / "out.pdf"
+    png_path = tmp_path / "test_watermarking_reportlab_rendering.png"
+
+    writer.write(pdf_path)
+    # False positive: https://github.com/PyCQA/bandit/issues/333
+    subprocess.run(
+        [  # noqa: S603
+            GHOSTSCRIPT_BINARY,
+            "-r120",
+            "-sDEVICE=pngalpha",
+            "-o",
+            png_path,
+            pdf_path,
+        ]
+    )
+    assert png_path.is_file()
+    assert image_similarity(png_path, target_png_path) >= 0.999
+
+
+@pytest.mark.enable_socket()
+def test_da_missing_in_annot():
+    url = "https://github.com/py-pdf/pypdf/files/12136285/Building.Division.Permit.Application.pdf"
+    name = "BuildingDivisionPermitApplication.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+    writer = PdfWriter(clone_from=reader)
+    writer.update_page_form_field_values(
+        writer.pages[0], {"PCN-1": "0"}, auto_regenerate=False
+    )
+    b = BytesIO()
+    writer.write(b)
+    reader = PdfReader(BytesIO(b.getvalue()))
+    ff = reader.get_fields()
+    # check for autosize processing
+    assert (
+        b"0 Tf"
+        not in ff["PCN-1"].indirect_reference.get_object()["/AP"]["/N"].get_data()
+    )
+    f2 = writer.get_object(ff["PCN-2"].indirect_reference.idnum)
+    f2[NameObject("/Parent")] = writer.get_object(
+        ff["PCN-1"].indirect_reference.idnum
+    ).indirect_reference
+    writer.update_page_form_field_values(
+        writer.pages[0], {"PCN-2": "1"}, auto_regenerate=False
+    )
+
+
+def test_missing_fields(pdf_file_path):
+    reader = PdfReader(RESOURCE_ROOT / "form.pdf")
+
+    writer = PdfWriter()
+    writer.add_page(reader.pages[0])
+
+    with pytest.raises(PyPdfError) as exc:
+        writer.update_page_form_field_values(
+            writer.pages[0], {"foo": "some filled in text"}, flags=1
+        )
+    assert exc.value.args[0] == "No /AcroForm dictionary in PdfWriter Object"
+
+    writer = PdfWriter()
+    writer.append(reader, [0])
+    del writer._root_object["/AcroForm"]["/Fields"]
+    with pytest.raises(PyPdfError) as exc:
+        writer.update_page_form_field_values(
+            writer.pages[0], {"foo": "some filled in text"}, flags=1
+        )
+    assert exc.value.args[0] == "No /Fields dictionary in Pdf in PdfWriter Object"
+
+
+def test_missing_info():
+    reader = PdfReader(RESOURCE_ROOT / "missing_info.pdf")
+
+    writer = PdfWriter(clone_from=reader)
+    assert len(writer.pages) == len(reader.pages)
+
+
+@pytest.mark.enable_socket()
+def test_germanfields():
+    """Cf #2035"""
+    url = "https://github.com/py-pdf/pypdf/files/12194195/test.pdf"
+    name = "germanfields.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+    writer = PdfWriter(clone_from=reader)
+    form_fields = {"Text Box 1": "test æ ø å"}
+    writer.update_page_form_field_values(
+        writer.pages[0], form_fields, auto_regenerate=False
+    )
+    bytes_stream = BytesIO()
+    writer.write(bytes_stream)
+    bytes_stream.seek(0)
+    reader2 = PdfReader(bytes_stream)
+    assert (
+        b"test \xe6 \xf8 \xe5"
+        in reader2.get_fields()["Text Box 1"]
+        .indirect_reference.get_object()["/AP"]["/N"]
+        .get_data()
+    )
+
+
+@pytest.mark.enable_socket()
+def test_no_t_in_articles():
+    """Cf #2078"""
+    url = "https://github.com/py-pdf/pypdf/files/12311735/bad.pdf"
+    name = "iss2078.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+    writer = PdfWriter()
+    writer.append(reader)
+
+
+@pytest.mark.enable_socket()
+def test_no_i_in_articles():
+    """Cf #2089"""
+    url = "https://github.com/py-pdf/pypdf/files/12352793/kim2002.pdf"
+    name = "iss2089.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+    writer = PdfWriter()
+    writer.append(reader)
+
+
+@pytest.mark.enable_socket()
+def test_damaged_pdf_length_returning_none():
+    """
+    Cf #140
+    https://github.com/py-pdf/pypdf/issues/140#issuecomment-1685380549
+    """
+    url = "https://github.com/py-pdf/pypdf/files/12168578/bad_pdf_example.pdf"
+    name = "iss140_bad_pdf.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+    writer = PdfWriter()
+    writer.append(reader)
+
+
+@pytest.mark.enable_socket()
+def test_viewerpreferences():
+    """Add Tests for ViewerPreferences"""
+    url = "https://github.com/py-pdf/pypdf/files/9175966/2015._pb_decode_pg0.pdf"
+    name = "2015._pb_decode_pg0.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+    v = reader.viewer_preferences
+    assert v.center_window == True  # noqa: E712
+    writer = PdfWriter(clone_from=reader)
+    v = writer.viewer_preferences
+    assert v.center_window == True  # noqa: E712
+    v.center_window = False
+    assert (
+        writer._root_object["/ViewerPreferences"]["/CenterWindow"]
+        == False  # noqa: E712
+    )
+    assert v.print_area == "/CropBox"
+    with pytest.raises(ValueError):
+        v.non_fullscreen_pagemode = "toto"
+    with pytest.raises(ValueError):
+        v.non_fullscreen_pagemode = "/toto"
+    v.non_fullscreen_pagemode = "/UseOutlines"
+    assert (
+        writer._root_object["/ViewerPreferences"]["/NonFullScreenPageMode"]
+        == "/UseOutlines"
+    )
+    writer = PdfWriter(clone_from=reader)
+    v = writer.viewer_preferences
+    assert v.center_window == True  # noqa: E712
+    v.center_window = False
+    assert (
+        writer._root_object["/ViewerPreferences"]["/CenterWindow"]
+        == False  # noqa: E712
+    )
+
+    writer = PdfWriter(clone_from=reader)
+    writer._root_object[NameObject("/ViewerPreferences")] = writer._add_object(
+        writer._root_object["/ViewerPreferences"]
+    )
+    v = writer.viewer_preferences
+    v.center_window = False
+    assert (
+        writer._root_object["/ViewerPreferences"]["/CenterWindow"]
+        == False  # noqa: E712
+    )
+    v.num_copies = 1
+    assert v.num_copies == 1
+    assert v.print_pagerange is None
+    with pytest.raises(ValueError):
+        v.print_pagerange = "toto"
+    v.print_pagerange = ArrayObject()
+    assert len(v.print_pagerange) == 0
+
+    writer.create_viewer_preferences()
+    assert len(writer._root_object["/ViewerPreferences"]) == 0
+    writer.viewer_preferences.direction = "/R2L"
+    assert len(writer._root_object["/ViewerPreferences"]) == 1
+
+    del reader.trailer["/Root"]["/ViewerPreferences"]
+    assert reader.viewer_preferences is None
+    writer = PdfWriter(clone_from=reader)
+    assert writer.viewer_preferences is None
+
+
+def test_extra_spaces_in_da_text(caplog):
+    writer = PdfWriter(clone_from=RESOURCE_ROOT / "form.pdf")
+    t = writer.pages[0]["/Annots"][0].get_object()["/DA"]
+    t = t.replace("/Helv", "/Helv   ")
+    writer.pages[0]["/Annots"][0].get_object()[NameObject("/DA")] = TextStringObject(t)
+    writer.update_page_form_field_values(
+        writer.pages[0], {"foo": "abcd"}, auto_regenerate=False
+    )
+    t = writer.pages[0]["/Annots"][0].get_object()["/AP"]["/N"].get_data()
+    assert "Font dictionary for  not found." not in caplog.text
+    assert b"/Helv" in t
+    assert b"(abcd)" in t
+
+
+@pytest.mark.enable_socket()
+def test_object_contains_indirect_reference_to_self():
+    url = "https://github.com/py-pdf/pypdf/files/12389243/testbook.pdf"
+    name = "iss2102.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+    writer = PdfWriter()
+    width, height = 595, 841
+    outpage = writer.add_blank_page(width, height)
+    outpage.merge_page(reader.pages[6])
+    writer.append(reader)

From be002732ea5bc3d9034eaf24694164fc57c5595b Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Fri, 6 Oct 2023 07:09:32 +0200
Subject: [PATCH 05/13] fix

---
 pypdf/_writer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pypdf/_writer.py b/pypdf/_writer.py
index bea4c11ab..e77e4e9d9 100644
--- a/pypdf/_writer.py
+++ b/pypdf/_writer.py
@@ -807,7 +807,7 @@ def add_attachment(
             return None
         if fname is None:
             st = filename.replace("/", "\\/").replace("\\\\/", "\\/")
-            fname = st.encode().decode("ansi", errors="xmlcharreplace")
+            fname = st.encode().decode("ascii", errors="xmlcharreplace")
             fname = f"{fname}"  # to escape string
 
         # We need three entries:

From 6e16e47bb7ce4d4fd97613429881948ef44acacf Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Fri, 13 Oct 2023 21:55:42 +0200
Subject: [PATCH 06/13] coverage

---
 pypdf/_reader.py                  | 34 +++++++++++++++++++++++++------
 pypdf/generic/__init__.py         |  4 ++--
 pypdf/generic/_base.py            |  1 +
 pypdf/generic/_data_structures.py | 23 +++++++++++----------
 tests/test_generic.py             |  8 ++++++++
 tests/test_reader.py              |  5 +++++
 6 files changed, 56 insertions(+), 19 deletions(-)

diff --git a/pypdf/_reader.py b/pypdf/_reader.py
index f5d0c5ada..f6ce71ae1 100644
--- a/pypdf/_reader.py
+++ b/pypdf/_reader.py
@@ -101,9 +101,11 @@
     NullObject,
     NumberObject,
     PdfObject,
+    StreamObject,
     TextStringObject,
     TreeObject,
     ViewerPreferences,
+    get_from_file_specification,
     read_object,
 )
 from .types import OutlineType, PagemodeType
@@ -2316,12 +2318,32 @@ def _get_attachments(
             return {k: v if len(v) > 1 else v[0] for k, v in self.attachments.items()}  # type: ignore
         else:
             lst = ef.list_get(filename)
-            return {
-                filename: [(x.get_object())["/EF"].get_object(  # type: ignore
-                    )["/F"].get_object().get_data() for x in lst]  # type: ignore
-                if isinstance(lst, list)
-                else (lst.get_object())["/EF"].get_object()["/F"].get_object().get_data()  # type: ignore
-            }
+            if lst is None:
+                return {}
+            lst = cast(DictionaryObject, lst.get_object())
+            efo = cast(DictionaryObject, lst["/EF"].get_object())
+            rst = cast(
+                StreamObject,
+                get_from_file_specification(efo).get_object(),
+            ).get_data()
+            if isinstance(rst, str):
+                rst = rst.encode()
+            if "/RF" not in lst:
+                return {filename: [rst]}
+            else:
+                rst2 = {"": rst}  # /EF will be returned by empty key
+                lst = cast(
+                    ArrayObject,
+                    get_from_file_specification(
+                        cast(DictionaryObject, lst["/RF"].get_object())
+                    ),
+                )
+                for i in range(0, len(lst), 2):
+                    t = cast(StreamObject, lst[i + 1].get_object()).get_data()
+                    if isinstance(t, str):
+                        t = t.encode()
+                    rst2[lst[i]] = t
+                return {filename: [rst2]}
 
 
 class PdfFileReader(PdfReader):  # deprecated
diff --git a/pypdf/generic/__init__.py b/pypdf/generic/__init__.py
index bed5eb601..9288bf050 100644
--- a/pypdf/generic/__init__.py
+++ b/pypdf/generic/__init__.py
@@ -56,7 +56,7 @@
     NameTree,
     StreamObject,
     TreeObject,
-    get_name_from_file_specification,
+    get_from_file_specification,
     read_object,
 )
 from ._fit import Fit
@@ -447,7 +447,7 @@ def link(
     "Field",
     "Destination",
     "NameTree",
-    "get_name_from_file_specification",
+    "get_from_file_specification",
     "ViewerPreferences",
     # --- More specific stuff
     # Outline
diff --git a/pypdf/generic/_base.py b/pypdf/generic/_base.py
index c17dcbea6..d429f2724 100644
--- a/pypdf/generic/_base.py
+++ b/pypdf/generic/_base.py
@@ -319,6 +319,7 @@ def replace_object(self, obj: "PdfObject") -> None:
         Replace the pointed object with obj
         Only applies to IndirectObjects within a PdfWriter
         """
+        obj = cast("PdfObject", obj.get_object())
         pdf = self.pdf
         if not hasattr(pdf, "_replace_object"):
             raise TypeError("Trying to replace Object in a non PdfWriter")
diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py
index a0c56c69f..236ed8fe0 100644
--- a/pypdf/generic/_data_structures.py
+++ b/pypdf/generic/_data_structures.py
@@ -1545,7 +1545,7 @@ def _append_with_dup(
         _list(self, _l)
         return dict(_l)
 
-    def list_get(self, key: str) -> List[PdfObject]:
+    def list_get(self, key: str) -> Optional[PdfObject]:
         """
         Get the entry from the Name Tree
 
@@ -1557,10 +1557,9 @@ def list_get(self, key: str) -> List[PdfObject]:
         attributeEntries as a dictionary
         """
 
-        def _get(key: str, o: Optional[PdfObject]) -> List[PdfObject]:
+        def _get(key: str, o: Optional[PdfObject]) -> Optional[PdfObject]:
             if o is None:
-                return []
-            rst = []
+                return None
             o = cast(DictionaryObject, o)
             _l = o.get("/Names", None)
             a = o.get("/Kids", None)
@@ -1568,10 +1567,12 @@ def _get(key: str, o: Optional[PdfObject]) -> List[PdfObject]:
             a = a.get_object() if a else []
             for i, x in enumerate(_l):
                 if x == key:
-                    rst.append(_l[i + 1])
+                    return _l[i + 1]
             for x in a:
-                rst.extend(_get(key, x))
-            return rst
+                v = _get(key, x)
+                if v is not None:
+                    return v
+            return None  # if we arrive here, it means nothing matched
 
         return _get(key, self)
 
@@ -1676,14 +1677,14 @@ def _add_in(
         return o.indirect_reference if o is not None else None
 
 
-def get_name_from_file_specification(_a: DictionaryObject) -> str:
-    return cast(
-        str,
+def get_from_file_specification(_a: DictionaryObject) -> PdfObject:
+    return (
         _a.get("/UF")
         or _a.get("/F")
         or _a.get("/DOS")
         or _a.get("/Unix")
-        or _a.get("/Mac"),
+        or _a.get("/Mac")
+        or DictionaryObject()
     )
 
 
diff --git a/tests/test_generic.py b/tests/test_generic.py
index 0e0fff677..0dd724686 100644
--- a/tests/test_generic.py
+++ b/tests/test_generic.py
@@ -1235,3 +1235,11 @@ def test_encodedstream_set_data():
     assert cc["/Filter"] == ["/FlateDecode", "/FlateDecode", "/FlateDecode"]
     assert str(cc["/DecodeParms"]) == "[NullObject, NullObject, NullObject]"
     assert cc[NameObject("/Test")] == "/MyTest"
+
+
+def test_replace_object():
+    writer = PdfWriter(clone_from=RESOURCE_ROOT / "crazyones.pdf")
+    i = writer.pages[0]["/Contents"][0].idnum
+    writer.pages[0]["/Contents"][0].replace_object(NullObject())
+    assert writer.pages[0]["/Contents"][0].idnum == i
+    assert isinstance(writer.pages[0]["/Contents"][0].get_object(), NullObject)
diff --git a/tests/test_reader.py b/tests/test_reader.py
index 8afb45737..cae355103 100644
--- a/tests/test_reader.py
+++ b/tests/test_reader.py
@@ -1454,3 +1454,8 @@ def test_issue_140():
     b = get_data_from_url(url, name=name)
     reader = PdfReader(BytesIO(b))
     assert len(reader.pages) == 54
+
+
+def test_embedded_files_no_ef():
+    reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf")
+    assert reader.embedded_files is None

From cf997de562aa7cf2a424f09cf33d41e60b303e75 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Mon, 16 Oct 2023 21:48:31 +0200
Subject: [PATCH 07/13] coverage

---
 tests/test_reader.py |  5 +++--
 tests/test_writer.py | 21 ++++++++++++++++++++-
 2 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/tests/test_reader.py b/tests/test_reader.py
index 067d1e66a..37e7fdfbc 100644
--- a/tests/test_reader.py
+++ b/tests/test_reader.py
@@ -1455,7 +1455,7 @@ def test_issue_140():
     reader = PdfReader(BytesIO(b))
     assert len(reader.pages) == 54
 
-    
+
 @pytest.mark.enable_socket()
 def test_xyz_with_missing_param():
     """Cf #2236"""
@@ -1470,4 +1470,5 @@ def test_xyz_with_missing_param():
 
 def test_embedded_files_no_ef():
     reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf")
-    assert reader.embedded_files is None
\ No newline at end of file
+    reader[NameObject("/Names")] = DictionaryObject()
+    assert reader.embedded_files is None
diff --git a/tests/test_writer.py b/tests/test_writer.py
index 667c5d4fc..8c26124e0 100644
--- a/tests/test_writer.py
+++ b/tests/test_writer.py
@@ -1321,27 +1321,46 @@ def test_attachments():
         "foobar.txt": [b"foobarcontent"],
         "foobar2.txt": [b"2nd_foobarcontent"],
     }
+    assert writer.attachments == {
+        "foobar.txt": [b"foobarcontent"],
+        "foobar2.txt": [b"2nd_foobarcontent"],
+    }
     _l = list({name for name, _ in to_add})
     _l.sort()
     assert reader._list_attachments() == _l
+    assert writer._list_attachments() == _l
 
     # We've added the same key twice - hence only 2 and not 3:
     att = reader._get_attachments()
-    assert len(att) == 2  # we have 2 keys, but 3 attachments!
+    assert len(att) == 2
 
     # The content for foobar.txt is clear and just a single value:
     assert att["foobar.txt"] == b"foobarcontent"
 
+    # Not applicable for writer
+    # att = writer._get_attachments()
+    # assert len(att) == 2  # we have 2 keys only
+    # assert att["foobar.txt"] == b"foobarcontent"
+
     # The content for foobar2.txt is a list!
     att = reader._get_attachments("foobar2.txt")
     assert len(att) == 1
     assert att["foobar2.txt"] == [b"2nd_foobarcontent"]
 
+    # The content for foobar2.txt is a list!
+    # att = writer._get_attachments("foobar2.txt")
+    # assert len(att) == 1
+    # assert att["foobar2.txt"] == [b"2nd_foobarcontent"]
+
     # Let's do both cases with the public interface:
     assert reader.attachments["foobar.txt"][0] == b"foobarcontent"
     assert reader.attachments["foobar2.txt"][0] == b"2nd_foobarcontent"
     assert len(reader.attachments["foobar2.txt"]) == 1
 
+    assert writer.attachments["foobar.txt"][0] == b"foobarcontent"
+    assert writer.attachments["foobar2.txt"][0] == b"2nd_foobarcontent"
+    assert len(writer.attachments["foobar2.txt"]) == 1
+
 
 @pytest.mark.enable_socket()
 def test_iss1614():

From ba983a8dca45fb893b0958a064f6bda5aec302e0 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Mon, 16 Oct 2023 22:04:43 +0200
Subject: [PATCH 08/13] oups

---
 tests/test_reader.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_reader.py b/tests/test_reader.py
index 37e7fdfbc..71d137236 100644
--- a/tests/test_reader.py
+++ b/tests/test_reader.py
@@ -1470,5 +1470,5 @@ def test_xyz_with_missing_param():
 
 def test_embedded_files_no_ef():
     reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf")
-    reader[NameObject("/Names")] = DictionaryObject()
+    reader.trailer["/Root"][NameObject("/Names")] = DictionaryObject()
     assert reader.embedded_files is None

From fcc1353b976d654be29168f77e4bbcbcbed56985 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Tue, 17 Oct 2023 19:41:44 +0200
Subject: [PATCH 09/13] coverage

---
 pypdf/generic/_data_structures.py | 5 +----
 tests/test_generic.py             | 8 ++++++++
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py
index ed940e50a..9cdf6e2d9 100644
--- a/pypdf/generic/_data_structures.py
+++ b/pypdf/generic/_data_structures.py
@@ -1467,10 +1467,7 @@ def __init__(self, obj: Optional[PdfObject] = None) -> None:
             x not in obj for x in ("/Names", "/Kids")
         ):
             raise ValueError("source object is not a valid source object")
-        if obj is not None:
-            self.update(obj)
-        else:  # building a new Name Tree
-            self[NameObject("/Names")] = ArrayObject()
+        self.update(obj)
         if hasattr(obj, "indirect_reference"):
             self.indirect_reference = obj.indirect_reference
 
diff --git a/tests/test_generic.py b/tests/test_generic.py
index 0dd724686..5f776d5ed 100644
--- a/tests/test_generic.py
+++ b/tests/test_generic.py
@@ -19,6 +19,7 @@
     FloatObject,
     IndirectObject,
     NameObject,
+    NameTree,
     NullObject,
     NumberObject,
     OutlineItem,
@@ -1243,3 +1244,10 @@ def test_replace_object():
     writer.pages[0]["/Contents"][0].replace_object(NullObject())
     assert writer.pages[0]["/Contents"][0].idnum == i
     assert isinstance(writer.pages[0]["/Contents"][0].get_object(), NullObject)
+
+
+def test_nametree():
+    writer = PdfWriter(clone_from=RESOURCE_ROOT / "crazyones.pdf")
+    with pytest.raises(ValueError):
+        NameTree(writer._root_object)
+    writer._root_object[NameObject("/Names")] = DictionaryObject()

From 90a3408dfc9c17d7c4d11290cb1e4d6ce64401c2 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Tue, 17 Oct 2023 19:50:10 +0200
Subject: [PATCH 10/13] TST: Fix test_image_without_pillow in windows
 environment

fixes test failure in windows environment
---
 tests/test_filters.py | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/tests/test_filters.py b/tests/test_filters.py
index 12819c43b..9cfea57b5 100644
--- a/tests/test_filters.py
+++ b/tests/test_filters.py
@@ -265,9 +265,11 @@ def test_image_without_pillow(tmp_path):
     name = "tika-914102.pdf"
     _ = get_data_from_url(url, name=name)
     pdf_path = Path(__file__).parent / "pdf_cache" / name
+    pdf_path_str = str(pdf_path.resolve()).replace("\\", "/")
 
     source_file = tmp_path / "script.py"
-    source_file.write_text(f"""
+    source_file.write_text(
+        f"""
 import sys
 from pypdf import PdfReader
 
@@ -275,7 +277,7 @@ def test_image_without_pillow(tmp_path):
 
 
 sys.modules["PIL"] = None
-reader = PdfReader("{pdf_path.resolve()}", strict=True)
+reader = PdfReader("{pdf_path_str}", strict=True)
 
 for page in reader.pages:
     with pytest.raises(ImportError) as exc:
@@ -284,13 +286,20 @@ def test_image_without_pillow(tmp_path):
         "pillow is required to do image extraction. "
         "It can be installed via 'pip install pypdf[image]'"
     ), exc.value.args[0]
-""")
+"""
+    )
     result = subprocess.run(  # noqa: UP022
-        [shutil.which("python"), source_file], stdout=subprocess.PIPE, stderr=subprocess.PIPE  # noqa: S603
+        [shutil.which("python"), source_file],  # noqa: S603
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
     )
     assert result.returncode == 0
     assert result.stdout == b""
-    assert result.stderr == b"Superfluous whitespace found in object header b'4' b'0'\n"
+    assert (
+        result.stderr.replace(b"\r", b"")
+        == b"Superfluous whitespace found in object header b'4' b'0'\n"
+    )
+
 
 @pytest.mark.enable_socket()
 def test_issue_1737():

From 1abcd0bd7828e888f33037d0f8aaf40caf54d8c0 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Wed, 1 Nov 2023 11:32:20 +0100
Subject: [PATCH 11/13] progressing

---
 pypdf/_reader.py                  |  59 +++++------
 pypdf/generic/__init__.py         |   4 +
 pypdf/generic/_data_structures.py | 161 ++++++++++++++++++++++++++++++
 3 files changed, 196 insertions(+), 28 deletions(-)

diff --git a/pypdf/_reader.py b/pypdf/_reader.py
index 739a262fa..a90e98afe 100644
--- a/pypdf/_reader.py
+++ b/pypdf/_reader.py
@@ -86,6 +86,7 @@
 )
 from .generic import (
     ArrayObject,
+    AttachmentBytes,
     BooleanObject,
     ContentStream,
     DecodedStreamObject,
@@ -2232,39 +2233,41 @@ def _get_embedded_files_root(self) -> Optional[NameTree]:
         return NameTree(efo)
 
     @property
-    def embedded_files(self) -> Optional[Mapping[str, List[PdfObject]]]:
+    def attachments_names(self) -> List[str]:
+        """
+        Returns:
+            List of names
+        """
         ef = self._get_embedded_files_root()
-        if ef:
-            return ef.list_items()
-        else:
-            return None
+        if ef is None:
+            return []
+        return ef.list_keys()
 
     @property
-    def attachments(self) -> Mapping[str, List[Union[bytes, Dict[str, bytes]]]]:
+    def attachments(self) -> Mapping[str, AttachmentBytes]:
+        """
+        extracts the /EF entries as bytes from the embedded files
+        Returns:
+            Dictionary with the filenames as keys and the file content as bytes,
+            extra data can be accessed with AttachmentBytes extra properties(.name,
+            .list_rf_names(), .get_embeddedfile(), .all_files)
+
+        Note:
+            If you want to access /RF
+        """
         ef = self._get_embedded_files_root()
-        if ef:
-            d: Dict[str, List[Union[bytes, Dict[str, bytes]]]] = {}
-            for k, v in ef.list_items().items():
-                if isinstance(v, list):
-                    if k not in d:
-                        d[k] = []
-                    for e in v:
-                        e = cast(DictionaryObject, e.get_object())
-                        if "/EF" in e:
-                            d[k].append(e["/EF"]["/F"].get_data())  # type: ignore
-                        elif "/RF" in e:
-                            r = cast(
-                                ArrayObject, cast(DictionaryObject, e["/RF"])["/F"]
-                            )
-                            di: Dict[str, bytes] = {}
-                            i = 0
-                            while i < len(r):
-                                di[cast(str, r[i])] = r[i + 1].get_object().get_data()
-                                i += 2
-                            d[k].append(di)
-            return d
-        else:
+        if ef is None:
             return {}
+        d: Dict[str, AttachmentBytes] = {}
+        for k, v in ef.list_items().items():
+            if len(v) > 1:
+                logger_warning(
+                    "Unexpected amout of entries in attachments, please report"
+                    "and share the file for analysis with pypdf dev team",
+                    __name__,
+                )
+            d[k] = AttachmentBytes(cast(DictionaryObject, v[0].get_object()))
+        return d
 
     def _list_attachments(self) -> List[str]:
         """
diff --git a/pypdf/generic/__init__.py b/pypdf/generic/__init__.py
index 9288bf050..7bbf362fe 100644
--- a/pypdf/generic/__init__.py
+++ b/pypdf/generic/__init__.py
@@ -46,7 +46,9 @@
     encode_pdfdocencoding,
 )
 from ._data_structures import (
+    PREFERED_ATTACHMENT,
     ArrayObject,
+    AttachmentBytes,
     ContentStream,
     DecodedStreamObject,
     Destination,
@@ -437,6 +439,7 @@ def link(
     "PAGE_FIT",
     # Data structures
     "ArrayObject",
+    "AttachmentBytes",
     "DictionaryObject",
     "TreeObject",
     "StreamObject",
@@ -447,6 +450,7 @@ def link(
     "Field",
     "Destination",
     "NameTree",
+    "PREFERED_ATTACHMENT",
     "get_from_file_specification",
     "ViewerPreferences",
     # --- More specific stuff
diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py
index 9cdf6e2d9..70818ea40 100644
--- a/pypdf/generic/_data_structures.py
+++ b/pypdf/generic/_data_structures.py
@@ -1452,6 +1452,163 @@ def additionalActions(self) -> Optional[DictionaryObject]:  # deprecated
         return self.additional_actions
 
 
+class AttachmentBytes(bytes):
+    """Extension of bytes class, adding File Specification dedicated properties"""
+
+    source_object: Optional[IndirectObject] = None
+    """
+    Pointer to the associated File Specification entry;
+        None if created from bytes or a StreamObject
+    """
+    within_page: Optional[IndirectObject] = None
+    """
+    Page where the File Specification is referenced, else None.
+    This is relevant only for file attachment annotations.
+    Note: this property has to be set manually, outside of the constructor.
+    """
+
+    def __new__(
+        cls,
+        src: Optional[
+            Union[bytes, IndirectObject, StreamObject, DictionaryObject]
+        ] = None,
+    ) -> "AttachmentBytes":
+        """
+        Object Constructor.
+
+        Args:
+            src [DictionaryObject] : FileSpecification Object to populate the new object
+            src [bytes/StreamObject] : bytes/StreamObject(EmbeddedFile) to extract the stream
+                to (partially) initialize the object
+            src [IndirectObject] : Pointer to the DictionaryObject/StreamObject for init
+            src [None] : similar to src = b""
+        """
+        inp: Optional[IndirectObject] = None
+        obj: Any = src
+        v: Union[str, bytes]
+        if isinstance(obj, IndirectObject):
+            obj = obj.get_object()
+        if isinstance(obj, bytes):
+            v = obj
+        elif isinstance(obj, StreamObject):
+            v = obj.get_data()
+        elif isinstance(obj, DictionaryObject) and "/EF" in obj:
+            inp = obj.indirect_reference
+            o = cast(DictionaryObject, obj["/EF"])
+            o = cast(StreamObject, get_from_file_specification(o).get_object())
+            v = o.get_data()
+        else:
+            v = b""
+        if isinstance(v, str):
+            v = v.encode()
+        out = bytes.__new__(cls, v)
+        if inp is None:
+            out.source_object = None
+        else:
+            out.source_object = inp.indirect_reference
+        out.within_page = None  # has to be set by program
+        return out
+
+    @property
+    def name(self) -> Optional[str]:
+        """Returns the (best) name from the File Specification Object else None"""
+        o: Any = self.source_object
+        if o is None:
+            return None
+        o = cast(DictionaryObject, o.get_object())
+        return cast(str, get_from_file_specification(o))
+
+    def list_rf_names(self) -> List[str]:
+        """
+        Returns:
+            List of filenames stored in the /RF field;
+            Empty list if no /RF field exists
+
+        Note:
+            does not contain the "" entry (for /EF)
+        """
+        o: Any = self.source_object
+        if o is None:
+            return []
+        o = cast(DictionaryObject, o.get_object())
+        if "/RF" in o:
+            o = cast(DictionaryObject, o["/RF"])
+            o = cast(DictionaryObject, get_from_file_specification(o))
+            try:
+                lst = [o[i] for i in range(0, len(o), 2)]
+                return lst
+            except ValueError:
+                return []
+        else:
+            return []
+
+    def get_embeddedfile(self, subfile: str = "") -> Optional[StreamObject]:
+        """
+        Returns the EmbeddedFile(Stream Object) containing the data bytes
+        Args:
+            subfile: filename of the EmbeddedFile to be returned;
+                     "" returns the EmbeddedFile from the /EF field
+        Returns:
+            StreamObject
+
+        Note:
+            o == o.get_embeddedfile("").get_data()
+        """
+        o: Any = self.source_object
+        if o is None:
+            return None
+        o = cast(DictionaryObject, o.get_object())
+        if subfile == "":
+            o = cast(DictionaryObject, o["/EF"])
+            return cast(StreamObject, get_from_file_specification(o).get_object())
+        elif "/RF" in o:
+            o = cast(DictionaryObject, o["/RF"])
+            o = cast(DictionaryObject, get_from_file_specification(o))
+            try:
+                i = o.index(subfile)
+                return cast(StreamObject, o[i + 1].get_object())
+            except ValueError:
+                return None
+        else:
+            return None
+
+    @property
+    def all_files(self) -> Dict[str, bytes]:
+        """
+        Returns:
+            a dictionary mapping filename to data bytes;
+            {} if the object is not associated with a File Specification.
+
+        Note:
+            the result also contains the /EF data, stored under the "" key
+        """
+        o: Any = self.source_object
+        if o is None:
+            return {}
+        o = cast(DictionaryObject, o.get_object())
+        out: Dict[str, bytes] = {}
+        o = cast(DictionaryObject, o["/EF"])
+        v = cast(StreamObject, get_from_file_specification(o)).get_data()
+        if isinstance(v, str):
+            v = v.encode()
+        out[""] = v
+        if "/RF" in o:
+            o = cast(DictionaryObject, o["/RF"])
+            a = cast(ArrayObject, get_from_file_specification(o))
+            try:
+                for i in range(0, len(a), 2):
+                    v = cast(StreamObject, a[i + 1].get_object()).get_data()
+                    if isinstance(v, str):
+                        v = v.encode()
+                    out[a[i]] = v
+                return out
+            except ValueError as exc:
+                logger_warning(exc.__repr__(), __name__)
+                return out
+        else:
+            return out
+
+
 class NameTree(DictionaryObject):
     """
     Name Tree Structure
@@ -1675,10 +1832,14 @@ def _add_in(
         return o.indirect_reference if o is not None else None
 
 
+PREFERED_ATTACHMENT = "/DOS"
+
+
 def get_from_file_specification(_a: DictionaryObject) -> PdfObject:
     return (
         _a.get("/UF")
         or _a.get("/F")
+        or _a.get(PREFERED_ATTACHMENT)
         or _a.get("/DOS")
         or _a.get("/Unix")
         or _a.get("/Mac")

From a0ee1a417717f923d9972bbe7f271d892c22a175 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Wed, 1 Nov 2023 23:57:17 +0100
Subject: [PATCH 12/13] fix but still some mypy

---
 pypdf/_reader.py                  | 105 ++----------------------------
 pypdf/_writer.py                  |  62 ++++++------------
 pypdf/generic/__init__.py         |   2 +
 pypdf/generic/_data_structures.py |  78 ++++++++++++++++------
 tests/test_reader.py              |   3 +-
 tests/test_writer.py              |  52 +++++++--------
 6 files changed, 110 insertions(+), 192 deletions(-)

diff --git a/pypdf/_reader.py b/pypdf/_reader.py
index a90e98afe..eb3f0c35f 100644
--- a/pypdf/_reader.py
+++ b/pypdf/_reader.py
@@ -40,7 +40,6 @@
     Dict,
     Iterable,
     List,
-    Mapping,
     Optional,
     Tuple,
     Union,
@@ -86,7 +85,7 @@
 )
 from .generic import (
     ArrayObject,
-    AttachmentBytes,
+    AttachmentBytesDictionary,
     BooleanObject,
     ContentStream,
     DecodedStreamObject,
@@ -102,11 +101,9 @@
     NullObject,
     NumberObject,
     PdfObject,
-    StreamObject,
     TextStringObject,
     TreeObject,
     ViewerPreferences,
-    get_from_file_specification,
     read_object,
 )
 from .types import OutlineType, PagemodeType
@@ -2238,13 +2235,10 @@ def attachments_names(self) -> List[str]:
         Returns:
             List of names
         """
-        ef = self._get_embedded_files_root()
-        if ef is None:
-            return []
-        return ef.list_keys()
+        return self.attachments.keys()
 
     @property
-    def attachments(self) -> Mapping[str, AttachmentBytes]:
+    def attachments(self) -> AttachmentBytesDictionary:
         """
         extracts the /EF entries as bytes from the embedded files
         Returns:
@@ -2255,98 +2249,7 @@ def attachments(self) -> Mapping[str, AttachmentBytes]:
         Note:
             If you want to access /RF
         """
-        ef = self._get_embedded_files_root()
-        if ef is None:
-            return {}
-        d: Dict[str, AttachmentBytes] = {}
-        for k, v in ef.list_items().items():
-            if len(v) > 1:
-                logger_warning(
-                    "Unexpected amout of entries in attachments, please report"
-                    "and share the file for analysis with pypdf dev team",
-                    __name__,
-                )
-            d[k] = AttachmentBytes(cast(DictionaryObject, v[0].get_object()))
-        return d
-
-    def _list_attachments(self) -> List[str]:
-        """
-        Retrieves the list of filenames of file attachments.
-
-        Returns:
-            list of filenames
-        """
-        ef = self._get_embedded_files_root()
-        if ef:
-            lst = ef.list_keys()
-        else:
-            lst = []
-        """
-        for ip, p in enumerate(self.pages):
-            for a in [_a.get_object()
-                      for _a in p.get("/Annots",[])]:
-                if _a.get_object().get("/Subtype","") != "/FileAttachements":
-                    continue
-                lst.append(f"$page_{ip}.{get_name_from_file_specification(_a)}")
-        """
-        return lst
-
-    def _get_attachment_list(self, name: str) -> List[Union[bytes, Dict[str, bytes]]]:
-        out = self._get_attachments(name)[name]
-        if isinstance(out, list):
-            return out
-        return [out]
-
-    def _get_attachments(
-        self, filename: Optional[str] = None
-    ) -> Dict[str, List[Union[bytes, Dict[str, bytes]]]]:
-        """
-        Retrieves all or selected file attachments of the PDF as a dictionary of file names
-        and the file data as a bytestring.
-
-        Args:
-            filename: If filename is None, then a dictionary of all attachments
-                will be returned, where the key is the filename and the value
-                is the content. Otherwise, a dictionary with just a single key
-                - the filename - and its content will be returned.
-
-        Returns:
-            dictionary of filename -> Union[bytestring or List[ByteString]]
-            if the filename exists multiple times a List of the different version will be provided
-        """
-        ef = self._get_embedded_files_root()
-        if ef is None:
-            return {}
-        if filename is None:
-            return {k: v if len(v) > 1 else v[0] for k, v in self.attachments.items()}  # type: ignore
-        else:
-            lst = ef.list_get(filename)
-            if lst is None:
-                return {}
-            lst = cast(DictionaryObject, lst.get_object())
-            efo = cast(DictionaryObject, lst["/EF"].get_object())
-            rst = cast(
-                StreamObject,
-                get_from_file_specification(efo).get_object(),
-            ).get_data()
-            if isinstance(rst, str):
-                rst = rst.encode()
-            if "/RF" not in lst:
-                return {filename: [rst]}
-            else:
-                rst2 = {"": rst}  # /EF will be returned by empty key
-                lst = cast(
-                    ArrayObject,
-                    get_from_file_specification(
-                        cast(DictionaryObject, lst["/RF"].get_object())
-                    ),
-                )
-                for i in range(0, len(lst), 2):
-                    t = cast(StreamObject, lst[i + 1].get_object()).get_data()
-                    if isinstance(t, str):
-                        t = t.encode()
-                    rst2[lst[i]] = t
-                return {filename: [rst2]}
+        return AttachmentBytesDictionary(self._get_embedded_files_root())
 
 
 class PdfFileReader(PdfReader):  # deprecated
diff --git a/pypdf/_writer.py b/pypdf/_writer.py
index e36b6867d..a75d01bbb 100644
--- a/pypdf/_writer.py
+++ b/pypdf/_writer.py
@@ -46,7 +46,6 @@
     Dict,
     Iterable,
     List,
-    Mapping,
     Optional,
     Pattern,
     Tuple,
@@ -96,6 +95,7 @@
 from .generic import (
     PAGE_FIT,
     ArrayObject,
+    AttachmentBytesDictionary,
     BooleanObject,
     ByteStringObject,
     ContentStream,
@@ -740,48 +740,26 @@ def _create_attachment_root(self) -> NameTree:
         return node
 
     @property
-    def embedded_files(self) -> Optional[Mapping[str, List[PdfObject]]]:
-        ef = self._get_embedded_files_root()
-        if ef:
-            return ef.list_items()
-        else:
-            return None
-
-    def _list_attachments(self) -> List[str]:
-        ef = self._get_embedded_files_root()
-        if ef:
-            return ef.list_keys()
-        else:
-            return []
+    def attachments_names(self) -> List[str]:
+        """
+        Returns:
+            List of names
+        """
+        return self.attachments.keys()
 
     @property
-    def attachments(self) -> Mapping[str, List[Union[bytes, Dict[str, bytes]]]]:
-        ef = self._get_embedded_files_root()
-        if ef:
-            d: Dict[str, List[Union[bytes, Dict[str, bytes]]]] = {}
-            for k, v in ef.list_items().items():
-                if isinstance(v, list):
-                    if k not in d:
-                        d[k] = []
-                    for e in v:
-                        e = cast(DictionaryObject, e.get_object())
-                        if "/EF" in e:
-                            d[k].append(e["/EF"]["/F"].get_data())  # type: ignore
-                        elif "/RF" in e:
-                            r = cast(
-                                ArrayObject, cast(DictionaryObject, e["/RF"])["/F"]
-                            )
-                            di = {}
-                            i = 0
-                            while i < len(r):
-                                di[cast(str, r[i])] = cast(
-                                    bytes, r[i + 1].get_object().get_data()
-                                )
-                                i += 2
-                            d[k].append(di)
-            return d
-        else:
-            return {}
+    def attachments(self) -> AttachmentBytesDictionary:
+        """
+        extracts the /EF entries as bytes from the embedded files
+        Returns:
+            Dictionary with the filenames as keys and the file content as bytes,
+            extra data can be accessed with AttachmentBytes extra properties(.name,
+            .list_rf_names(), .get_embeddedfile(), .all_files)
+
+        Note:
+            If you want to access /RF
+        """
+        return AttachmentBytesDictionary(self._get_embedded_files_root())
 
     def add_attachment(
         self,
@@ -808,7 +786,7 @@ def add_attachment(
         Returns:
             The filespec DictionaryObject
         """
-        if not overwrite and filename in self._list_attachments():
+        if not overwrite and filename in self.attachments_names:
             return None
         if fname is None:
             st = filename.replace("/", "\\/").replace("\\\\/", "\\/")
diff --git a/pypdf/generic/__init__.py b/pypdf/generic/__init__.py
index 7bbf362fe..f2eadf079 100644
--- a/pypdf/generic/__init__.py
+++ b/pypdf/generic/__init__.py
@@ -49,6 +49,7 @@
     PREFERED_ATTACHMENT,
     ArrayObject,
     AttachmentBytes,
+    AttachmentBytesDictionary,
     ContentStream,
     DecodedStreamObject,
     Destination,
@@ -440,6 +441,7 @@ def link(
     # Data structures
     "ArrayObject",
     "AttachmentBytes",
+    "AttachmentBytesDictionary",
     "DictionaryObject",
     "TreeObject",
     "StreamObject",
diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py
index cb48bb93d..c3c7328ea 100644
--- a/pypdf/generic/_data_structures.py
+++ b/pypdf/generic/_data_structures.py
@@ -36,12 +36,12 @@
     Any,
     Callable,
     Dict,
+    Generator,
     Iterable,
     List,
     Mapping,
     Optional,
     Sequence,
-    Set,
     Tuple,
     Union,
     cast,
@@ -189,7 +189,6 @@ def clone(
         except Exception:
             pass
 
-        visited: Set[Tuple[int, int]] = set()  # (idnum, generation)
         d__ = cast(
             "DictionaryObject",
             self._reference_clone(self.__class__(), pdf_dest, force_duplicate),
@@ -197,7 +196,7 @@ def clone(
         if ignore_fields is None:
             ignore_fields = []
         if len(d__.keys()) == 0:
-            d__._clone(self, pdf_dest, force_duplicate, ignore_fields, visited)
+            d__._clone(self, pdf_dest, force_duplicate, ignore_fields)
         return d__
 
     def _clone(
@@ -206,7 +205,6 @@ def _clone(
         pdf_dest: PdfWriterProtocol,
         force_duplicate: bool,
         ignore_fields: Optional[Sequence[Union[str, int]]],
-        visited: Set[Tuple[int, int]],  # (idnum, generation)
     ) -> None:
         """
         Update the object from src.
@@ -274,14 +272,6 @@ def _clone(
                                     cur_obj.__class__(), pdf_dest, force_duplicate
                                 ),
                             )
-                            # check to see if we've previously processed our item
-                            if clon.indirect_reference is not None:
-                                idnum = clon.indirect_reference.idnum
-                                generation = clon.indirect_reference.generation
-                                if (idnum, generation) in visited:
-                                    cur_obj = None
-                                    break
-                                visited.add((idnum, generation))
                             objs.append((cur_obj, clon))
                             assert prev_obj is not None
                             prev_obj[NameObject(k)] = clon.indirect_reference
@@ -294,9 +284,7 @@ def _clone(
                             except Exception:
                                 cur_obj = None
                         for s, c in objs:
-                            c._clone(
-                                s, pdf_dest, force_duplicate, ignore_fields, visited
-                            )
+                            c._clone(s, pdf_dest, force_duplicate, ignore_fields)
 
         for k, v in src.items():
             if k not in ignore_fields:
@@ -812,7 +800,6 @@ def _clone(
         pdf_dest: PdfWriterProtocol,
         force_duplicate: bool,
         ignore_fields: Optional[Sequence[Union[str, int]]],
-        visited: Set[Tuple[int, int]],
     ) -> None:
         """
         Update the object from src.
@@ -835,7 +822,7 @@ def _clone(
                 )
         except Exception:
             pass
-        super()._clone(src, pdf_dest, force_duplicate, ignore_fields, visited)
+        super()._clone(src, pdf_dest, force_duplicate, ignore_fields)
 
     def get_data(self) -> Union[bytes, str]:
         return self._data
@@ -1063,7 +1050,6 @@ def clone(
         except Exception:
             pass
 
-        visited: Set[Tuple[int, int]] = set()
         d__ = cast(
             "ContentStream",
             self._reference_clone(
@@ -1072,7 +1058,7 @@ def clone(
         )
         if ignore_fields is None:
             ignore_fields = []
-        d__._clone(self, pdf_dest, force_duplicate, ignore_fields, visited)
+        d__._clone(self, pdf_dest, force_duplicate, ignore_fields)
         return d__
 
     def _clone(
@@ -1081,7 +1067,6 @@ def _clone(
         pdf_dest: PdfWriterProtocol,
         force_duplicate: bool,
         ignore_fields: Optional[Sequence[Union[str, int]]],
-        visited: Set[Tuple[int, int]],
     ) -> None:
         """
         Update the object from src.
@@ -1098,7 +1083,7 @@ def _clone(
         self._operations = list(src_cs._operations)
         self.forced_encoding = src_cs.forced_encoding
         # no need to call DictionaryObjection or anything
-        # like super(DictionaryObject,self)._clone(src, pdf_dest, force_duplicate, ignore_fields, visited)
+        # like super(DictionaryObject,self)._clone(src, pdf_dest, force_duplicate, ignore_fields)
 
     def _parse_content_stream(self, stream: StreamType) -> None:
         # 7.8.2 Content Streams
@@ -1863,6 +1848,57 @@ def get_from_file_specification(_a: DictionaryObject) -> PdfObject:
     )
 
 
+class AttachmentBytesDictionary(dict):
+    """
+    Dict[str, AttachmentBytes]
+    Ease access  to Dictionary of Object
+    """
+
+    root: Optional[NameTree]
+    names: List[str]
+
+    def __init__(
+        self, root: Optional[Union[NameTree, DictionaryObject, IndirectObject]]
+    ):
+        dict.__init__(self)
+        if isinstance(root, IndirectObject):
+            root = cast(DictionaryObject, root.get_object())
+        if root is not None:
+            self.root = (
+                root if isinstance(root, NameTree) else NameTree(root)
+            )
+            self.names = list(self.root.list_keys())
+        else:
+            self.root = None
+            self.names = []
+
+    def keys(self) -> List[str]:
+        return self.names
+
+    def items(self) -> Generator[str, AttachmentBytes]:
+        if self.root is None:
+            return []
+        else:
+            for k, v in self.root.list_items().items():
+                if len(v) > 1:
+                    logger_warning(
+                        "Unexpected amout of entries in attachments,"
+                        "please report"
+                        "and share the file for analysis with pypdf dev team",
+                        __name__,
+                    )
+                yield (k, AttachmentBytes(cast(DictionaryObject, v[0].get_object())))
+
+    def __getitem__(self, k: str) -> AttachmentBytes:
+        if k not in self.names:
+            raise KeyError("KeyError: k")
+        v = self.root.list_get(k)
+        return AttachmentBytes(cast(DictionaryObject, v.get_object()))
+
+    def __repr__(self) -> str:
+        return "{ " + ", ".join(["'" + x + "': ..." for x in self.names]) + "}"
+
+
 class Destination(TreeObject):
     """
     A class representing a destination within a PDF file.
diff --git a/tests/test_reader.py b/tests/test_reader.py
index 71d137236..f43f11df6 100644
--- a/tests/test_reader.py
+++ b/tests/test_reader.py
@@ -1471,4 +1471,5 @@ def test_xyz_with_missing_param():
 def test_embedded_files_no_ef():
     reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf")
     reader.trailer["/Root"][NameObject("/Names")] = DictionaryObject()
-    assert reader.embedded_files is None
+    assert reader.attachments_names == []
+    assert reader.attachments == {}
diff --git a/tests/test_writer.py b/tests/test_writer.py
index 7b3d98643..1a8a545f0 100644
--- a/tests/test_writer.py
+++ b/tests/test_writer.py
@@ -1297,8 +1297,8 @@ def test_attachments():
     reader = PdfReader(b)
     b = None
     assert reader.attachments == {}
-    assert reader._list_attachments() == []
-    assert reader._get_attachments() == {}
+    # assert reader._list_attachments() == []
+    # assert reader._get_attachments() == {}
     to_add = [
         ("foobar.txt", b"foobarcontent"),
         ("foobar2.txt", b"foobarcontent2"),
@@ -1313,30 +1313,30 @@ def test_attachments():
     reader = PdfReader(b)
     b = None
     assert sorted(reader.attachments.keys()) == sorted({name for name, _ in to_add})
-    assert reader.attachments == {
-        "foobar.txt": [b"foobarcontent"],
-        "foobar2.txt": [b"2nd_foobarcontent"],
+    assert dict(reader.attachments.items()) == {
+        "foobar.txt": b"foobarcontent",
+        "foobar2.txt": b"2nd_foobarcontent",
     }
     writer.add_attachment("foobar2.txt", b"overwrite_ignored", overwrite=False)
-    assert reader.attachments == {
-        "foobar.txt": [b"foobarcontent"],
-        "foobar2.txt": [b"2nd_foobarcontent"],
+    assert dict(reader.attachments.items()) == {
+        "foobar.txt": b"foobarcontent",
+        "foobar2.txt": b"2nd_foobarcontent",
     }
-    assert writer.attachments == {
-        "foobar.txt": [b"foobarcontent"],
-        "foobar2.txt": [b"2nd_foobarcontent"],
+    assert dict(writer.attachments.items()) == {
+        "foobar.txt": b"foobarcontent",
+        "foobar2.txt": b"2nd_foobarcontent",
     }
-    _l = list({name for name, _ in to_add})
-    _l.sort()
-    assert reader._list_attachments() == _l
-    assert writer._list_attachments() == _l
+    # _l = list({name for name, _ in to_add})
+    # _l.sort()
+    # assert reader._list_attachments() == _l
+    # assert writer._list_attachments() == _l
 
     # We've added the same key twice - hence only 2 and not 3:
-    att = reader._get_attachments()
-    assert len(att) == 2
+    # att = reader._get_attachments()
+    # assert len(att) == 2
 
     # The content for foobar.txt is clear and just a single value:
-    assert att["foobar.txt"] == b"foobarcontent"
+    # assert att["foobar.txt"] == b"foobarcontent"
 
     # Not applicable for writer
     # att = writer._get_attachments()
@@ -1344,9 +1344,9 @@ def test_attachments():
     # assert att["foobar.txt"] == b"foobarcontent"
 
     # The content for foobar2.txt is a list!
-    att = reader._get_attachments("foobar2.txt")
-    assert len(att) == 1
-    assert att["foobar2.txt"] == [b"2nd_foobarcontent"]
+    # att = reader._get_attachments("foobar2.txt")
+    # assert len(att) == 1
+    # assert att["foobar2.txt"] == [b"2nd_foobarcontent"]
 
     # The content for foobar2.txt is a list!
     # att = writer._get_attachments("foobar2.txt")
@@ -1354,13 +1354,11 @@ def test_attachments():
     # assert att["foobar2.txt"] == [b"2nd_foobarcontent"]
 
     # Let's do both cases with the public interface:
-    assert reader.attachments["foobar.txt"][0] == b"foobarcontent"
-    assert reader.attachments["foobar2.txt"][0] == b"2nd_foobarcontent"
-    assert len(reader.attachments["foobar2.txt"]) == 1
+    assert reader.attachments["foobar.txt"] == b"foobarcontent"
+    assert reader.attachments["foobar2.txt"] == b"2nd_foobarcontent"
 
-    assert writer.attachments["foobar.txt"][0] == b"foobarcontent"
-    assert writer.attachments["foobar2.txt"][0] == b"2nd_foobarcontent"
-    assert len(writer.attachments["foobar2.txt"]) == 1
+    assert writer.attachments["foobar.txt"] == b"foobarcontent"
+    assert writer.attachments["foobar2.txt"] == b"2nd_foobarcontent"
 
 
 @pytest.mark.enable_socket()

From ab963313e2d54d5b757f1f4c6e07f12eea5311bc Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Thu, 2 Nov 2023 14:04:57 +0100
Subject: [PATCH 13/13] fix include mypy

---
 pypdf/generic/_data_structures.py | 30 +++++++++++++++++++-----------
 1 file changed, 19 insertions(+), 11 deletions(-)

diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py
index c3c7328ea..9261eafc3 100644
--- a/pypdf/generic/_data_structures.py
+++ b/pypdf/generic/_data_structures.py
@@ -36,8 +36,8 @@
     Any,
     Callable,
     Dict,
-    Generator,
     Iterable,
+    Iterator,
     List,
     Mapping,
     Optional,
@@ -1848,7 +1848,7 @@ def get_from_file_specification(_a: DictionaryObject) -> PdfObject:
     )
 
 
-class AttachmentBytesDictionary(dict):
+class AttachmentBytesDictionary(Mapping[str, AttachmentBytes]):
     """
     Dict[str, AttachmentBytes]
     Ease access  to Dictionary of Object
@@ -1858,24 +1858,28 @@ class AttachmentBytesDictionary(dict):
     names: List[str]
 
     def __init__(
-        self, root: Optional[Union[NameTree, DictionaryObject, IndirectObject]]
-    ):
-        dict.__init__(self)
+        self, root: Optional[Union[NameTree, DictionaryObject]] = None
+    ) -> None:
+        # super().__init__(self)
         if isinstance(root, IndirectObject):
             root = cast(DictionaryObject, root.get_object())
         if root is not None:
-            self.root = (
-                root if isinstance(root, NameTree) else NameTree(root)
-            )
+            self.root = root if isinstance(root, NameTree) else NameTree(root)
             self.names = list(self.root.list_keys())
         else:
             self.root = None
             self.names = []
 
-    def keys(self) -> List[str]:
+    def keys(self) -> List[str]:  # type: ignore[override]
         return self.names
 
-    def items(self) -> Generator[str, AttachmentBytes]:
+    def __len__(self) -> int:
+        return len(self.names)
+
+    def __iter__(self) -> Iterator[str]:  # type: ignore
+        yield from self.names
+
+    def items(self) -> Iterable[Tuple[str, AttachmentBytes]]:  # type: ignore[override]
         if self.root is None:
             return []
         else:
@@ -1891,8 +1895,12 @@ def items(self) -> Generator[str, AttachmentBytes]:
 
     def __getitem__(self, k: str) -> AttachmentBytes:
         if k not in self.names:
-            raise KeyError("KeyError: k")
+            raise KeyError(f"KeyError: {k}")
+        if self.root is None:
+            raise ValueError("Empty Object")
         v = self.root.list_get(k)
+        if v is None:
+            raise KeyError(f"KeyError: {k}")
         return AttachmentBytes(cast(DictionaryObject, v.get_object()))
 
     def __repr__(self) -> str: