|
25 | 25 | # POSSIBILITY OF SUCH DAMAGE.
|
26 | 26 |
|
27 | 27 |
|
28 |
| -# This module contains classes used by _writer.py to track links in |
29 |
| -# pages being added to the writer until the links can be resolved. |
| 28 | +# This module contains code used by _writer.py to track links in pages |
| 29 | +# being added to the writer until the links can be resolved. |
30 | 30 |
|
31 |
| -from typing import TYPE_CHECKING, Union |
| 31 | +from typing import TYPE_CHECKING, List, Optional, Tuple, Union, cast |
32 | 32 |
|
33 |
| -from . import ArrayObject, IndirectObject, TextStringObject |
| 33 | +from . import ArrayObject, DictionaryObject, IndirectObject, PdfObject, TextStringObject |
34 | 34 |
|
35 | 35 | if TYPE_CHECKING:
|
| 36 | + from .._page import PageObject |
36 | 37 | from .._reader import PdfReader
|
37 | 38 | from .._writer import PdfWriter
|
38 | 39 |
|
39 | 40 |
|
40 |
class NamedReferenceLink:
    """Link to a named destination, held back until it can be resolved correctly."""

    def __init__(self, reference: TextStringObject, source_pdf: "PdfReader") -> None:
        """Remember the destination name and the reader it came from.

        reference: TextStringObject with named reference
        source_pdf: PdfReader whose named destinations are searched for it
        """
        self._source_pdf = source_pdf
        self._reference = reference

    def find_referenced_page(self) -> Union[IndirectObject, None]:
        """Return the page of the matching named destination in the source
        PDF, or None when the lookup yields nothing.
        """
        dest = self._source_pdf.named_destinations.get(str(self._reference))
        if not dest:
            return None
        return dest.page

    def patch_reference(self, target_pdf: "PdfWriter", new_page: IndirectObject) -> None:
        """Add the named destination to target_pdf unless the name is already there.

        target_pdf: PdfWriter which the new link went into
        new_page: page in target_pdf the destination should point to
        """
        name = str(self._reference)
        if name in target_pdf.named_destinations:
            # the new PDF already defines this name; leave it untouched
            return
        target_pdf.add_named_destination(name, new_page.page_number)
57 | 58 |
|
58 | 59 |
|
59 |
class DirectReferenceLink:
    """Link that refers to a page object directly, held back until it can be resolved."""

    def __init__(self, reference: ArrayObject) -> None:
        """reference: destination ArrayObject; its first element is the Page indirect object"""
        self._reference = reference

    def find_referenced_page(self) -> IndirectObject:
        """Return the first element of the stored destination array."""
        referenced_page = self._reference[0]
        return referenced_page

    def patch_reference(self, target_pdf: "PdfWriter", new_page: IndirectObject) -> None:
        """Overwrite the first element of the destination array with new_page.

        target_pdf: PdfWriter which the new link went into (not used here)
        new_page: page that replaces the original target
        """
        self._reference[0] = new_page
72 | 73 |
|
73 | 74 |
|
74 |
# Either kind of link tracked by this module: one that goes through a named
# destination, or one that holds the target page directly.
ReferenceLink = Union[NamedReferenceLink, DirectReferenceLink]
| 76 | + |
| 77 | + |
def extract_links(new_page: "PageObject", old_page: "PageObject") -> List[Tuple[ReferenceLink, ReferenceLink]]:
    """Pair up the links of two pages that are assumed to be the same page.

    Every "/Annots" entry of each page is converted with _build_link, and the
    two results are matched by position. Returns a list of
    (new link, old link) tuples, keeping only the positions where both
    entries produced a link.
    """
    converted_new = [_build_link(annotation, new_page) for annotation in new_page.get("/Annots", [])]
    converted_old = [_build_link(annotation, old_page) for annotation in old_page.get("/Annots", [])]

    # all(pair) drops any position where either side could not be converted
    return [pair for pair in zip(converted_new, converted_old) if all(pair)]
| 88 | + |
| 89 | + |
def _build_link(indir_obj: IndirectObject, page: "PageObject") -> Optional[ReferenceLink]:
    """Convert one "/Annots" entry of a page into a ReferenceLink, if applicable.

    indir_obj: the annotation entry; it is resolved with get_object()
    page: page the annotation was taken from; page.pdf is handed on as the
        source in which named destinations are looked up

    Returns None when the entry is not a "/Link" annotation, when its action
    is anything other than "/GoTo", when it has neither "/A" nor "/Dest",
    or when the entry is malformed (not a dictionary, or a "/GoTo" action
    without a "/D" destination).
    """
    src = cast("PdfReader", page.pdf)
    link = indir_obj.get_object()
    # A broken file may list something other than a dictionary in /Annots;
    # skip it instead of failing on the lookups below.
    if not isinstance(link, DictionaryObject) or link.get("/Subtype") != "/Link":
        return None

    if "/A" in link:
        action = link["/A"]
        if not isinstance(action, DictionaryObject) or action.get("/S") != "/GoTo":
            return None
        if "/D" not in action:
            return None  # GoTo action without a destination: nothing to track

        return _create_link(action["/D"], src)

    if "/Dest" in link:
        return _create_link(link["/Dest"], src)

    return None  # nothing we need to do
| 107 | + |
| 108 | + |
def _create_link(ref: PdfObject, src: "PdfReader") -> Optional[ReferenceLink]:
    """Wrap a destination value in the matching link class.

    A TextStringObject becomes a NamedReferenceLink bound to src, an
    ArrayObject becomes a DirectReferenceLink, and any other value gives None.
    """
    link: Optional[ReferenceLink] = None
    if isinstance(ref, TextStringObject):
        link = NamedReferenceLink(ref, src)
    elif isinstance(ref, ArrayObject):
        link = DirectReferenceLink(ref)
    return link
0 commit comments