|
33 | 33 | import enum
|
34 | 34 | import hashlib
|
35 | 35 | import re
|
| 36 | +import time |
36 | 37 | import uuid
|
37 | 38 | import warnings
|
38 | 39 | from io import BytesIO, FileIO, IOBase
|
@@ -136,13 +137,6 @@ class ObjectDeletionFlag(enum.IntFlag):
|
136 | 137 | ALL_ANNOTATIONS = enum.auto()
|
137 | 138 |
|
138 | 139 |
|
139 |
| -def _rolling_checksum(stream: BytesIO, blocksize: int = 65536) -> str: |
140 |
| - hash = hashlib.md5() |
141 |
| - for block in iter(lambda: stream.read(blocksize), b""): |
142 |
| - hash.update(block) |
143 |
| - return hash.hexdigest() |
144 |
| - |
145 |
| - |
146 | 140 | class PdfWriter:
|
147 | 141 | """
|
148 | 142 | Write a PDF file out, given pages produced by another class.
|
@@ -206,6 +200,7 @@ def __init__(
|
206 | 200 |
|
207 | 201 | self._encryption: Optional[Encryption] = None
|
208 | 202 | self._encrypt_entry: Optional[DictionaryObject] = None
|
| 203 | + self._ID: Union[ArrayObject, None] = None |
209 | 204 |
|
210 | 205 | def __enter__(self) -> "PdfWriter":
|
211 | 206 | """Store that writer is initialized by 'with'."""
|
@@ -1128,25 +1123,35 @@ def cloneDocumentFromReader(
|
1128 | 1123 | )
|
1129 | 1124 | self.clone_document_from_reader(reader, after_page_append)
|
1130 | 1125 |
|
1131 |
def _compute_document_identifier_from_content(self) -> ByteStringObject:
    """
    Build a document identifier from the serialized PDF structure.

    The structure is written into an in-memory buffer, and the hex checksum
    of that buffer, encoded as UTF-8, is wrapped in a ``ByteStringObject``.
    """
    buffer = BytesIO()
    self._write_pdf_structure(buffer)
    # Rewind so the checksum covers everything that was just written.
    buffer.seek(0)
    checksum = _rolling_checksum(buffer)
    return ByteStringObject(checksum.encode("utf8"))
def _compute_document_identifier(self) -> ByteStringObject:
    """
    Build a document identifier without serializing the document.

    The MD5 digest is fed, in this order, with the current time, the string
    form of ``self.fileobj``, the number of entries in ``self._objects``
    and, when the writer has an ``_info`` attribute, every ``key=value``
    pair of that dictionary. The hex digest, encoded as UTF-8, is returned
    wrapped in a ``ByteStringObject``.
    """
    seeds = [
        str(time.time()),
        str(self.fileobj),
        str(len(self._objects)),
    ]
    if hasattr(self, "_info"):
        info = cast(DictionaryObject, self._info.get_object())
        seeds.extend(f"{key}={value}" for key, value in info.items())

    digest = hashlib.md5()
    for seed in seeds:
        digest.update(seed.encode("utf-8"))
    return ByteStringObject(digest.hexdigest().encode("utf-8"))
1136 | 1135 |
|
def generate_file_identifiers(self) -> None:
    """
    Generate the pair of file identifiers for the PDF that will be written.

    The only point of this is ensuring uniqueness; reproducibility is not
    required. When a file is first written, both identifiers shall be set
    to the same value. If both identifiers match when a file reference is
    resolved, it is very likely that the correct and unchanged file has
    been found. If only the first identifier matches, a different version
    of the correct file has been found.

    See 14.4 "File Identifiers".
    """
    if not self._ID:
        # First write: both identifiers carry the same value.
        first = self._compute_document_identifier()
        second = ByteStringObject(first.original_bytes)
    else:
        # Identifiers already exist: keep the first, refresh the second.
        first = self._ID[0]
        second = self._compute_document_identifier()
    self._ID = ArrayObject((first, second))
1150 | 1155 |
|
1151 | 1156 | def encrypt(
|
1152 | 1157 | self,
|
@@ -1230,7 +1235,9 @@ def encrypt(
|
1230 | 1235 | if not use_128bit:
|
1231 | 1236 | alg = EncryptAlgorithm.RC4_40
|
1232 | 1237 | self.generate_file_identifiers()
|
1233 |
| - self._encryption = Encryption.make(alg, permissions_flag, self._ID[0]) |
| 1238 | + self._encryption = Encryption.make( |
| 1239 | + alg, permissions_flag, cast(ArrayObject, self._ID)[0] |
| 1240 | + ) |
1234 | 1241 | # in case call `encrypt` again
|
1235 | 1242 | entry = self._encryption.write_entry(user_password, owner_password)
|
1236 | 1243 | if self._encrypt_entry:
|
@@ -1331,7 +1338,7 @@ def _write_trailer(self, stream: StreamType, xref_location: int) -> None:
|
1331 | 1338 | NameObject(TK.INFO): self._info,
|
1332 | 1339 | }
|
1333 | 1340 | )
|
1334 |
| - if hasattr(self, "_ID"): |
| 1341 | + if self._ID: |
1335 | 1342 | trailer[NameObject(TK.ID)] = self._ID
|
1336 | 1343 | if self._encrypt_entry:
|
1337 | 1344 | trailer[NameObject(TK.ENCRYPT)] = self._encrypt_entry.indirect_reference
|
|
0 commit comments