Skip to content

Commit e8e2ab6

Browse files
ENH: Add PDF/A XMP metadata support (#3314)
Closes #3313. --------- Signed-off-by: Arya Nair <aryaajitnair@gmail.com> Co-authored-by: Stefan <96178532+stefan6419846@users.noreply.github.com>
1 parent 67d1ba1 commit e8e2ab6

File tree

2 files changed

+31
-0
lines changed

2 files changed

+31
-0
lines changed

pypdf/xmp.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,9 @@
5252
# Schemas".
5353
PDFX_NAMESPACE = "http://ns.adobe.com/pdfx/1.3/"
5454

55+
# PDF/A
56+
PDFAID_NAMESPACE = "http://www.aiim.org/pdfa/ns/id/"
57+
5558
iso8601 = re.compile(
5659
"""
5760
(?P<year>[0-9]{4})
@@ -363,6 +366,12 @@ def _get_text(self, element: XmlElement) -> str:
363366
"""An identifier for a specific incarnation of a document, updated each
364367
time a file is saved."""
365368

369+
pdfaid_part = property(_getter_single(PDFAID_NAMESPACE, "part"))
370+
"""The part of the PDF/A standard that the document conforms to, as a string (e.g., '1', '2', '3'), or None if not present."""
371+
372+
pdfaid_conformance = property(_getter_single(PDFAID_NAMESPACE, "conformance"))
373+
"""The conformance level within the PDF/A standard (e.g., 'A', 'B', 'U'), or None if not present."""
374+
366375
@property
367376
def custom_properties(self) -> Dict[Any, Any]:
368377
"""

tests/test_xmp.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -272,3 +272,25 @@ class Tst: # to replace pdf
272272

273273
assert xmp_info is not None
274274
f(xmp_info)
275+
276+
277+
@pytest.mark.samples
def test_pdfa_xmp_metadata_with_values():
    """Read the PDF/A identification fields from a sample that declares them."""
    sample_path = SAMPLE_ROOT / "021-pdfa" / "crazyones-pdfa.pdf"
    pdf = PdfReader(sample_path)
    metadata = pdf.xmp_metadata

    # The sample must carry an XMP packet at all before its fields are checked.
    assert metadata is not None
    # Values are exposed as strings: part "1", conformance level "B".
    assert metadata.pdfaid_part == "1"
    assert metadata.pdfaid_conformance == "B"
286+
287+
288+
@pytest.mark.samples
def test_pdfa_xmp_metadata_without_values():
    """Check that the PDF/A fields are None when the XMP packet lacks them."""
    sample_path = SAMPLE_ROOT / "020-xmp" / "output_with_metadata_pymupdf.pdf"
    pdf = PdfReader(sample_path)
    metadata = pdf.xmp_metadata

    # XMP metadata exists in this sample, but without PDF/A identification.
    assert metadata is not None
    assert metadata.pdfaid_part is None
    assert metadata.pdfaid_conformance is None

0 commit comments

Comments
 (0)