Skip to content

Commit c2a741e

Browse files
authored
BUG: Process CMYK in deflate images (#1977)
Closes #1954
1 parent 1d16ca5 commit c2a741e

File tree

2 files changed

+33
-5
lines changed

2 files changed

+33
-5
lines changed

pypdf/filters.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -743,6 +743,7 @@ def bits2byte(data: bytes, size: Tuple[int, int], bits: int) -> bytes:
743743
return bytes(nbuff)
744744

745745
extension = ".png" # mime_type = "image/png"
746+
image_format = "PNG"
746747
lookup: Any
747748
base: Any
748749
hival: Any
@@ -794,10 +795,14 @@ def bits2byte(data: bytes, size: Tuple[int, int], bits: int) -> bytes:
794795
elif not isinstance(color_space, NullObject) and color_space[0] == "/ICCBased":
795796
# see Table 66 - Additional Entries Specific to an ICC Profile
796797
# Stream Dictionary
797-
mode = _get_imagemode(color_space, colors, mode)
798-
extension = ".png"
799-
img = Image.frombytes(mode, size, data) # reloaded as mode may have change
800-
image_format = "PNG"
798+
mode2 = _get_imagemode(color_space, colors, mode)
799+
if mode != mode2:
800+
img = Image.frombytes(
801+
mode2, size, data
802+
) # reloaded as mode may have changed
803+
if mode == "CMYK":
804+
extension = ".tif"
805+
image_format = "TIFF"
801806
return img, image_format, extension
802807

803808
def _handle_jpx(
@@ -907,7 +912,10 @@ def _handle_jpx(
907912

908913
# CMYK image without decode requires reverting scale (cf p243,2§ last sentence)
909914
decode = x_object_obj.get(
910-
IA.DECODE, ([1.0, 0.0] * 4) if img.mode == "CMYK" else None
915+
IA.DECODE,
916+
([1.0, 0.0] * 4)
917+
if img.mode == "CMYK" and lfilters in (FT.DCT_DECODE, FT.JPX_DECODE)
918+
else None,
911919
)
912920
if (
913921
isinstance(color_space, ArrayObject)

tests/test_filters.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -388,6 +388,7 @@ def test_rgba():
388388
@pytest.mark.enable_socket()
389389
def test_cmyk():
390390
"""Decode cmyk"""
391+
# JPEG compression
391392
try:
392393
from Crypto.Cipher import AES # noqa: F401
393394
except ImportError:
@@ -401,11 +402,30 @@ def test_cmyk():
401402
BytesIO(get_pdf_from_url(url_png, name=name_png))
402403
) # not a pdf but it works
403404
data = reader.pages[1].images[0]
405+
assert data.image.mode == "CMYK"
406+
assert ".jpg" in data.name
404407
diff = ImageChops.difference(data.image, refimg)
405408
d = sqrt(
406409
sum([(a * a + b * b + c * c + d * d) for a, b, c, d in diff.getdata()])
407410
) / (diff.size[0] * diff.size[1])
408411
assert d < 0.01
412+
# deflate
413+
url = "https://github.com/py-pdf/pypdf/files/12078533/cmyk2.pdf"
414+
name = "cmyk_deflate.pdf"
415+
reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
416+
url_png = "https://github.com/py-pdf/pypdf/files/12078556/cmyk.tif.txt"
417+
name_png = "cmyk_deflate.tif"
418+
refimg = Image.open(
419+
BytesIO(get_pdf_from_url(url_png, name=name_png))
420+
) # not a pdf but it works
421+
data = reader.pages[0].images[0]
422+
assert data.image.mode == "CMYK"
423+
assert ".tif" in data.name
424+
diff = ImageChops.difference(data.image, refimg)
425+
d = sqrt(
426+
sum([(a * a + b * b + c * c + d * d) for a, b, c, d in diff.getdata()])
427+
) / (diff.size[0] * diff.size[1])
428+
assert d < 0.001 # lossless compression expected
409429

410430

411431
@pytest.mark.enable_socket()

0 commit comments

Comments
 (0)