Skip to content

Commit 85ca63e

Browse files
authored
BUG: Decode Black only/CMYK deviceN images (#1984)
Closes #1979
1 parent c2a741e commit 85ca63e

File tree

3 files changed

+45 -15 lines changed

3 files changed

+45 -15 lines changed

pypdf/constants.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -224,6 +224,7 @@ class ImageAttributes:
224224
BITS_PER_COMPONENT = "/BitsPerComponent" # integer, required
225225
COLOR_SPACE = "/ColorSpace" # name, required
226226
DECODE = "/Decode" # array, optional
227+
INTENT = "/Intent" # string, optional
227228
INTERPOLATE = "/Interpolate" # boolean, optional
228229
IMAGE_MASK = "/ImageMask" # boolean, optional
229230
MASK = "/Mask" # 1-bit image mask stream

pypdf/filters.py

Lines changed: 23 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -647,7 +647,9 @@ def decodeStreamData(stream: Any) -> Union[str, bytes]: # deprecated
647647

648648

649649
def _get_imagemode(
650-
color_space: Union[str, List[Any]], color_components: int, prev_mode: mode_str_type
650+
color_space: Union[str, List[Any], Any],
651+
color_components: int,
652+
prev_mode: mode_str_type,
651653
) -> mode_str_type:
652654
"""Returns the image mode not taking into account mask(transparency)"""
653655
if isinstance(color_space, str):
@@ -663,26 +665,29 @@ def _get_imagemode(
663665
color_components = cast(int, icc_profile["/N"])
664666
color_space = icc_profile.get("/Alternate", "")
665667
elif color_space[0] == "/Indexed":
666-
color_space = color_space[1].get_object()
667-
if isinstance(color_space, list):
668-
color_space = color_space[1].get_object().get("/Alternate", "")
669-
color_components = 1 if "Gray" in color_space else 2
670-
if not (isinstance(color_space, str) and "Gray" in color_space):
671-
color_space = "palette"
668+
color_space = color_space[1]
669+
if isinstance(color_space, IndirectObject):
670+
color_space = color_space.get_object()
671+
mode2 = _get_imagemode(color_space, color_components, prev_mode)
672+
if mode2 in ("RGB", "CMYK"):
673+
mode2 = "P"
674+
return mode2
672675
elif color_space[0] == "/Separation":
673676
color_space = color_space[2]
674677
elif color_space[0] == "/DeviceN":
675-
color_space = color_space[2]
676678
color_components = len(color_space[1])
679+
color_space = color_space[2]
680+
if isinstance(color_space, IndirectObject): # pragma: no cover
681+
color_space = color_space.get_object()
677682

678683
mode_map = {
679-
"1bit": "1", # 0 will be used for 1 bit
684+
"1bit": "1", # pos [0] will be used for 1 bit
685+
"/DeviceGray": "L", # must be in pos [1]
686+
"palette": "P", # must be in pos [2] for color_components align.
687+
"/DeviceRGB": "RGB", # must be in pos [3]
688+
"/DeviceCMYK": "CMYK", # must be in pos [4]
680689
"2bit": "2bits", # 2 bits images
681690
"4bit": "4bits", # 4 bits
682-
"/DeviceGray": "L",
683-
"palette": "P", # reserved for color_components alignment
684-
"/DeviceRGB": "RGB",
685-
"/DeviceCMYK": "CMYK",
686691
}
687692
mode: mode_str_type = (
688693
mode_map.get(color_space) # type: ignore
@@ -913,8 +918,11 @@ def _handle_jpx(
913918
# CMYK image without decode requires reverting scale (cf p243,2§ last sentence)
914919
decode = x_object_obj.get(
915920
IA.DECODE,
916-
([1.0, 0.0] * 4)
917-
if img.mode == "CMYK" and lfilters in (FT.DCT_DECODE, FT.JPX_DECODE)
921+
([1.0, 0.0] * len(img.getbands()))
922+
if (
923+
(img.mode == "CMYK" or (mode == "CMYK" and img.mode == "L"))
924+
and lfilters in (FT.DCT_DECODE, FT.JPX_DECODE)
925+
)
918926
else None,
919927
)
920928
if (

tests/test_filters.py

Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -485,3 +485,24 @@ def test_2bits_image():
485485
sum([(a * a + b * b + c * c + d * d) for a, b, c, d in diff.getdata()])
486486
) / (diff.size[0] * diff.size[1])
487487
assert d < 0.01
488+
489+
490+
@pytest.mark.enable_socket()
491+
def test_gray_devicen_cmyk():
492+
"""
493+
Cf #1979
494+
Gray Image in CMYK : requiring reverse
495+
"""
496+
url = "https://github.com/py-pdf/pypdf/files/12080338/example_121.pdf"
497+
name = "gray_cmyk.pdf"
498+
reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
499+
url_png = "https://user-images.githubusercontent.com/4083478/254545494-42df4949-1557-4f2d-acca-6be6e8de1122.png"
500+
name_png = "velo.png"
501+
refimg = Image.open(
502+
BytesIO(get_pdf_from_url(url_png, name=name_png))
503+
) # not a pdf but it works
504+
data = reader.pages[0].images[0]
505+
assert data.image.mode == "L"
506+
diff = ImageChops.difference(data.image, refimg)
507+
d = sqrt(sum([(a * a) for a in diff.getdata()])) / (diff.size[0] * diff.size[1])
508+
assert d < 0.001

0 commit comments

Comments
 (0)