Skip to content

Commit 908797f

Browse files
TST: Increase test coverage for flate handling of image mode 1 (#2339)
As mentioned in #2331, this improves the test coverage for the edge cases. Further refactoring was necessary: iterating over a bytes object yields integers instead of single bytes, so the previous whitespace check was broken. Additionally, the whitespace check was previously always performed after the lookup data had already been shortened to the expected length, so the trailing slice it inspected was always empty.
1 parent 26e31cd commit 908797f

File tree

4 files changed

+98
-3
lines changed

4 files changed

+98
-3
lines changed

pypdf/_utils.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,23 @@ def skip_over_whitespace(stream: StreamType) -> bool:
190190
return cnt > 1
191191

192192

193+
def check_if_whitespace_only(value: bytes) -> bool:
    """
    Tell whether every byte of the given value is a whitespace character.

    Args:
        value: The bytes to inspect.

    Returns:
        True if all bytes are found in WHITESPACES (an empty value also
        yields True), otherwise False.
    """
    # Iterating over bytes directly yields integers; one-byte slices keep
    # each element bytes-like for the membership test against WHITESPACES.
    return all(
        value[start:start + 1] in WHITESPACES
        for start in range(len(value))
    )
208+
209+
193210
def skip_over_comment(stream: StreamType) -> None:
194211
tok = stream.read(1)
195212
stream.seek(-1, 1)

pypdf/_xobj_image_helpers.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from io import BytesIO
55
from typing import Any, List, Tuple, Union, cast
66

7-
from ._utils import WHITESPACES, logger_warning
7+
from ._utils import check_if_whitespace_only, logger_warning
88
from .constants import ColorSpaces
99
from .errors import PdfReadError
1010
from .generic import (
@@ -199,9 +199,9 @@ def bits2byte(data: bytes, size: Tuple[int, int], bits: int) -> bytes:
199199
if len(lookup) != expected_count:
200200
if len(lookup) < expected_count:
201201
raise PdfReadError(f"Not enough lookup values: Expected {expected_count}, got {len(lookup)}.")
202-
lookup = lookup[:expected_count]
203-
if not all(_value in WHITESPACES for _value in lookup[expected_count:]):
202+
if not check_if_whitespace_only(lookup[expected_count:]):
204203
raise PdfReadError(f"Too many lookup values: Expected {expected_count}, got {len(lookup)}.")
204+
lookup = lookup[:expected_count]
205205
colors_arr = [lookup[:nb], lookup[nb:]]
206206
arr = b"".join(
207207
[

tests/test_utils.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
Version,
1111
_get_max_pdf_version_header,
1212
_human_readable_bytes,
13+
check_if_whitespace_only,
1314
deprecate_with_replacement,
1415
deprecation_bookmark,
1516
deprecation_no_replacement,
@@ -48,6 +49,23 @@ def test_skip_over_whitespace(stream, expected):
4849
assert skip_over_whitespace(stream) == expected
4950

5051

52+
@pytest.mark.parametrize(
    ("value", "expected"),
    [
        # Values containing at least one non-whitespace byte.
        (b"foo", False),
        (b" a", False),
        (b" a\n b", False),
        # Empty value and whitespace-only values.
        # NOTE(review): some pairs below look identical; runs of spaces may
        # have been collapsed when this listing was captured - confirm
        # against the repository.
        (b"", True),
        (b" ", True),
        (b" ", True),
        (b" \n", True),
        (b" \n", True),
    ],
)
def test_check_if_whitespace_only(value, expected):
    """Verify that `check_if_whitespace_only` returns exactly the expected bool."""
    outcome = check_if_whitespace_only(value)
    assert outcome is expected
67+
68+
5169
def test_read_until_whitespace():
5270
assert read_until_whitespace(io.BytesIO(b"foo"), maxchars=1) == b"f"
5371

tests/test_xobject_image_helpers.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,9 @@
44
import pytest
55

66
from pypdf import PdfReader
7+
from pypdf._xobj_image_helpers import _handle_flate
78
from pypdf.errors import PdfReadError
9+
from pypdf.generic import ArrayObject, DecodedStreamObject, NameObject, NumberObject
810

911
from . import get_data_from_url
1012

@@ -25,3 +27,61 @@ def test_get_imagemode_recursion_depth():
2527
match="Color spaces nested too deep. If required, consider increasing MAX_IMAGE_MODE_NESTING_DEPTH.",
2628
):
2729
reader.pages[0].images[0]
30+
31+
32+
def test_handle_flate__image_mode_1():
    """
    Exercise `_handle_flate` for a 3x3 image in mode "1" with an indexed
    `/DeviceRGB` color space whose lookup data is exact, padded with
    whitespace, too long, or too short.
    """
    data = b"\x00\xe0\x00"
    lookup = DecodedStreamObject()
    first_color = (66, 66, 66)
    second_color = (0, 19, 55)
    # Three rows of three pixels: first color, second color, first color.
    expected_data = [first_color] * 3 + [second_color] * 3 + [first_color] * 3

    def run_flate():
        # Every scenario uses the same call; only the content of `lookup`
        # differs and is set by the caller beforehand.
        color_space = ArrayObject(
            [NameObject("/Indexed"), NameObject("/DeviceRGB"), NumberObject(1), lookup]
        )
        return _handle_flate(
            size=(3, 3),
            data=data,
            mode="1",
            color_space=color_space,
            colors=2,
            obj_as_text="dummy",
        )

    # No trailing data.
    lookup.set_data(b"\x42\x42\x42\x00\x13\x37")
    result = run_flate()
    assert expected_data == list(result[0].getdata())

    # Trailing whitespace.
    lookup.set_data(b"\x42\x42\x42\x00\x13\x37 \x0a")
    result = run_flate()
    assert expected_data == list(result[0].getdata())

    # Trailing non-whitespace character.
    lookup.set_data(b"\x42\x42\x42\x00\x13\x37\x12")
    with pytest.raises(PdfReadError, match=r"^Too many lookup values: Expected 6, got 7\.$"):
        run_flate()

    # Not enough lookup data.
    lookup.set_data(b"\x42\x42\x42\x00\x13")
    with pytest.raises(PdfReadError, match=r"^Not enough lookup values: Expected 6, got 5\.$"):
        run_flate()

0 commit comments

Comments
 (0)