Skip to content

Commit 672f080

Browse files
committed
Update function
1 parent b7a5245 commit 672f080

File tree

2 files changed

+50
-157
lines changed

2 files changed

+50
-157
lines changed

pypdf/filters.py

Lines changed: 12 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -487,7 +487,11 @@ def decode(
487487

488488

489489
class BrotliDecode:
490-
"""Decodes Brotli-compressed data (ISO 32000-2:2020, Section 7.4.11)."""
490+
"""
491+
Decodes Brotli-compressed data.
492+
493+
See ISO 32000-2:2020, Section 7.4.11.
494+
"""
491495
@staticmethod
492496
def decode(
493497
data: bytes,
@@ -499,16 +503,15 @@ def decode(
499503
500504
Args:
501505
data: Brotli-compressed data.
502-
decode_parms: Optional parameters (unused).
506+
decode_parms: A dictionary of parameter values (unused).
503507
504508
Returns:
505-
Decompressed data.
509+
The decompressed data.
506510
507511
Raises:
508-
ImportError: If brotli library is not installed.
512+
ImportError: If the 'brotli' library is not installed.
509513
"""
510514
if brotli is None:
511-
# Raise ImportError as the required library is missing
512515
raise ImportError("Brotli library not installed. Required for BrotliDecode filter.")
513516
return brotli.decompress(data)
514517

@@ -518,16 +521,16 @@ def encode(data: bytes, **kwargs: Any) -> bytes:
518521
Encode data using Brotli compression.
519522
520523
Args:
521-
data: Data to compress.
524+
data: The data to be compressed.
525+
**kwargs: Additional keyword arguments (unused).
522526
523527
Returns:
524-
Compressed data.
528+
The compressed data.
525529
526530
Raises:
527-
ImportError: If brotli library is not installed.
531+
ImportError: If the 'brotli' library is not installed.
528532
"""
529533
if brotli is None:
530-
# Raise ImportError as the required library is missing
531534
raise ImportError("Brotli library not installed. Required for BrotliDecode filter.")
532535
return brotli.compress(data)
533536

tests/test_filters.py

Lines changed: 38 additions & 148 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,12 @@
88
from io import BytesIO
99
from itertools import product as cartesian_product
1010
from pathlib import Path
11-
from typing import cast
11+
from unittest.mock import patch
1212

1313
import pytest
1414
from PIL import Image, ImageOps
1515

1616
from pypdf import PdfReader
17-
from pypdf.constants import FilterTypeAbbreviations as FTA
18-
from pypdf.constants import FilterTypes as FT
19-
from pypdf.constants import StreamAttributes as SA
2017
from pypdf.errors import DeprecationError, PdfReadError
2118
from pypdf.filters import (
2219
ASCII85Decode,
@@ -48,33 +45,6 @@
4845
RESOURCE_ROOT = PROJECT_ROOT / "resources"
4946

5047

51-
# Helper function for subprocess testing without brotli
52-
def _run_script_without_brotli(tmp_path, script_content) -> None:
53-
env = os.environ.copy()
54-
env["COVERAGE_PROCESS_START"] = str(PROJECT_ROOT / "pyproject.toml") # Ensure coverage
55-
56-
source_file = tmp_path / "script_no_brotli.py"
57-
source_file.write_text(script_content)
58-
59-
try:
60-
env["PYTHONPATH"] = str(PROJECT_ROOT) + os.pathsep + env["PYTHONPATH"]
61-
except KeyError:
62-
env["PYTHONPATH"] = str(PROJECT_ROOT)
63-
64-
result = subprocess.run( # noqa: S603
65-
[shutil.which("python"), source_file],
66-
capture_output=True,
67-
env=env,
68-
cwd=PROJECT_ROOT, # Run from project root
69-
)
70-
# Check stderr for unexpected errors from the subprocess itself
71-
if result.stderr:
72-
pass # Print removed for committed code
73-
assert result.returncode == 0, f"Subprocess failed with exit code {result.returncode}"
74-
# Allow specific stdout messages if needed, otherwise assert empty
75-
# assert result.stdout == b"", "Subprocess produced unexpected stdout"
76-
# Allow specific stderr messages if needed, otherwise assert empty
77-
# assert result.stderr == b"", "Subprocess produced unexpected stderr"
7848

7949

8050
@pytest.mark.parametrize(("predictor", "s"), list(cartesian_product([1], filter_inputs)))
@@ -95,54 +65,42 @@ def test_brotli_decode_encode(s):
9565
assert encoded != s_bytes # Ensure encoding actually happened
9666
decoded = codec.decode(encoded)
9767
assert decoded == s_bytes
68+
@patch("pypdf.filters.brotli", None)
69+
def test_brotli_missing_installation_mocked():
70+
"""Verify BrotliDecode raises ImportError if brotli is not installed (using mock)."""
71+
# Need to reload the filters module AFTER the patch is active
72+
# so that the 'brotli is None' check uses the mocked value
73+
import importlib
9874

75+
import pypdf.filters
76+
from pypdf.generic import DictionaryObject, NameObject
77+
importlib.reload(pypdf.filters) # Reload to see the patched 'None'
9978

100-
def test_brotli_decode_without_brotli_installed_subprocess(tmp_path):
101-
"""Verify BrotliDecode.decode raises ImportError via subprocess if brotli is not installed."""
102-
script = """
103-
import sys
104-
import pytest
105-
from pypdf.filters import BrotliDecode
106-
107-
# Simulate brotli not being installed
108-
sys.modules["brotli"] = None
109-
# Need to reload filters to make the None effective inside the module
110-
import importlib
111-
import pypdf.filters
112-
importlib.reload(pypdf.filters)
79+
# Test direct decode call
80+
codec = pypdf.filters.BrotliDecode()
81+
with pytest.raises(ImportError) as exc_info_decode:
82+
codec.decode(b"test data")
83+
assert "Brotli library not installed" in str(exc_info_decode.value)
11384

114-
codec = pypdf.filters.BrotliDecode()
115-
with pytest.raises(ImportError) as exc_info:
116-
codec.decode(b"test data")
85+
# Test direct encode call
86+
with pytest.raises(ImportError) as exc_info_encode:
87+
codec.encode(b"test data")
88+
assert "Brotli library not installed" in str(exc_info_encode.value)
11789

118-
assert "Brotli library not installed. Required for BrotliDecode filter." in str(exc_info.value)
119-
print("Test finished successfully: decode without brotli") # Add print to confirm script completion
120-
"""
121-
_run_script_without_brotli(tmp_path, script)
90+
# Test call via decode_stream_data
91+
stream = DictionaryObject()
92+
stream[NameObject("/Filter")] = NameObject("/BrotliDecode")
93+
stream._data = b"dummy compressed data" # Data doesn't matter as decode won't run
94+
with pytest.raises(ImportError) as exc_info_stream:
95+
pypdf.filters.decode_stream_data(stream)
96+
assert "Brotli library not installed" in str(exc_info_stream.value)
12297

98+
# Important: Reload again after the test to restore the original state for other tests
99+
importlib.reload(pypdf.filters)
123100

124-
def test_brotli_encode_without_brotli_installed_subprocess(tmp_path):
125-
"""Verify BrotliDecode.encode raises ImportError via subprocess if brotli is not installed."""
126-
script = """
127-
import sys
128-
import pytest
129-
from pypdf.filters import BrotliDecode
130101

131-
# Simulate brotli not being installed
132-
sys.modules["brotli"] = None
133-
# Need to reload filters to make the None effective inside the module
134-
import importlib
135-
import pypdf.filters
136-
importlib.reload(pypdf.filters)
137102

138-
codec = pypdf.filters.BrotliDecode()
139-
with pytest.raises(ImportError) as exc_info:
140-
codec.encode(b"test data")
141103

142-
assert "Brotli library not installed. Required for BrotliDecode filter." in str(exc_info.value)
143-
print("Test finished successfully: encode without brotli") # Add print to confirm script completion
144-
"""
145-
_run_script_without_brotli(tmp_path, script)
146104

147105

148106
def test_flatedecode_unsupported_predictor():
@@ -312,7 +270,9 @@ class Pdf:
312270
def get_object(self, reference) -> NumberObject:
313271
return NumberObject(42)
314272

315-
parameters = CCITTFaxDecode._get_parameters(parameters=None, rows=IndirectObject(13, 1, Pdf()))
273+
parameters = CCITTFaxDecode._get_parameters(
274+
parameters=None, rows=IndirectObject(13, 1, Pdf())
275+
)
316276
assert parameters.rows == 42
317277

318278

@@ -774,94 +734,24 @@ def test_flate_decode__not_rectangular(caplog):
774734
assert caplog.messages == ["Image data is not rectangular. Adding padding."]
775735

776736

777-
def test_main_decode_brotli_without_brotli_installed_subprocess(tmp_path):
778-
"""Test decode_stream_data raises ImportError via subprocess if brotli is not installed."""
779-
original_data = b"some data to be compressed with brotli"
780-
# We need brotli here in the main process to create the test data
781-
try:
782-
import brotli
783737

784-
compressed_data = brotli.compress(original_data)
785-
except ImportError:
786-
pytest.skip("brotli library not installed in the main test environment")
787738

788-
script = f"""
789-
import sys
790-
import pytest
791-
from pypdf import filters
792-
from pypdf.generic import DictionaryObject, NameObject
793-
794-
# Simulate brotli not being installed
795-
sys.modules["brotli"] = None
796-
# Need to reload filters to make the None effective inside the module
797-
import importlib
798-
import pypdf.filters
799-
importlib.reload(pypdf.filters)
800-
801-
# Simulate a stream dictionary indicating BrotliDecode
802-
stream = DictionaryObject()
803-
stream[NameObject("/Filter")] = NameObject("/BrotliDecode")
804-
# Pass compressed data as hex to avoid encoding issues in the script string
805-
stream._data = bytes.fromhex('{compressed_data.hex()}')
806-
807-
# Call the main decode function and expect an error
808-
with pytest.raises(ImportError) as exc_info:
809-
filters.decode_stream_data(stream)
810-
811-
assert "Brotli library not installed. Required for BrotliDecode filter." in str(exc_info.value)
812-
print("Test finished successfully: main decode without brotli") # Add print to confirm script completion
813-
"""
814-
_run_script_without_brotli(tmp_path, script)
815-
816-
817-
# Renamed from test_main_decode_brotli
818739
def test_main_decode_brotli_installed():
819740
"""Test the main decode function with Brotli filter using a real PDF."""
820741
if importlib.util.find_spec("brotli") is None:
821742
pytest.skip("brotli library not installed")
822743

823-
# Use the prototype PDF provided by PDF Association
744+
# Use the test PDF generated by resources/create_brotli_test_pdf.py
824745
pdf_path = RESOURCE_ROOT / "brotli-test-pdfs" / "minimal-brotli-compressed.pdf"
825-
if not pdf_path.exists():
826-
pytest.skip(f"Brotli test PDF not found at {pdf_path}")
827746

828747
reader = PdfReader(pdf_path)
829-
# Assuming the first page's content stream uses Brotli
830-
# Access the raw stream object. Need to get the indirect object first.
831748
page = reader.pages[0]
832-
content_stream_ref = page[NameObject("/Contents")]
833-
# Handle cases where /Contents might be an array
834-
if isinstance(content_stream_ref, ArrayObject):
835-
# For simplicity, let's assume the first stream in the array uses Brotli
836-
# A more robust test might check all streams or find one specifically with /BrotliDecode
837-
if not content_stream_ref:
838-
pytest.skip("Content stream array is empty.")
839-
stream_obj = content_stream_ref[0].get_object()
840-
else:
841-
stream_obj = content_stream_ref.get_object()
842-
843-
# Check if the stream actually uses BrotliDecode
844-
filters = stream_obj.get(SA.FILTER, ())
845-
if isinstance(filters, IndirectObject):
846-
filters = cast(ArrayObject, filters.get_object())
847-
if not isinstance(filters, (ArrayObject, list)):
848-
filters = (filters,)
849-
850-
if FT.BROTLI_DECODE not in filters and FTA.BR not in filters:
851-
pytest.skip("Selected stream does not use BrotliDecode filter.")
852-
853-
# Call the main decode function directly on the stream object
854-
from pypdf import filters
855749

750+
# Extract text - this will implicitly use the BrotliDecode filter
856751
try:
857-
decoded_data = filters.decode_stream_data(stream_obj)
752+
extracted_text = page.extract_text()
858753
except Exception as e:
859-
pytest.fail(f"decode_stream_data failed with error: {e}")
860-
861-
# Since we don't know the exact content, assert that decoding succeeded
862-
# and returned some non-empty data.
863-
assert isinstance(decoded_data, bytes)
864-
assert len(decoded_data) > 0
865-
# We could add a basic check, e.g., if we expect text content
866-
# assert b"some_expected_keyword" in decoded_data
867-
# But without knowing the content, checking non-empty is the safest bet.
754+
pytest.fail(f"page.extract_text() failed with error: {e}")
755+
756+
# Verify the expected text content
757+
assert extracted_text.strip() == "Hello Brotli!"

0 commit comments

Comments
 (0)