8
8
from io import BytesIO
9
9
from itertools import product as cartesian_product
10
10
from pathlib import Path
11
- from typing import cast
11
+ from unittest . mock import patch
12
12
13
13
import pytest
14
14
from PIL import Image , ImageOps
15
15
16
16
from pypdf import PdfReader
17
- from pypdf .constants import FilterTypeAbbreviations as FTA
18
- from pypdf .constants import FilterTypes as FT
19
- from pypdf .constants import StreamAttributes as SA
20
17
from pypdf .errors import DeprecationError , PdfReadError
21
18
from pypdf .filters import (
22
19
ASCII85Decode ,
48
45
RESOURCE_ROOT = PROJECT_ROOT / "resources"
49
46
50
47
51
- # Helper function for subprocess testing without brotli
52
- def _run_script_without_brotli (tmp_path , script_content ) -> None :
53
- env = os .environ .copy ()
54
- env ["COVERAGE_PROCESS_START" ] = str (PROJECT_ROOT / "pyproject.toml" ) # Ensure coverage
55
-
56
- source_file = tmp_path / "script_no_brotli.py"
57
- source_file .write_text (script_content )
58
-
59
- try :
60
- env ["PYTHONPATH" ] = str (PROJECT_ROOT ) + os .pathsep + env ["PYTHONPATH" ]
61
- except KeyError :
62
- env ["PYTHONPATH" ] = str (PROJECT_ROOT )
63
-
64
- result = subprocess .run ( # noqa: S603
65
- [shutil .which ("python" ), source_file ],
66
- capture_output = True ,
67
- env = env ,
68
- cwd = PROJECT_ROOT , # Run from project root
69
- )
70
- # Check stderr for unexpected errors from the subprocess itself
71
- if result .stderr :
72
- pass # Print removed for committed code
73
- assert result .returncode == 0 , f"Subprocess failed with exit code { result .returncode } "
74
- # Allow specific stdout messages if needed, otherwise assert empty
75
- # assert result.stdout == b"", "Subprocess produced unexpected stdout"
76
- # Allow specific stderr messages if needed, otherwise assert empty
77
- # assert result.stderr == b"", "Subprocess produced unexpected stderr"
78
48
79
49
80
50
@pytest .mark .parametrize (("predictor" , "s" ), list (cartesian_product ([1 ], filter_inputs )))
@@ -95,54 +65,42 @@ def test_brotli_decode_encode(s):
95
65
assert encoded != s_bytes # Ensure encoding actually happened
96
66
decoded = codec .decode (encoded )
97
67
assert decoded == s_bytes
68
@patch("pypdf.filters.brotli", None)
def test_brotli_missing_installation_mocked():
    """
    Verify the Brotli code paths raise ImportError when brotli is unavailable.

    The missing dependency is simulated by the ``patch`` decorator, which
    rebinds ``pypdf.filters.brotli`` to ``None`` for the duration of the test
    and restores the previous value when the test returns or raises.

    Three entry points are exercised:

    * ``BrotliDecode.decode`` called directly,
    * ``BrotliDecode.encode`` called directly,
    * ``decode_stream_data`` for a stream whose ``/Filter`` is
      ``/BrotliDecode``.
    """
    # Deliberately no importlib.reload(pypdf.filters): a reload re-executes the
    # module body, which presumably re-imports brotli and would overwrite the
    # patched ``None`` whenever brotli is really installed. It would also swap
    # out the module's classes for every test that runs afterwards, and a
    # trailing "restore" reload is skipped as soon as one assertion fails.
    from pypdf import filters
    from pypdf.generic import DictionaryObject, NameObject

    expected_message = "Brotli library not installed"
    codec = filters.BrotliDecode()

    # Direct decode call
    with pytest.raises(ImportError, match=expected_message):
        codec.decode(b"test data")

    # Direct encode call
    with pytest.raises(ImportError, match=expected_message):
        codec.encode(b"test data")

    # Call via decode_stream_data
    stream = DictionaryObject()
    stream[NameObject("/Filter")] = NameObject("/BrotliDecode")
    # The payload is arbitrary: the ImportError is expected before any
    # decompression is attempted.
    stream._data = b"dummy compressed data"
    with pytest.raises(ImportError, match=expected_message):
        filters.decode_stream_data(stream)
123
100
124
- def test_brotli_encode_without_brotli_installed_subprocess (tmp_path ):
125
- """Verify BrotliDecode.encode raises ImportError via subprocess if brotli is not installed."""
126
- script = """
127
- import sys
128
- import pytest
129
- from pypdf.filters import BrotliDecode
130
101
131
- # Simulate brotli not being installed
132
- sys.modules["brotli"] = None
133
- # Need to reload filters to make the None effective inside the module
134
- import importlib
135
- import pypdf.filters
136
- importlib.reload(pypdf.filters)
137
102
138
- codec = pypdf.filters.BrotliDecode()
139
- with pytest.raises(ImportError) as exc_info:
140
- codec.encode(b"test data")
141
103
142
- assert "Brotli library not installed. Required for BrotliDecode filter." in str(exc_info.value)
143
- print("Test finished successfully: encode without brotli") # Add print to confirm script completion
144
- """
145
- _run_script_without_brotli (tmp_path , script )
146
104
147
105
148
106
def test_flatedecode_unsupported_predictor ():
@@ -312,7 +270,9 @@ class Pdf:
312
270
def get_object (self , reference ) -> NumberObject :
313
271
return NumberObject (42 )
314
272
315
- parameters = CCITTFaxDecode ._get_parameters (parameters = None , rows = IndirectObject (13 , 1 , Pdf ()))
273
+ parameters = CCITTFaxDecode ._get_parameters (
274
+ parameters = None , rows = IndirectObject (13 , 1 , Pdf ())
275
+ )
316
276
assert parameters .rows == 42
317
277
318
278
@@ -774,94 +734,24 @@ def test_flate_decode__not_rectangular(caplog):
774
734
assert caplog .messages == ["Image data is not rectangular. Adding padding." ]
775
735
776
736
777
- def test_main_decode_brotli_without_brotli_installed_subprocess (tmp_path ):
778
- """Test decode_stream_data raises ImportError via subprocess if brotli is not installed."""
779
- original_data = b"some data to be compressed with brotli"
780
- # We need brotli here in the main process to create the test data
781
- try :
782
- import brotli
783
737
784
- compressed_data = brotli .compress (original_data )
785
- except ImportError :
786
- pytest .skip ("brotli library not installed in the main test environment" )
787
738
788
- script = f"""
789
- import sys
790
- import pytest
791
- from pypdf import filters
792
- from pypdf.generic import DictionaryObject, NameObject
793
-
794
- # Simulate brotli not being installed
795
- sys.modules["brotli"] = None
796
- # Need to reload filters to make the None effective inside the module
797
- import importlib
798
- import pypdf.filters
799
- importlib.reload(pypdf.filters)
800
-
801
- # Simulate a stream dictionary indicating BrotliDecode
802
- stream = DictionaryObject()
803
- stream[NameObject("/Filter")] = NameObject("/BrotliDecode")
804
- # Pass compressed data as hex to avoid encoding issues in the script string
805
- stream._data = bytes.fromhex('{ compressed_data .hex ()} ')
806
-
807
- # Call the main decode function and expect an error
808
- with pytest.raises(ImportError) as exc_info:
809
- filters.decode_stream_data(stream)
810
-
811
- assert "Brotli library not installed. Required for BrotliDecode filter." in str(exc_info.value)
812
- print("Test finished successfully: main decode without brotli") # Add print to confirm script completion
813
- """
814
- _run_script_without_brotli (tmp_path , script )
815
-
816
-
817
- # Renamed from test_main_decode_brotli
818
739
def test_main_decode_brotli_installed():
    """
    Test the main decode function with Brotli filter using a real PDF.

    The test is skipped when the ``brotli`` package cannot be imported.
    """
    # importorskip both checks for and imports brotli, so no separate
    # importlib.util lookup is needed.
    pytest.importorskip("brotli")

    # Use the test PDF generated by resources/create_brotli_test_pdf.py
    pdf_path = RESOURCE_ROOT / "brotli-test-pdfs" / "minimal-brotli-compressed.pdf"

    reader = PdfReader(pdf_path)
    page = reader.pages[0]

    # Extract text - this will implicitly use the BrotliDecode filter.
    # Exceptions are left to propagate so pytest reports the original
    # traceback instead of a flattened failure message.
    extracted_text = page.extract_text()

    # Verify the expected text content
    assert extracted_text.strip() == "Hello Brotli!"
0 commit comments