File tree Expand file tree Collapse file tree 2 files changed +18
-16
lines changed Expand file tree Collapse file tree 2 files changed +18
-16
lines changed Original file line number Diff line number Diff line change @@ -121,10 +121,10 @@ def count_doc_pages(self) -> int:
121
121
return len (pdf )
122
122
123
123
def process_pdf (
124
- self ,
125
- behavior : str ,
126
- on_min_pages : int ,
127
- page_indexes : Sequence ,
124
+ self ,
125
+ behavior : str ,
126
+ on_min_pages : int ,
127
+ page_indexes : Sequence ,
128
128
) -> None :
129
129
"""Run any required processing on a PDF file."""
130
130
if self .is_pdf_empty ():
Original file line number Diff line number Diff line change 1
1
import io
2
2
from pathlib import Path
3
3
4
- import pikepdf
4
+ import pypdfium2 as pdfium
5
5
import pytest
6
6
7
7
from mindee .error .mimetype_error import MimeTypeError
18
18
19
19
FILE_TYPES_DIR = Path ("./tests/data/file_types" )
20
20
21
+
21
22
#
22
23
# PDF
23
24
#
@@ -55,18 +56,19 @@ def test_pdf_cut_n_pages(numb_pages: int):
55
56
)
56
57
assert input_obj .count_doc_pages () == numb_pages
57
58
58
- # Each page in the PDF has a unique (and increasing) /Content /Length.
59
- # We use this to make sure we have the correct pages
60
- cut_pdf = pikepdf . open (input_obj .file_object )
61
- with pikepdf . open (
62
- FILE_TYPES_DIR / " pdf" / f"multipage_cut- { numb_pages } .pdf"
63
- ) as pdf :
64
- for idx , page in enumerate ( pdf . pages ):
65
- assert (
66
- page [ "/Contents" ][ "/Length" ]
67
- == cut_pdf . pages [ idx ][ "/Contents" ][ "/Length" ]
68
- )
59
+ # Rendering is currently the least verbose way of comparing pages with pypdfium2:
60
+ # each page is read and rendered as a rasterized image, and the images are then compared as raw byte sequences.
61
+ cut_pdf = pdfium . PdfDocument (input_obj .file_object )
62
+ pdf = pdfium . PdfDocument ( FILE_TYPES_DIR / "pdf" / f"multipage_cut- { numb_pages } .pdf" )
63
+ for idx in range ( len ( pdf )):
64
+ pdf_page = pdf . get_page ( idx )
65
+ pdf_page_render = pdfium . PdfPage . render ( pdf_page )
66
+ cut_pdf_page = cut_pdf . get_page ( idx )
67
+ cut_pdf_page_render = pdfium . PdfPage . render ( cut_pdf_page )
68
+
69
+ assert bytes ( pdf_page_render . buffer ) == bytes ( cut_pdf_page_render . buffer )
69
70
cut_pdf .close ()
71
+ pdf .close ()
70
72
71
73
72
74
def test_pdf_keep_5_first_pages ():
You can’t perform that action at this time.
0 commit comments