Skip to content

Commit 0c00c07

Browse files
temp save
1 parent 8d2539a commit 0c00c07

File tree

3 files changed

+26
-35
lines changed

3 files changed

+26
-35
lines changed

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,7 @@ repos:
4141
exclude: "tests/|examples/|docs/"
4242
additional_dependencies:
4343
- toml
44-
- pikepdf
44+
- pypdfium2
4545
- types-pytz
4646
- types-requests
4747
- types-setuptools

mindee/input/sources.py

Lines changed: 24 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
from pathlib import Path
88
from typing import BinaryIO, Optional, Sequence, Tuple, Union
99

10-
import pikepdf
10+
import pypdfium2 as pdfium
1111

1212
from mindee.error.mimetype_error import MimeTypeError
1313
from mindee.error.mindee_error import MindeeError, MindeeSourceError
@@ -117,14 +117,14 @@ def count_doc_pages(self) -> int:
117117
:return: the number of pages.
118118
"""
119119
self.file_object.seek(0)
120-
with pikepdf.open(self.file_object) as pdf:
121-
return len(pdf.pages)
120+
pdf = pdfium.PdfDocument(self.file_object)
121+
return len(pdf)
122122

123123
def process_pdf(
124-
self,
125-
behavior: str,
126-
on_min_pages: int,
127-
page_indexes: Sequence,
124+
self,
125+
behavior: str,
126+
on_min_pages: int,
127+
page_indexes: Sequence,
128128
) -> None:
129129
"""Run any required processing on a PDF file."""
130130
if self.is_pdf_empty():
@@ -163,14 +163,13 @@ def merge_pdf_pages(self, page_numbers: set) -> None:
163163
:return: None
164164
"""
165165
self.file_object.seek(0)
166-
new_pdf = pikepdf.Pdf.new()
167-
with pikepdf.open(self.file_object) as pdf:
168-
for page_id in page_numbers:
169-
page = pdf.pages[page_id]
170-
new_pdf.pages.append(page)
166+
new_pdf = pdfium.PdfDocument.new()
167+
pdf = pdfium.PdfDocument(self.file_object)
168+
new_pdf.import_pages(pdf, list(page_numbers))
171169
self.file_object.close()
172-
self.file_object = io.BytesIO()
173-
new_pdf.save(self.file_object)
170+
bytes_io = io.BytesIO()
171+
new_pdf.save(bytes_io)
172+
self.file_object = bytes_io
174173

175174
def is_pdf_empty(self) -> bool:
176175
"""
@@ -179,24 +178,17 @@ def is_pdf_empty(self) -> bool:
179178
:return: ``True`` if the PDF is empty
180179
"""
181180
self.file_object.seek(0)
182-
with pikepdf.open(self.file_object) as pdf:
183-
for page in pdf.pages:
184-
# mypy incorrectly identifies the "/Length" key's value as
185-
# an object rather than an int.
186-
try:
187-
total_size = page["/Contents"]["/Length"]
188-
except ValueError:
189-
total_size = 0 # type: ignore
190-
for content in page["/Contents"]: # type: ignore
191-
total_size += content["/Length"]
192-
has_data = total_size > 1000 # type: ignore
193-
194-
has_font = "/Font" in page["/Resources"].keys()
195-
has_xobj = "/XObject" in page["/Resources"].keys()
196-
197-
if has_font or has_xobj or has_data:
198-
return False
199-
return True
181+
pdf = pdfium.PdfDocument(self.file_object)
182+
for i in range(len(pdf)):
183+
page = pdf.get_page(i)
184+
185+
has_objects = False
186+
for _ in page.get_objects():
187+
has_objects = True
188+
break
189+
if has_objects:
190+
return False
191+
return True
200192

201193
def read_contents(self, close_file: bool) -> Tuple[str, bytes]:
202194
"""

setup.cfg

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -32,8 +32,7 @@ packages = find:
3232
include_package_data = True
3333
python_requires = >=3.7
3434
install_requires =
35-
pikepdf~=8.6;python_version>="3.8"
36-
pikepdf==6.2.9;python_version<"3.8"
35+
pypdfium2~=4.7
3736
pytz>=2023.3
3837
requests~=2.31
3938

0 commit comments

Comments
 (0)