Skip to content

Commit 5d180c9

Browse files
authored
Merge pull request #1 from mettta/stanislaw/development
Import all files from the parent repository
2 parents 8e15d57 + 408993a commit 5d180c9

File tree

17 files changed

+691
-1
lines changed

17 files changed

+691
-1
lines changed

.gitignore

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
.idea/
2+
**/.wdm/
3+
tests/integration/.lit_test_times.txt
4+
tests/integration/**/Output/
5+

LICENSE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
TBD

README.md

Whitespace-only changes.

hpdf/hpdf.py

Lines changed: 259 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,259 @@
1+
# mypy: disable-error-code="no-untyped-call,no-untyped-def"
2+
import argparse
3+
import atexit
4+
import base64
5+
import os.path
6+
import sys
7+
import tempfile
8+
from pathlib import Path
9+
from shutil import copy
10+
from typing import Optional, List
11+
12+
import requests
13+
from requests import Response
14+
from selenium import webdriver
15+
from selenium.webdriver.chrome.options import Options
16+
from selenium.webdriver.chrome.service import Service
17+
from webdriver_manager.chrome import ChromeDriverManager
18+
from webdriver_manager.core.download_manager import WDMDownloadManager
19+
from webdriver_manager.core.driver import Driver
20+
from webdriver_manager.core.driver_cache import DriverCacheManager
21+
from webdriver_manager.core.file_manager import FileManager
22+
from webdriver_manager.core.http import HttpClient
23+
from webdriver_manager.core.os_manager import OperationSystemManager
24+
25+
# Package version. pyproject.toml points hatch's version source at this file.
__version__ = "0.0.1"

# HTML2PDF.js prints unicode symbols to console. The following makes it work on
# Windows which otherwise complains:
# UnicodeEncodeError: 'charmap' codec can't encode characters in position 129-130: character maps to <undefined>
# How to make python 3 print() utf8
# https://stackoverflow.com/questions/3597480/how-to-make-python-3-print-utf8
#
# Prefer switching the encoding of the existing stream in place: this does not
# leave a second, never-closed text wrapper on the same file descriptor.
# The original approach (re-opening the descriptor) is kept as a fallback for
# stdout replacements that do not provide reconfigure().
if hasattr(sys.stdout, "reconfigure"):
    sys.stdout.reconfigure(encoding="utf8")
else:
    sys.stdout = open(sys.stdout.fileno(), mode="w", encoding="utf8", closefd=False)
33+
34+
35+
class HTML2PDF_HTTPClient(HttpClient):
    """
    HTTP client that performs GET requests through ``requests`` with explicit
    timeouts and a bounded number of attempts, logging every attempt to stdout.
    """

    def get(self, url, params=None, **kwargs) -> Response:
        """
        Send a GET request to ``url``, retrying once if the request times out.

        Add your own logic here like session or proxy etc.

        :param url: URL to request.
        :param params: Optional query parameters forwarded to ``requests.get``.
        :param kwargs: Extra keyword arguments forwarded to ``requests.get``.
            A caller-provided ``timeout`` takes precedence over the default.
        :return: The response of the first attempt that did not time out.
        :raises requests.exceptions.ConnectTimeout: if the last attempt timed
            out while connecting.
        :raises requests.exceptions.ReadTimeout: if the last attempt timed out
            while reading.
        :raises AssertionError: if ``requests.get`` raises anything other than
            a connect/read timeout.
        """
        # Default (connect, read) timeout. Using setdefault() instead of a
        # hard-coded keyword avoids a "multiple values for keyword argument"
        # TypeError when the caller passes its own timeout.
        kwargs.setdefault("timeout", (5, 5))

        last_error: Optional[Exception] = None
        for attempt in range(1, 3):
            print(  # noqa: T201
                f"HTML2PDF_HTTPClient: sending GET request attempt {attempt}: {url}"
            )
            try:
                return requests.get(url, params, **kwargs)
            except (
                requests.exceptions.ConnectTimeout,
                requests.exceptions.ReadTimeout,
            ) as timeout_error_:
                # Timeouts are the only errors worth retrying.
                last_error = timeout_error_
            except Exception as exception_:
                raise AssertionError(
                    "HTML2PDF_HTTPClient: unknown exception", exception_
                ) from None
        print(  # noqa: T201
            f"HTML2PDF_HTTPClient: "
            f"failed to get response for URL: {url} with error: {last_error}"
        )
        # Every attempt timed out. Propagate the last timeout instead of
        # implicitly returning None, which would violate the declared return
        # type and only fail later, far away from the actual cause.
        assert last_error is not None  # The loop above ran at least once.
        raise last_error
59+
60+
61+
class HTML2PDF_CacheManager(DriverCacheManager):
    """
    Driver cache manager that keeps a copy of chromedriver under a
    caller-chosen cache directory.

    The cached binary is stored at:
    <path_to_cache_dir>/chromedriver/<browser version>/<OS type>/chromedriver
    """

    def __init__(self, file_manager: FileManager, path_to_cache_dir: str):
        """
        :param file_manager: Forwarded to the parent cache manager.
        :param path_to_cache_dir: Root directory of the local chromedriver cache.
        """
        super().__init__(file_manager=file_manager)
        self.path_to_cache_dir: str = path_to_cache_dir

    def find_driver(self, driver: Driver):
        """
        Return the path to a chromedriver binary in the local cache.

        If the binary is already present in the local cache, its path is
        returned right away. Otherwise the parent implementation is asked for
        a driver and, when it provides one, the file is copied into the local
        cache and the path of the copy is returned.

        :param driver: Driver description; only its browser type is read here.
        :return: Path to the cached chromedriver, or whatever the parent
            implementation returns when no local copy can be made.
        """
        os_type = self.get_os_type()
        browser_type = driver.get_browser_type()
        browser_version = self._os_system_manager.get_browser_version_from_os(
            browser_type
        )
        if browser_version is None:
            # The version lookup can come back empty (presumably when the
            # browser is not installed or cannot be queried). os.path.join()
            # would raise a TypeError on None, so skip the local cache and let
            # the parent implementation decide how to proceed.
            print(  # noqa: T201
                "HTML2PDF_CacheManager: could not detect the browser version, "
                "skipping StrictDoc's local cache."
            )
            return super().find_driver(driver)

        path_to_cached_chrome_driver_dir = os.path.join(
            self.path_to_cache_dir, "chromedriver", browser_version, os_type
        )
        path_to_cached_chrome_driver = os.path.join(
            path_to_cached_chrome_driver_dir, "chromedriver"
        )
        if os.path.isfile(path_to_cached_chrome_driver):
            print(  # noqa: T201
                f"HTML2PDF_CacheManager: chromedriver exists in StrictDoc's local cache: "
                f"{path_to_cached_chrome_driver}"
            )
            return path_to_cached_chrome_driver
        print(  # noqa: T201
            f"HTML2PDF_CacheManager: chromedriver does not exist in StrictDoc's local cache: "
            f"{path_to_cached_chrome_driver}"
        )
        path_to_downloaded_chrome_driver = super().find_driver(driver)
        if path_to_downloaded_chrome_driver is None:
            print(  # noqa: T201
                f"HTML2PDF_CacheManager: could not get a downloaded Chrome driver: "
                f"{path_to_cached_chrome_driver}"
            )
            return None

        print(  # noqa: T201
            f"HTML2PDF_CacheManager: saving chromedriver to StrictDoc's local cache: "
            f"{path_to_downloaded_chrome_driver} -> {path_to_cached_chrome_driver}"
        )
        Path(path_to_cached_chrome_driver_dir).mkdir(
            parents=True, exist_ok=True
        )
        copy(path_to_downloaded_chrome_driver, path_to_cached_chrome_driver)

        return path_to_cached_chrome_driver
111+
112+
113+
def get_inches_from_millimeters(mm: float) -> float:
    """Convert a length given in millimeters to inches (1 inch = 25.4 mm)."""
    millimeters_per_inch = 25.4
    return mm / millimeters_per_inch
115+
116+
117+
def get_pdf_from_html(driver, url) -> bytes:
    """
    Open ``url`` in the given Chrome driver and print the page to PDF.

    The page is printed with the ``Page.printToPDF`` DevTools command and the
    browser console log of the session is echoed to stdout.

    :param driver: Chrome driver used to load and print the page.
    :param url: URL of the page to print.
    :return: The PDF document as raw bytes.
    """
    print(f"HTML2PDF: opening URL with Chrome Driver: {url}")  # noqa: T201

    driver.get(url)

    # Paper size and margins are specified in millimeters here and converted,
    # because the DevTools command expects inches.
    vertical_margin = get_inches_from_millimeters(12)
    horizontal_margin = get_inches_from_millimeters(21)

    # Option reference:
    # https://chromedevtools.github.io/devtools-protocol/tot/Page/#method-printToPDF
    calculated_print_options = {
        "landscape": False,
        "displayHeaderFooter": False,
        "printBackground": True,
        # Experimental: emit a document outline (table of contents).
        "generateDocumentOutline": True,
        # Honor the page size declared in CSS. When false (the default),
        # the content is scaled to fit the paper size instead.
        "preferCSSPageSize": True,
    }
    calculated_print_options.update(
        {
            # 210 mm x 297 mm (A4). The defaults are 8.5 x 11 inches.
            "paperWidth": get_inches_from_millimeters(210),
            "paperHeight": get_inches_from_millimeters(297),
            # WIP: Changing the margin settings has no effect.
            # Each margin defaults to 1cm (~0.4 inches).
            "marginTop": vertical_margin,
            "marginBottom": vertical_margin,
            "marginLeft": horizontal_margin,
            "marginRight": horizontal_margin,
        }
    )

    print("HTML2PDF: executing print command with Chrome Driver.")  # noqa: T201
    print_result = driver.execute_cdp_cmd(
        "Page.printToPDF", calculated_print_options
    )

    print("HTML2PDF: JS logs from the print session:")  # noqa: T201
    print('"""')  # noqa: T201
    for log_entry in driver.get_log("browser"):
        print(log_entry)  # noqa: T201
    print('"""')  # noqa: T201

    # The command result carries the PDF as a base64-encoded string.
    return base64.b64decode(print_result["data"])
159+
160+
161+
def create_webdriver(chromedriver: Optional[str], path_to_cache_dir: str):
    """
    Create a headless Chrome driver that captures the browser console log.

    :param chromedriver: Path to an existing chromedriver binary. When None,
        the driver is obtained through ChromeDriverManager, using the custom
        HTTP client and cache manager of this module.
    :param path_to_cache_dir: Cache directory handed to the cache manager;
        only used when ``chromedriver`` is None.
    :return: The configured Chrome driver with a page load timeout of 60.
    """
    print("HTML2PDF: creating Chrome Driver service.", flush=True)  # noqa: T201

    if chromedriver is not None:
        path_to_chrome = chromedriver
    else:
        os_manager = OperationSystemManager()
        driver_cache = HTML2PDF_CacheManager(
            file_manager=FileManager(os_system_manager=os_manager),
            path_to_cache_dir=path_to_cache_dir,
        )
        downloader = WDMDownloadManager(HTML2PDF_HTTPClient())
        driver_manager = ChromeDriverManager(
            download_manager=downloader, cache_manager=driver_cache
        )
        path_to_chrome = driver_manager.install()
    print(f"HTML2PDF: Chrome Driver available at path: {path_to_chrome}")  # noqa: T201

    chrome_service = Service(path_to_chrome)

    chrome_options = Options()
    for chrome_argument in (
        "start-maximized",
        "disable-infobars",
        "--headless",
        "--disable-extensions",
    ):
        chrome_options.add_argument(chrome_argument)

    chrome_options.add_experimental_option("useAutomationExtension", False)
    chrome_options.add_experimental_option(
        "excludeSwitches", ["enable-automation"]
    )

    # Capture everything written to the JS console so that it can be
    # retrieved from the driver's "browser" log later.
    chrome_options.set_capability("goog:loggingPrefs", {"browser": "ALL"})

    print("HTML2PDF: creating Chrome Driver.", flush=True)  # noqa: T201

    chrome_driver = webdriver.Chrome(
        options=chrome_options,
        service=chrome_service,
    )
    chrome_driver.set_page_load_timeout(60)

    return chrome_driver
205+
206+
207+
def _split_path_pair(path_pair: str) -> List[str]:
    """
    Split an ``<input HTML>:<output PDF>`` argument into its two paths.

    A plain split on ":" is tried first, so every argument that used to be
    accepted is parsed exactly as before. Only when that does not produce two
    parts, a single ASCII letter followed by a part starting with a path
    separator is treated as a Windows drive prefix and glued back together.

    :param path_pair: One positional command-line argument.
    :return: A two-element list: [input HTML path, output PDF path].
    :raises ValueError: if the argument cannot be resolved into two paths.
    """
    parts = path_pair.split(":")
    if len(parts) == 2:
        return parts

    merged: List[str] = []
    index = 0
    while index < len(parts):
        part = parts[index]
        next_part = parts[index + 1] if index + 1 < len(parts) else None
        is_drive_prefix = (
            len(part) == 1
            and part.isascii()
            and part.isalpha()
            and next_part is not None
            and next_part[:1] in ("\\", "/")
        )
        if is_drive_prefix:
            merged.append(f"{part}:{next_part}")
            index += 2
        else:
            merged.append(part)
            index += 1

    if len(merged) != 2:
        raise ValueError(
            f"HTML2PDF: expected an argument of the form "
            f"<input HTML>:<output PDF>, got: {path_pair}"
        )
    return merged


def main():
    """
    Command-line entry point: print HTML files to PDF with headless Chrome.

    Each positional argument is an ``<input HTML>:<output PDF>`` pair. One
    Chrome driver is created for the whole run and is quit at interpreter
    exit.

    :raises ValueError: if a positional argument is not a valid path pair.
    :raises AssertionError: if an input HTML file does not exist.
    """
    # By default, all driver binaries are saved to user.home/.wdm folder.
    # You can override this setting and save binaries to project.root/.wdm.
    os.environ["WDM_LOCAL"] = "1"

    parser = argparse.ArgumentParser(description="HTML2PDF printer script.")
    parser.add_argument(
        "--chromedriver",
        type=str,
        help="Optional chromedriver path. Downloaded if not given.",
    )
    parser.add_argument(
        "--cache-dir",
        type=str,
        help="Optional path to a cache directory whereto the Chrome driver is downloaded.",
    )
    parser.add_argument(
        "paths",
        nargs="+",
        help="Pairs of paths in the form <input HTML>:<output PDF>.",
    )
    args = parser.parse_args()

    paths: List[str] = args.paths

    # Without --cache-dir, the cache lives under the system temporary directory.
    path_to_cache_dir: str = (
        args.cache_dir
        if args.cache_dir is not None
        else (
            os.path.join(
                tempfile.gettempdir(), "strictdoc_cache", "chromedriver"
            )
        )
    )
    driver = create_webdriver(args.chromedriver, path_to_cache_dir)

    @atexit.register
    def exit_handler():
        print("HTML2PDF: exit handler: quitting the Chrome Driver.")  # noqa: T201
        driver.quit()

    for separate_path_pair_ in paths:
        path_to_input_html, path_to_output_pdf = _split_path_pair(
            separate_path_pair_
        )
        # Raised explicitly rather than through an assert statement so that
        # the check is not stripped when Python runs with -O. The exception
        # type is kept for backward compatibility.
        if not os.path.isfile(path_to_input_html):
            raise AssertionError(path_to_input_html)

        path_to_output_pdf_dir = os.path.dirname(path_to_output_pdf)
        Path(path_to_output_pdf_dir).mkdir(parents=True, exist_ok=True)

        # The driver is given a file:// URL, which requires an absolute path.
        url = Path(os.path.abspath(path_to_input_html)).as_uri()

        pdf_bytes = get_pdf_from_html(driver, url)
        with open(path_to_output_pdf, "wb") as f:
            f.write(pdf_bytes)


if __name__ == "__main__":
    main()

pyproject.toml

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
[build-system]
2+
requires = ["hatchling"]
3+
build-backend = "hatchling.build"
4+
5+
[tool.hatch.version]
6+
path = "hpdf/hpdf.py"
7+
8+
[tool.hatch.build]
9+
include = [
10+
"/hpdf/",
11+
"LICENSE",
12+
"README.md",
13+
"pyproject.toml"
14+
]
15+
16+
exclude = [
17+
"/submodules",
18+
"/tests",
19+
]
20+
21+
[project]
22+
name = "hpdf"
23+
dynamic = ["version"]
24+
description = "Python client for HTML2PDF JavaScript library."
25+
readme = "README.md"
26+
license = "TBD"
27+
requires-python = ">=3.8"
28+
authors = [
29+
{ name = "Stanislav Pankevich", email = "s.pankevich@gmail.com" },
30+
{ name = "Maryna Balioura", email = "mettta@gmail.com" },
31+
]
32+
classifiers = [
33+
"License :: OSI Approved :: BSD License",
34+
"Operating System :: OS Independent",
35+
"Programming Language :: Python :: 3",
36+
"Programming Language :: Python :: 3.8",
37+
"Programming Language :: Python :: 3.9",
38+
"Programming Language :: Python :: 3.10",
39+
"Programming Language :: Python :: 3.11",
40+
"Programming Language :: Python :: Implementation :: CPython",
41+
"Programming Language :: Python :: Implementation :: PyPy",
42+
]
43+
44+
dependencies = [
45+
# HTML2PDF dependencies
46+
"selenium",
47+
"webdriver-manager",
48+
49+
# requests is used by HTML2PDF_HTTPClient.
50+
"requests",
51+
]
52+
53+
[project.optional-dependencies]
54+
development = [
55+
# Development tasks
56+
"invoke>=1.4.1",
57+
"tox>=4.4.8",
58+
]
59+
60+
[project.scripts]
61+
hpdf = "hpdf:main"
62+
63+
[project.urls]
64+
Changelog = "https://github.com/mettta/html2pdf_python/releases/"
65+
# Funding = "https://..."
66+
Homepage = "https://github.com/mettta/html2pdf_python/"
67+
Source = "https://github.com/mettta/html2pdf_python/"
68+
69+
[tool.pytest.ini_options]
70+
addopts = "--import-mode=importlib"
71+
pythonpath = [
72+
"."
73+
]

requirements.txt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
invoke
2+
3+
#
4+
# Integration tests
5+
#
6+
lit
7+
filecheck==0.0.24
8+
9+
# Integration tests use PyPDF to check the contents of the printed PDF.
10+
pypdf==3.9.0

0 commit comments

Comments
 (0)