Skip to content

Commit 01f4c0b

Browse files
committed
html2print: minor cleanups on windows
- get chrome binary_location from pybrowsers on Windows - --headless=new seems more stable on Windows, and seems to work on Linux too - add entrypoint.sh to .gitattributes so that it keeps its line endings (Windows/Docker) - remove --no-sandbox as html2print no longer runs as root in the Docker image - add some typing to make mypy happy
1 parent 660ef53 commit 01f4c0b

File tree

3 files changed

+46
-18
lines changed

3 files changed

+46
-18
lines changed

.gitattributes

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
entrypoint.sh text eol=lf

html2print/html2print.py

Lines changed: 42 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from time import sleep
1414
from typing import Dict, List, Optional
1515

16+
import browsers
1617
import requests
1718
from requests import Response
1819
from selenium import webdriver
@@ -39,7 +40,8 @@
3940

4041

4142
class ChromeDriverManager:
42-
def get_chrome_driver(self, path_to_cache_dir: str):
43+
def get_chrome_driver(self, path_to_cache_dir: str) -> str:
44+
"""Return path to downloaded chromedriver."""
4345
chrome_version = self.get_chrome_version()
4446

4547
# If Web Driver Manager cannot detect Chrome, it returns None.
@@ -104,13 +106,24 @@ def get_chrome_driver(self, path_to_cache_dir: str):
104106

105107
@staticmethod
106108
def _download_chromedriver(
107-
chrome_major_version,
109+
chrome_major_version: str,
108110
os_type: str,
109-
path_to_driver_cache_dir,
110-
path_to_cached_chrome_driver,
111-
):
111+
path_to_driver_cache_dir: str,
112+
path_to_cached_chrome_driver: str,
113+
) -> str:
112114
url = "https://googlechromelabs.github.io/chrome-for-testing/known-good-versions-with-downloads.json"
113-
response = ChromeDriverManager.send_http_get_request(url).json()
115+
response = ChromeDriverManager.send_http_get_request(url)
116+
if response is None:
117+
raise RuntimeError(
118+
"Could not download known-good-versions-with-downloads.json"
119+
)
120+
121+
response = response.json()
122+
if response is None:
123+
raise RuntimeError(
124+
"Could not parse known-good-versions-with-downloads.json"
125+
)
126+
assert isinstance(response, dict)
114127

115128
matching_versions = [
116129
item
@@ -143,6 +156,11 @@ def _download_chromedriver(
143156
)
144157
response = ChromeDriverManager.send_http_get_request(driver_url)
145158

159+
if response is None:
160+
raise Exception(
161+
f"Could not download ChromeDriver from {driver_url}"
162+
)
163+
146164
Path(path_to_driver_cache_dir).mkdir(parents=True, exist_ok=True)
147165
zip_path = os.path.join(path_to_driver_cache_dir, "chromedriver.zip")
148166
print( # noqa: T201
@@ -160,7 +178,7 @@ def _download_chromedriver(
160178
return path_to_cached_chrome_driver
161179

162180
@staticmethod
163-
def send_http_get_request(url, params=None, **kwargs) -> Response:
181+
def send_http_get_request(url, params=None, **kwargs) -> Optional[Response]:
164182
last_error: Optional[Exception] = None
165183
for attempt in range(1, 4):
166184
print( # noqa: T201
@@ -180,9 +198,10 @@ def send_http_get_request(url, params=None, **kwargs) -> Response:
180198
f"html2print: "
181199
f"failed to get response for URL: {url} with error: {last_error}"
182200
)
201+
return None
183202

184203
@staticmethod
185-
def get_chrome_version():
204+
def get_chrome_version() -> str:
186205
# Special case: GitHub Actions macOS CI machines have both
187206
# Google Chrome for Testing and normal Google Chrome installed, and
188207
# sometimes their versions are of different major version families.
@@ -225,7 +244,7 @@ def get_chrome_version():
225244
)
226245

227246
os_manager = OperationSystemManager(os_type=None)
228-
version = os_manager.get_browser_version_from_os(ChromeType.GOOGLE)
247+
version = str(os_manager.get_browser_version_from_os(ChromeType.GOOGLE))
229248
return version
230249

231250

@@ -312,30 +331,35 @@ def create_webdriver(
312331
) -> webdriver.Chrome:
313332
print("html2print: creating ChromeDriver service.", flush=True) # noqa: T201
314333
if chromedriver is None:
315-
path_to_chrome = ChromeDriverManager().get_chrome_driver(
334+
path_to_chromedriver = ChromeDriverManager().get_chrome_driver(
316335
path_to_cache_dir
317336
)
318337
else:
319-
path_to_chrome = chromedriver
320-
print(f"html2print: ChromeDriver available at path: {path_to_chrome}") # noqa: T201
338+
path_to_chromedriver = chromedriver
339+
print(f"html2print: ChromeDriver available at path: {path_to_chromedriver}") # noqa: T201
321340

322341
if debug:
323342
service = Service(
324-
path_to_chrome, log_output=PATH_TO_CHROME_DRIVER_DEBUG_LOG
343+
path_to_chromedriver, log_output=PATH_TO_CHROME_DRIVER_DEBUG_LOG
325344
)
326345
else:
327-
service = Service(path_to_chrome)
346+
service = Service(path_to_chromedriver)
347+
348+
path_to_chrome = ""
349+
if platform.system() == "Windows":
350+
path_to_chrome = browsers.get("chrome")["path"]
351+
print(f"html2print: Chrome available at path: {path_to_chrome}") # noqa: T201
328352

329353
webdriver_options = Options()
354+
webdriver_options.binary_location = path_to_chrome
330355
webdriver_options.add_argument("start-maximized")
331356
webdriver_options.add_argument("disable-infobars")
332357
# Doesn't seem to be needed.
333358
# webdriver_options.add_argument('--disable-gpu') # noqa: ERA001
334359
webdriver_options.add_argument("--disable-extensions")
335-
webdriver_options.add_argument("--headless=chrome")
336-
# FIXME: This is not nice but otherwise it does not work in Ubuntu 24-based Docker image.
337-
# https://github.com/SeleniumHQ/selenium/issues/15327#issuecomment-2689287561
338-
webdriver_options.add_argument("--no-sandbox")
360+
# Use --headless=new, as it seems to be more stable on Windows (available since Chrome 109).
361+
# see https://www.selenium.dev/blog/2023/headless-is-going-away/
362+
webdriver_options.add_argument("--headless=new")
339363

340364
# The Chrome option --disable-dev-shm-usage disables the use of /dev/shm
341365
# (shared memory) for temporary storage in Chrome.

pyproject.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,9 @@ dependencies = [
5454

5555
# requests is used by HTML2PDF_HTTPClient.
5656
"requests",
57+
58+
# pybrowsers is used to detect the browser's path (needed on Windows).
59+
"pybrowsers",
5760
]
5861

5962
[project.optional-dependencies]

0 commit comments

Comments
 (0)