Skip to content

Commit 8ca5562

Browse files
authored
Switch to a custom Chrome downloader (#18)
It turns out that the Web Driver Manager downloads the very latest version of ChromeDriver every time, which causes conflicts on machines that have a slightly older version of Chrome installed:

```
selenium.common.exceptions.SessionNotCreatedException: Message: session not created: This version of ChromeDriver only supports Chrome version 133
Current browser version is 132.0.6834.110 with binary path /Applications/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing
```

See strictdoc-project/strictdoc#2077 (comment).
1 parent df34a3a commit 8ca5562

File tree

1 file changed

+171
-78
lines changed

1 file changed

+171
-78
lines changed

html2print/html2print.py

Lines changed: 171 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,13 @@
33
import atexit
44
import base64
55
import os.path
6+
import platform
7+
import re
8+
import subprocess
69
import sys
10+
import zipfile
711
from datetime import datetime
812
from pathlib import Path
9-
from shutil import copy
1013
from time import sleep
1114
from typing import Dict, List, Optional
1215

@@ -15,15 +18,9 @@
1518
from selenium import webdriver
1619
from selenium.webdriver.chrome.options import Options
1720
from selenium.webdriver.chrome.service import Service
18-
from webdriver_manager.chrome import ChromeDriverManager
19-
from webdriver_manager.core.download_manager import WDMDownloadManager
20-
from webdriver_manager.core.driver import Driver
21-
from webdriver_manager.core.driver_cache import DriverCacheManager
22-
from webdriver_manager.core.file_manager import FileManager
23-
from webdriver_manager.core.http import HttpClient
24-
from webdriver_manager.core.os_manager import OperationSystemManager
21+
from webdriver_manager.core.os_manager import ChromeType, OperationSystemManager
2522

26-
__version__ = "0.0.8"
23+
__version__ = "0.0.12"
2724

2825
PATH_TO_HTML2PDF_JS = os.path.join(
2926
os.path.dirname(os.path.join(__file__)), "html2pdf_js", "html2pdf.min.js"
@@ -39,52 +36,40 @@
3936
sys.stdout = open(sys.stdout.fileno(), mode="w", encoding="utf8", closefd=False)
4037

4138

42-
class HTML2Print_HTTPClient(HttpClient):
43-
def get(self, url, params=None, **kwargs) -> Response:
44-
last_error: Optional[Exception] = None
45-
for attempt in range(1, 3):
46-
print( # noqa: T201
47-
f"html2print: sending GET request attempt {attempt}: {url}"
48-
)
49-
try:
50-
return requests.get(url, params, timeout=(5, 5), **kwargs)
51-
except requests.exceptions.ConnectTimeout as connect_timeout_:
52-
last_error = connect_timeout_
53-
except requests.exceptions.ReadTimeout as read_timeout_:
54-
last_error = read_timeout_
55-
except Exception as exception_:
56-
raise AssertionError(
57-
"html2print: unknown exception", exception_
58-
) from None
39+
class ChromeDriverManager:
40+
def get_chrome_driver(self, path_to_cache_dir: str):
41+
chrome_version = self.get_chrome_version()
42+
chrome_major_version = chrome_version.split(".")[0]
43+
5944
print( # noqa: T201
60-
f"html2print: "
61-
f"failed to get response for URL: {url} with error: {last_error}"
45+
f"html2print: Installed Chrome version: {chrome_version}"
6246
)
6347

48+
system_map = {
49+
"Windows": "win32",
50+
"Darwin": "mac-arm64"
51+
if platform.machine() == "arm64"
52+
else "mac-x64",
53+
"Linux": "linux64",
54+
}
55+
os_type = system_map[platform.system()]
56+
is_windows = platform.system() == "Windows"
6457

65-
class HTML2Print_CacheManager(DriverCacheManager):
66-
def __init__(self, file_manager: FileManager, path_to_cache_dir: str):
67-
super().__init__(file_manager=file_manager)
68-
self.path_to_cache_dir: str = path_to_cache_dir
69-
70-
def find_driver(self, driver: Driver):
71-
path_to_cached_chrome_driver_dir = os.path.join(
72-
self.path_to_cache_dir, "chromedriver"
73-
)
74-
75-
os_type = self.get_os_type()
76-
browser_type = driver.get_browser_type()
77-
browser_version = self._os_system_manager.get_browser_version_from_os(
78-
browser_type
58+
print( # noqa: T201
59+
f"html2print: OS system: {platform.system()}, OS type: {os_type}."
7960
)
80-
assert browser_version is not None, browser_version
8161

8262
path_to_cached_chrome_driver_dir = os.path.join(
83-
path_to_cached_chrome_driver_dir, browser_version, os_type
63+
path_to_cache_dir, chrome_major_version
8464
)
8565
path_to_cached_chrome_driver = os.path.join(
86-
path_to_cached_chrome_driver_dir, "chromedriver"
66+
path_to_cached_chrome_driver_dir,
67+
f"chromedriver-{os_type}",
68+
"chromedriver",
8769
)
70+
if is_windows:
71+
path_to_cached_chrome_driver += ".exe"
72+
8873
if os.path.isfile(path_to_cached_chrome_driver):
8974
print( # noqa: T201
9075
f"html2print: ChromeDriver exists in the local cache: "
@@ -95,25 +80,144 @@ def find_driver(self, driver: Driver):
9580
f"html2print: ChromeDriver does not exist in the local cache: "
9681
f"{path_to_cached_chrome_driver}"
9782
)
98-
path_to_downloaded_chrome_driver = super().find_driver(driver)
99-
if path_to_downloaded_chrome_driver is None:
100-
print( # noqa: T201
101-
f"html2print: could not get a downloaded ChromeDriver: "
102-
f"{path_to_cached_chrome_driver}"
83+
84+
path_to_downloaded_chrome_driver = self._download_chromedriver(
85+
chrome_major_version,
86+
os_type,
87+
path_to_cached_chrome_driver_dir,
88+
path_to_cached_chrome_driver,
89+
)
90+
assert os.path.isfile(path_to_downloaded_chrome_driver)
91+
os.chmod(path_to_downloaded_chrome_driver, 0o755)
92+
93+
return path_to_downloaded_chrome_driver
94+
95+
@staticmethod
def _download_chromedriver(
    chrome_major_version,
    os_type: str,
    path_to_driver_cache_dir,
    path_to_cached_chrome_driver,
):
    """
    Download and unpack the ChromeDriver matching a Chrome major version.

    Looks up the Chrome for Testing "known good versions" feed, takes the
    last listed build whose major version equals ``chrome_major_version``,
    downloads the ChromeDriver archive for ``os_type`` and extracts it
    into ``path_to_driver_cache_dir``.

    Parameters:
        chrome_major_version: Major version of the installed Chrome,
            e.g. ``"132"``.
        os_type: Platform identifier used by the feed, e.g. ``"linux64"``,
            ``"mac-arm64"``, ``"mac-x64"`` or ``"win32"``.
        path_to_driver_cache_dir: Directory that receives
            ``chromedriver.zip`` and the extracted archive content.
        path_to_cached_chrome_driver: Path at which the caller expects the
            extracted binary. It is only reported and returned here; the
            caller verifies that the file exists.

    Returns:
        ``path_to_cached_chrome_driver``, unchanged.

    Raises:
        RuntimeError: No build of the requested major version is listed, or
            the selected build offers no ChromeDriver for ``os_type``.
        requests.HTTPError: One of the two HTTP requests returned an error
            status.
    """
    url = "https://googlechromelabs.github.io/chrome-for-testing/known-good-versions-with-downloads.json"
    index_response = ChromeDriverManager.send_http_get_request(url)
    # Fail on 4xx/5xx here instead of trying to parse an error page as JSON.
    index_response.raise_for_status()
    known_good_versions = index_response.json()

    # Compare the major component exactly. A plain prefix test would also
    # accept other majors that merely start with the same digits
    # (e.g. "13" would match "130.0.6723.58").
    major_version = str(chrome_major_version)
    matching_versions = [
        item
        for item in known_good_versions["versions"]
        if item["version"].split(".")[0] == major_version
    ]

    if not matching_versions:
        raise RuntimeError(
            f"No compatible ChromeDriver found for Chrome version {chrome_major_version}"
        )

    # NOTE(review): this relies on the feed listing builds in ascending
    # order, so that the last match is the newest one — confirm.
    latest_version = matching_versions[-1]

    # Not every listed build necessarily ships a ChromeDriver archive, so a
    # missing "chromedriver" entry is reported as "no downloadable URL"
    # below rather than surfacing as a KeyError.
    chrome_downloadable_versions = latest_version["downloads"].get(
        "chromedriver", []
    )
    driver_url: Optional[str] = next(
        (
            downloadable_["url"]
            for downloadable_ in chrome_downloadable_versions
            if downloadable_["platform"] == os_type
        ),
        None,
    )
    if driver_url is None:
        raise RuntimeError(
            f"Could not find a downloadable URL from downloadable versions: {chrome_downloadable_versions}"
        )

    print(  # noqa: T201
        f"html2print: downloading ChromeDriver from: {driver_url}"
    )
    archive_response = ChromeDriverManager.send_http_get_request(driver_url)
    # An HTML error body written to disk would only fail later, and less
    # clearly, as a BadZipFile.
    archive_response.raise_for_status()

    Path(path_to_driver_cache_dir).mkdir(parents=True, exist_ok=True)
    zip_path = os.path.join(path_to_driver_cache_dir, "chromedriver.zip")
    print(  # noqa: T201
        f"html2print: saving downloaded ChromeDriver to path: {zip_path}"
    )
    with open(zip_path, "wb") as file:
        file.write(archive_response.content)

    with zipfile.ZipFile(zip_path, "r") as zip_ref:
        zip_ref.extractall(path_to_driver_cache_dir)

    print(  # noqa: T201
        f"html2print: ChromeDriver downloaded to: {path_to_cached_chrome_driver}"
    )
    return path_to_cached_chrome_driver
116151

152+
@staticmethod
def send_http_get_request(url, params=None, **kwargs) -> Response:
    """
    Send an HTTP GET request, retrying when the request times out.

    Up to three attempts are made, each with a 5-second connect timeout and
    a 5-second read timeout. Only timeouts are retried.

    Parameters:
        url: URL to request.
        params: Optional query parameters, forwarded to ``requests.get``.
        **kwargs: Further keyword arguments forwarded to ``requests.get``.

    Returns:
        The response of the first attempt that did not time out. The HTTP
        status code is not checked here.

    Raises:
        AssertionError: An exception other than a connect/read timeout was
            raised while sending the request.
        RuntimeError: Every attempt timed out. Previously the function fell
            through and returned ``None`` in this case, which made callers
            fail later with an unrelated ``AttributeError``.
    """
    last_error: Optional[Exception] = None
    for attempt in range(1, 4):
        print(  # noqa: T201
            f"html2print: sending GET request attempt {attempt}: {url}"
        )
        try:
            return requests.get(url, params, timeout=(5, 5), **kwargs)
        except requests.exceptions.ConnectTimeout as connect_timeout_:
            last_error = connect_timeout_
        except requests.exceptions.ReadTimeout as read_timeout_:
            last_error = read_timeout_
        except Exception as exception_:
            # Anything that is not a timeout is treated as unexpected and
            # is not retried.
            raise AssertionError(
                "html2print: unknown exception", exception_
            ) from None

    print(  # noqa: T201
        f"html2print: "
        f"failed to get response for URL: {url} with error: {last_error}"
    )
    # All attempts timed out: fail explicitly, with the last timeout chained
    # as the cause, instead of implicitly returning None.
    raise RuntimeError(
        f"html2print: failed to get response for URL: {url}"
    ) from last_error
173+
174+
@staticmethod
def get_chrome_version():
    """
    Detect the version of the installed Chrome browser.

    On macOS, Google Chrome for Testing is probed first and its version is
    preferred when the binary is available. In all other cases the version
    is obtained through webdriver_manager's ``OperationSystemManager``.

    Returns:
        The Chrome version string, e.g. ``"132.0.6834.110"``.

    Raises:
        RuntimeError: No installed Chrome version could be detected.
            Callers split the returned string, so returning ``None`` would
            only postpone the failure.
    """
    # Special case: GitHub Actions macOS CI machines have both
    # Google Chrome for Testing and normal Google Chrome installed, and
    # sometimes their versions are of different major version families.
    # The solution is to check if the Google Chrome for Testing is available,
    # and use its version instead of the normal one.
    if platform.system() == "Darwin":
        chrome_path = "/Applications/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing"
        try:
            print(  # noqa: T201
                "html2print: "
                "checking if there is Google Chrome for Testing instead of "
                "a normal Chrome available."
            )

            version_output = subprocess.run(
                [chrome_path, "--version"],
                capture_output=True,
                text=True,
                check=True,
            )
            chrome_version = version_output.stdout.strip()
            # Keep only the dotted numeric part of the "--version" output.
            match = re.search(r"\d+(\.\d+)+", chrome_version)
            if not match:
                raise RuntimeError(
                    "Cannot extract the version part using regex."
                )

            chrome_version = match.group(0)

            print(  # noqa: T201
                f"html2print: Google Chrome for Testing Version: {chrome_version}"
            )

            return chrome_version
        except FileNotFoundError:
            # The binary is not installed: fall through to regular Chrome.
            print("html2print: Chrome for Testing not available.")  # noqa: T201
        except Exception as e:
            # Deliberately best-effort: any other probing problem is only
            # reported, and the regular detection below is used instead.
            print(  # noqa: T201
                f"html2print: Error getting Google Chrome for Testing version: {e}"
            )

    os_manager = OperationSystemManager(os_type=None)
    version = os_manager.get_browser_version_from_os(ChromeType.GOOGLE)
    if version is None:
        # Detection can come back empty, e.g. when Chrome is not installed.
        raise RuntimeError(
            "html2print: could not detect the version of the installed Chrome."
        )
    return version
220+
117221

118222
def get_inches_from_millimeters(mm: float) -> float:
119223
return mm / 25.4
@@ -190,23 +294,12 @@ class Done(Exception):
190294
return data
191295

192296

193-
def get_chrome_driver(path_to_cache_dir: str) -> str:
194-
cache_manager = HTML2Print_CacheManager(
195-
file_manager=FileManager(os_system_manager=OperationSystemManager()),
196-
path_to_cache_dir=path_to_cache_dir,
197-
)
198-
199-
http_client = HTML2Print_HTTPClient()
200-
download_manager = WDMDownloadManager(http_client)
201-
path_to_chrome = ChromeDriverManager(
202-
download_manager=download_manager, cache_manager=cache_manager
203-
).install()
204-
return path_to_chrome
205-
206-
207297
def create_webdriver(chromedriver: Optional[str], path_to_cache_dir: str):
298+
print("html2print: creating ChromeDriver service.", flush=True) # noqa: T201
208299
if chromedriver is None:
209-
path_to_chrome = get_chrome_driver(path_to_cache_dir)
300+
path_to_chrome = ChromeDriverManager().get_chrome_driver(
301+
path_to_cache_dir
302+
)
210303
else:
211304
path_to_chrome = chromedriver
212305
print(f"html2print: ChromeDriver available at path: {path_to_chrome}") # noqa: T201
@@ -254,6 +347,8 @@ def main():
254347
command_subparsers = parser.add_subparsers(title="command", dest="command")
255348
command_subparsers.required = True
256349

350+
print(f"html2print: version {__version__}") # noqa: T201
351+
257352
#
258353
# Get driver command.
259354
#
@@ -295,22 +390,20 @@ def main():
295390
path_to_cache_dir: str
296391
if args.command == "get_driver":
297392
path_to_cache_dir = (
298-
args.cache_dir
299-
if args.cache_dir is not None
300-
else (DEFAULT_CACHE_DIR)
393+
args.cache_dir if args.cache_dir is not None else DEFAULT_CACHE_DIR
301394
)
302395

303-
path_to_chrome = get_chrome_driver(path_to_cache_dir)
396+
path_to_chrome = ChromeDriverManager().get_chrome_driver(
397+
path_to_cache_dir
398+
)
304399
print(f"html2print: ChromeDriver available at path: {path_to_chrome}") # noqa: T201
305400
sys.exit(0)
306401

307402
elif args.command == "print":
308403
paths: List[str] = args.paths
309404

310405
path_to_cache_dir = (
311-
args.cache_dir
312-
if args.cache_dir is not None
313-
else (DEFAULT_CACHE_DIR)
406+
args.cache_dir if args.cache_dir is not None else DEFAULT_CACHE_DIR
314407
)
315408
driver = create_webdriver(args.chromedriver, path_to_cache_dir)
316409

0 commit comments

Comments
 (0)