strictdoc-project
diff --git a/‎.gitignore
Lines changed: 5 additions & 0 deletions b/‎.gitignore
Lines changed: 5 additions & 0 deletions
diff --git a/‎LICENSE
Lines changed: 1 addition & 0 deletions b/‎LICENSE
Lines changed: 1 addition & 0 deletions
diff --git a/‎README.md b/‎README.md
diff --git a/‎hpdf/hpdf.py
Lines changed: 259 additions & 0 deletions b/‎hpdf/hpdf.py
Lines changed: 259 additions & 0 deletions
diff --git a/‎pyproject.toml
Lines changed: 73 additions & 0 deletions b/‎pyproject.toml
Lines changed: 73 additions & 0 deletions
diff --git a/‎requirements.txt
Lines changed: 10 additions & 0 deletions b/‎requirements.txt
Lines changed: 10 additions & 0 deletions
diff --git a/‎submodules/html2pdf b/‎submodules/html2pdf
@@ -0,0 +1,5 @@
+.idea/
+**/.wdm/
+tests/integration/.lit_test_times.txt
+tests/integration/**/Output/
+
@@ -0,0 +1 @@
+TBD
@@ -0,0 +1,259 @@
+# mypy: disable-error-code="no-untyped-call,no-untyped-def"
+import argparse
+import atexit
+import base64
+import os.path
+import sys
+import tempfile
+from pathlib import Path
+from shutil import copy
+from typing import Optional, List
+
+import requests
+from requests import Response
+from selenium import webdriver
+from selenium.webdriver.chrome.options import Options
+from selenium.webdriver.chrome.service import Service
+from webdriver_manager.chrome import ChromeDriverManager
+from webdriver_manager.core.download_manager import WDMDownloadManager
+from webdriver_manager.core.driver import Driver
+from webdriver_manager.core.driver_cache import DriverCacheManager
+from webdriver_manager.core.file_manager import FileManager
+from webdriver_manager.core.http import HttpClient
+from webdriver_manager.core.os_manager import OperationSystemManager
+
+# Package version. pyproject.toml's [tool.hatch.version] table points at this
+# file (hpdf/hpdf.py).
+__version__ = "0.0.1"
+
+# HTML2PDF.js prints Unicode symbols to the console. Re-opening stdout with an
+# explicit UTF-8 encoding makes printing them work on Windows, which otherwise
+# complains:
+# UnicodeEncodeError: 'charmap' codec can't encode characters in position 129-130: character maps to <undefined>
+# See "How to make python 3 print() utf8":
+# https://stackoverflow.com/questions/3597480/how-to-make-python-3-print-utf8
+# closefd=False keeps the underlying file descriptor open when this wrapper
+# object is closed.
+sys.stdout = open(sys.stdout.fileno(), mode="w", encoding="utf8", closefd=False)
+
+
+class HTML2PDF_HTTPClient(HttpClient):
+    def get(self, url, params=None, **kwargs) -> Response:
+        """
+        Add you own logic here like session or proxy etc.
+        """
+        last_error: Optional[Exception] = None
+        for attempt in range(1, 3):
+            print(  # noqa: T201
+                f"HTML2PDF_HTTPClient: sending GET request attempt {attempt}: {url}"
+            )
+            try:
+                return requests.get(url, params, timeout=(5, 5), **kwargs)
+            except requests.exceptions.ConnectTimeout as connect_timeout_:
+                last_error = connect_timeout_
+            except requests.exceptions.ReadTimeout as read_timeout_:
+                last_error = read_timeout_
+            except Exception as exception_:
+                raise AssertionError(
+                    "HTML2PDF_HTTPClient: unknown exception", exception_
+                ) from None
+        print(  # noqa: T201
+            f"HTML2PDF_HTTPClient: "
+            f"failed to get response for URL: {url} with error: {last_error}"
+        )
+
+
+class HTML2PDF_CacheManager(DriverCacheManager):
+    def __init__(self, file_manager: FileManager, path_to_cache_dir: str):
+        super().__init__(file_manager=file_manager)
+        self.path_to_cache_dir: str = path_to_cache_dir
+
+    def find_driver(self, driver: Driver):
+        path_to_cached_chrome_driver_dir = os.path.join(
+            self.path_to_cache_dir, "chromedriver"
+        )
+
+        os_type = self.get_os_type()
+        browser_type = driver.get_browser_type()
+        browser_version = self._os_system_manager.get_browser_version_from_os(
+            browser_type
+        )
+
+        path_to_cached_chrome_driver_dir = os.path.join(
+            path_to_cached_chrome_driver_dir, browser_version, os_type
+        )
+        path_to_cached_chrome_driver = os.path.join(
+            path_to_cached_chrome_driver_dir, "chromedriver"
+        )
+        if os.path.isfile(path_to_cached_chrome_driver):
+            print(  # noqa: T201
+                f"HTML2PDF_CacheManager: chromedriver exists in StrictDoc's local cache: "
+                f"{path_to_cached_chrome_driver}"
+            )
+            return path_to_cached_chrome_driver
+        print(  # noqa: T201
+            f"HTML2PDF_CacheManager: chromedriver does not exist in StrictDoc's local cache: "
+            f"{path_to_cached_chrome_driver}"
+        )
+        path_to_downloaded_chrome_driver = super().find_driver(driver)
+        if path_to_downloaded_chrome_driver is None:
+            print(  # noqa: T201
+                f"HTML2PDF_CacheManager: could not get a downloaded Chrome driver: "
+                f"{path_to_cached_chrome_driver}"
+            )
+            return None
+
+        print(  # noqa: T201
+            f"HTML2PDF_CacheManager: saving chromedriver to StrictDoc's local cache: "
+            f"{path_to_downloaded_chrome_driver} -> {path_to_cached_chrome_driver}"
+        )
+        Path(path_to_cached_chrome_driver_dir).mkdir(
+            parents=True, exist_ok=True
+        )
+        copy(path_to_downloaded_chrome_driver, path_to_cached_chrome_driver)
+
+        return path_to_cached_chrome_driver
+
+
+def get_inches_from_millimeters(mm: float) -> float:
+    return mm / 25.4
+
+
+def get_pdf_from_html(driver, url) -> bytes:
+    print(f"HTML2PDF: opening URL with Chrome Driver: {url}")  # noqa: T201
+
+    driver.get(url)
+
+    # https://chromedevtools.github.io/devtools-protocol/tot/Page/#method-printToPDF
+    calculated_print_options = {
+        "landscape": False,
+        "displayHeaderFooter": False,
+        "printBackground": True,
+        # This is an experimental feature that generates a document outline
+        # (table of contents).
+        "generateDocumentOutline": True,
+        # Whether to prefer page size as defined by css. Defaults to
+        # false, in which case the content will be scaled to fit the paper size.
+        "preferCSSPageSize": True,
+        # Paper width in inches. Defaults to 8.5 inches.
+        "paperWidth": get_inches_from_millimeters(210),
+        # Paper height in inches. Defaults to 11 inches.
+        "paperHeight": get_inches_from_millimeters(297),
+        # WIP: Changing the margin settings has no effect.
+        # Top margin in inches. Defaults to 1cm (~0.4 inches).
+        "marginTop": get_inches_from_millimeters(12),
+        # Bottom margin in inches. Defaults to 1cm (~0.4 inches).
+        "marginBottom": get_inches_from_millimeters(12),
+        # Left margin in inches. Defaults to 1cm (~0.4 inches).
+        "marginLeft": get_inches_from_millimeters(21),
+        # Right margin in inches. Defaults to 1cm (~0.4 inches).
+        "marginRight": get_inches_from_millimeters(21),
+    }
+
+    print("HTML2PDF: executing print command with Chrome Driver.")  # noqa: T201
+    result = driver.execute_cdp_cmd("Page.printToPDF", calculated_print_options)
+
+    print("HTML2PDF: JS logs from the print session:")  # noqa: T201
+    print('"""')  # noqa: T201
+    for entry in driver.get_log("browser"):
+        print(entry)  # noqa: T201
+    print('"""')  # noqa: T201
+
+    data = base64.b64decode(result["data"])
+    return data
+
+
+def create_webdriver(chromedriver: Optional[str], path_to_cache_dir: str):
+    print("HTML2PDF: creating Chrome Driver service.", flush=True)  # noqa: T201
+    if chromedriver is None:
+        cache_manager = HTML2PDF_CacheManager(
+            file_manager=FileManager(
+                os_system_manager=OperationSystemManager()
+            ),
+            path_to_cache_dir=path_to_cache_dir,
+        )
+
+        http_client = HTML2PDF_HTTPClient()
+        download_manager = WDMDownloadManager(http_client)
+        path_to_chrome = ChromeDriverManager(
+            download_manager=download_manager, cache_manager=cache_manager
+        ).install()
+    else:
+        path_to_chrome = chromedriver
+    print(f"HTML2PDF: Chrome Driver available at path: {path_to_chrome}")  # noqa: T201
+
+    service = Service(path_to_chrome)
+
+    webdriver_options = Options()
+    webdriver_options.add_argument("start-maximized")
+    webdriver_options.add_argument("disable-infobars")
+    webdriver_options.add_argument("--headless")
+    webdriver_options.add_argument("--disable-extensions")
+
+    webdriver_options.add_experimental_option("useAutomationExtension", False)
+    webdriver_options.add_experimental_option(
+        "excludeSwitches", ["enable-automation"]
+    )
+
+    # Enable the capturing of everything in JS console.
+    webdriver_options.set_capability("goog:loggingPrefs", {"browser": "ALL"})
+
+    print("HTML2PDF: creating Chrome Driver.", flush=True)  # noqa: T201
+
+    driver = webdriver.Chrome(
+        options=webdriver_options,
+        service=service,
+    )
+    driver.set_page_load_timeout(60)
+
+    return driver
+
+
+def main():
+    # By default, all driver binaries are saved to user.home/.wdm folder.
+    # You can override this setting and save binaries to project.root/.wdm.
+    os.environ["WDM_LOCAL"] = "1"
+
+    parser = argparse.ArgumentParser(description="HTML2PDF printer script.")
+    parser.add_argument(
+        "--chromedriver",
+        type=str,
+        help="Optional chromedriver path. Downloaded if not given.",
+    )
+    parser.add_argument(
+        "--cache-dir",
+        type=str,
+        help="Optional path to a cache directory whereto the Chrome driver is downloaded.",
+    )
+    parser.add_argument("paths", nargs='+', help="Paths to input HTML file.")
+    args = parser.parse_args()
+
+    paths: List[str] = args.paths
+
+    path_to_cache_dir: str = (
+        args.cache_dir
+        if args.cache_dir is not None
+        else (
+            os.path.join(
+                tempfile.gettempdir(), "strictdoc_cache", "chromedriver"
+            )
+        )
+    )
+    driver = create_webdriver(args.chromedriver, path_to_cache_dir)
+
+    @atexit.register
+    def exit_handler():
+        print("HTML2PDF: exit handler: quitting the Chrome Driver.")  # noqa: T201
+        driver.quit()
+
+    for separate_path_pair_ in paths:
+        path_to_input_html, path_to_output_pdf = separate_path_pair_.split(":")
+        assert os.path.isfile(path_to_input_html), path_to_input_html
+
+        path_to_output_pdf_dir = os.path.dirname(path_to_output_pdf)
+        Path(path_to_output_pdf_dir).mkdir(parents=True, exist_ok=True)
+
+        url = Path(os.path.abspath(path_to_input_html)).as_uri()
+
+        pdf_bytes = get_pdf_from_html(driver, url)
+        with open(path_to_output_pdf, "wb") as f:
+            f.write(pdf_bytes)
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,73 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[tool.hatch.version]
+path = "hpdf/hpdf.py"
+
+[tool.hatch.build]
+include = [
+    "/hpdf/",
+    "LICENSE",
+    "README.md",
+    "pyproject.toml"
+]
+
+exclude = [
+    "/submodules",
+    "/tests",
+]
+
+[project]
+name = "hpdf"
+dynamic = ["version"]
+description = "Python client for HTML2PDF JavaScript library."
+readme = "README.md"
+license = "TBD"
+requires-python = ">=3.8"
+authors = [
+    { name = "Stanislav Pankevich", email = "s.pankevich@gmail.com" },
+    { name = "Maryna Balioura", email = "mettta@gmail.com" },
+]
+classifiers = [
+    "License :: OSI Approved :: BSD License",
+    "Operating System :: OS Independent",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.8",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: Implementation :: CPython",
+    "Programming Language :: Python :: Implementation :: PyPy",
+]
+
+dependencies = [
+    # HTML2PDF dependencies
+    "selenium",
+    "webdriver-manager",
+
+    # requests is used by HTML2PDF_HTTPClient.
+    "requests",
+]
+
+[project.optional-dependencies]
+development = [
+    # Development tasks
+    "invoke>=1.4.1",
+    "tox>=4.4.8",
+]
+
+[project.scripts]
+# The entry point must name the module that defines main(). The package
+# directory is hpdf/ and main() is defined in hpdf/hpdf.py, so the module path
+# is "hpdf.hpdf".
+hpdf = "hpdf.hpdf:main"
+
+[project.urls]
+Changelog = "https://github.com/mettta/html2pdf_python/releases/"
+# Funding = "https://..."
+Homepage = "https://github.com/mettta/html2pdf_python/"
+Source = "https://github.com/mettta/html2pdf_python/"
+
+[tool.pytest.ini_options]
+addopts = "--import-mode=importlib"
+pythonpath = [
+  "."
+]
@@ -0,0 +1,10 @@
+invoke
+
+#
+# Integration tests
+#
+lit
+filecheck==0.0.24
+
+# Integration tests use PyPDF to check the contents of the printed PDF.
+pypdf==3.9.0