From 3bac701895c1ec222b33f52148a76b285d17db13 Mon Sep 17 00:00:00 2001 From: ckounter Date: Wed, 28 Aug 2024 11:51:38 +0300 Subject: [PATCH 1/4] Collect pandoc urls from github api --- pypandoc/pandoc_download.py | 40 ++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/pypandoc/pandoc_download.py b/pypandoc/pandoc_download.py index 3a7da71..651744f 100644 --- a/pypandoc/pandoc_download.py +++ b/pypandoc/pandoc_download.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- - +import json import logging import os import os.path @@ -14,8 +14,10 @@ import urllib try: from urllib.request import urlopen + from urllib.parse import urlparse except ImportError: from urllib import urlopen + from urlparse import urlparse from .handler import logger, _check_log_handler @@ -42,36 +44,38 @@ def _get_pandoc_urls(version="latest"): :return: str version: actual pandoc version. (e.g. "latest" will be resolved to the actual one) """ # url to pandoc download page - url = "https://github.com/jgm/pandoc/releases/" + \ - ("tag/" if version != "latest" else "") + version + url = "https://api.github.com/repos/jgm/pandoc/releases/" + \ + ("tags/" if version != "latest" else "") + version # try to open the url try: response = urlopen(url) - version_url_frags = response.url.split("/") - version = version_url_frags[-1] except urllib.error.HTTPError as e: raise RuntimeError("Invalid pandoc version {}.".format(version)) - return - # read the HTML content - response = urlopen(f"https://github.com/jgm/pandoc/releases/expanded_assets/{version}") - content = response.read() + # read json response + data = json.loads(response.read()) # regex for the binaries uname = platform.uname()[4] - processor_architecture = "arm" if uname.startswith("arm") or uname.startswith("aarch") else "amd" - regex = re.compile(fr"/jgm/pandoc/releases/download/.*(?:{processor_architecture}|x86|mac).*\.(?:msi|deb|pkg)") - # a list of urls to the binaries - pandoc_urls_list = regex.findall(content.decode("utf-8")) + processor_architecture = ( + "arm" if uname.startswith("arm") or uname.startswith("aarch") else "amd" + ) + regex = re.compile( + rf"/jgm/pandoc/releases/download/.*(?:{processor_architecture}|x86|mac)" + r".*\.(?:msi|deb|pkg)" + ) # actual pandoc version - version = pandoc_urls_list[0].split('/')[5] + version = data["tag_name"] # dict that lookup the platform from binary extension ext2platform = { 'msi': 'win32', 'deb': 'linux', 'pkg': 'darwin' - } - # parse pandoc_urls from list to dict - # py26 don't like dict comprehension. Use this one instead when py26 support is dropped - pandoc_urls = {ext2platform[url_frag[-3:]]: (f"https://github.com{url_frag}") for url_frag in pandoc_urls_list} + } + # collect pandoc urls from json content + pandoc_urls = dict() + for asset in data["assets"]: + download_url = asset["browser_download_url"] + if regex.match(urlparse(download_url).path): + pandoc_urls[ext2platform.get(asset["name"][-3:])] = download_url return pandoc_urls, version From 1395ca2bef85f30a74941888dafd383e58750f67 Mon Sep 17 00:00:00 2001 From: ckounter Date: Sat, 26 Oct 2024 20:14:24 +0300 Subject: [PATCH 2/4] Collect pandoc urls from github api Add support for GITHUB_TOKEN --- .github/workflows/ci.yaml | 2 +- pypandoc/pandoc_download.py | 15 +++++++++++---- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 8a386e2..2bcf028 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -45,7 +45,7 @@ jobs: run: poetry install if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' - name: Download pandoc - run: poetry run python setup_binary.py download_pandoc + run: GITHUB_TOKEN=${{ secrets.GITHUB_TOKEN }} poetry run python setup_binary.py download_pandoc - name: run tests run: poetry run python tests.py diff --git a/pypandoc/pandoc_download.py b/pypandoc/pandoc_download.py index 651744f..b1f124c 100644 --- a/pypandoc/pandoc_download.py +++ b/pypandoc/pandoc_download.py @@ -1,6 +1,5 @@ # -*- coding: utf-8 -*- import json -import logging import os import os.path import platform @@ -13,10 +12,10 @@ import urllib try: - from urllib.request import urlopen + from urllib.request import urlopen, Request from urllib.parse import urlparse except ImportError: - from urllib import urlopen + from urllib2 import urlopen, Request from urlparse import urlparse from .handler import logger, _check_log_handler @@ -46,9 +45,17 @@ def _get_pandoc_urls(version="latest"): # url to pandoc download page url = "https://api.github.com/repos/jgm/pandoc/releases/" + \ ("tags/" if version != "latest" else "") + version + github_token = os.getenv("GITHUB_TOKEN") + if github_token: + headers = { + "Authorization": f"Bearer {github_token}" + } + src = Request(url, headers=headers) + else: + src = url # try to open the url try: - response = urlopen(url) + response = urlopen(src) except urllib.error.HTTPError as e: raise RuntimeError("Invalid pandoc version {}.".format(version)) # read json response From a132b75e92413922b7df5e8ecfd812418a212540 Mon Sep 17 00:00:00 2001 From: ckounter Date: Sat, 26 Oct 2024 20:40:38 +0300 Subject: [PATCH 3/4] Collect pandoc urls from github api Update version --- pypandoc/__init__.py | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pypandoc/__init__.py b/pypandoc/__init__.py index 28340b1..f56356d 100644 --- a/pypandoc/__init__.py +++ b/pypandoc/__init__.py @@ -21,7 +21,7 @@ __author_email__ = "bebraw@gmail.com" __maintainer__ = u'Jessica Tegner' __url__ = 'https://github.com/JessicaTegner/pypandoc' -__version__ = '1.14' +__version__ = '1.15' __license__ = 'MIT' __description__ = "Thin wrapper for pandoc." __python_requires__ = ">=3.6" diff --git a/pyproject.toml b/pyproject.toml index 9ec23a6..14368ba 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "pypandoc" -version = "1.14" +version = "1.15" description = "Thin wrapper for pandoc" authors = ["JessicaTegner "] license = "MIT" From 60319f93852c2400ac436cbfe69bc401c42a6d31 Mon Sep 17 00:00:00 2001 From: ckounter Date: Sat, 26 Oct 2024 21:07:25 +0300 Subject: [PATCH 4/4] Collect pandoc urls from github api Fix ci --- .github/workflows/ci.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 2bcf028..677ad0c 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -89,6 +89,7 @@ jobs: - name: Build binary Archive uses: pypa/cibuildwheel@v2.16.5 env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} CIBW_BEFORE_ALL: "python3 -m pip install --break-system-packages setuptools && mv setup_binary.py setup.py && python3 setup.py download_pandoc" CIBW_BUILD: cp39-* # Build any 1 python version as this wheel is not version dependent # We skip some variants because: