Skip to content

Commit 0718f51

Browse files
committed
Some cleanups; replace the final PyPI data retrieval step with an asyncio-based implementation.
Signed-off-by: Thomas Neidhart <thomas.neidhart@gmail.com>
1 parent 31990dd commit 0718f51

File tree

6 files changed

+92
-73
lines changed

6 files changed

+92
-73
lines changed

requirements.txt

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
1-
attrs==22.1.0
2-
beautifulsoup4==4.11.1
3-
certifi==2022.6.15
4-
charset-normalizer==2.1.0
5-
click==8.1.3
6-
colorama==0.4.5
7-
commoncode==30.2.0
1+
attrs==23.1.0
2+
beautifulsoup4==4.12.2
3+
certifi==2023.11.17
4+
charset-normalizer==3.3.2
5+
click==8.1.7
6+
colorama==0.4.6
7+
commoncode==31.0.3
88
dparse2==0.7.0
99
idna==3.3
1010
importlib-metadata==4.12.0
@@ -14,13 +14,14 @@ packaging==21.3
1414
packvers==21.5
1515
pip-requirements-parser==32.0.1
1616
pkginfo2==30.0.0
17-
pyparsing==3.0.9
18-
PyYAML==6.0
19-
requests==2.28.1
17+
pyparsing==3.1.1
18+
PyYAML==6.0.1
19+
requests==2.31.0
2020
resolvelib >= 1.0.0
21-
saneyaml==0.5.2
22-
soupsieve==2.3.2.post1
21+
saneyaml==0.6.0
22+
soupsieve==2.5
2323
text-unidecode==1.3
2424
toml==0.10.2
25-
urllib3==1.26.11
26-
zipp==3.8.1
25+
urllib3==2.1.0
26+
zipp==3.17.0
27+
aiohttp==3.9.1

setup.cfg

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,8 @@ install_requires =
6969
toml >= 0.10.0
7070
mock >= 3.0.5
7171
packvers >= 21.5
72+
aiohttp >= 3.9
73+
7274
[options.packages.find]
7375
where = src
7476

src/python_inspector/api.py

Lines changed: 24 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
# See https://github.com/nexB/python-inspector for support or download.
99
# See https://aboutcode.org for more information about nexB OSS projects.
1010
#
11-
11+
import asyncio
1212
import os
1313
from netrc import netrc
1414
from typing import Dict
@@ -26,7 +26,6 @@
2626
from _packagedcode.pypi import PipRequirementsFileHandler
2727
from _packagedcode.pypi import PythonSetupPyHandler
2828
from _packagedcode.pypi import can_process_dependent_package
29-
from python_inspector import DEFAULT_PYTHON_VERSION
3029
from python_inspector import dependencies
3130
from python_inspector import utils
3231
from python_inspector import utils_pypi
@@ -231,7 +230,7 @@ def resolve_dependencies(
231230
if not direct_dependencies:
232231
return Resolution(
233232
packages=[],
234-
resolution=[],
233+
resolution={},
235234
files=files,
236235
)
237236

@@ -288,19 +287,21 @@ def resolve_dependencies(
288287
ignore_errors=ignore_errors,
289288
)
290289

291-
packages = []
290+
async def gather_pypi_data():
291+
async def get_pypi_data(package):
292+
if verbose:
293+
printer(f" package '{package}'")
292294

293-
for package in purls:
294-
packages.extend(
295-
[
296-
pkg.to_dict()
297-
for pkg in list(
298-
get_pypi_data_from_purl(
299-
package, repos=repos, environment=environment, prefer_source=prefer_source
300-
)
301-
)
302-
],
303-
)
295+
return await get_pypi_data_from_purl(
296+
package, repos=repos, environment=environment, prefer_source=prefer_source
297+
)
298+
299+
if verbose:
300+
printer(f"retrieve data from pypi:")
301+
302+
return await asyncio.gather(*[get_pypi_data(package) for package in purls])
303+
304+
packages = [pkg.to_dict() for pkg in asyncio.run(gather_pypi_data()) if pkg is not None]
304305

305306
if verbose:
306307
printer("done!")
@@ -316,14 +317,14 @@ def resolve_dependencies(
316317

317318

318319
def resolve(
319-
direct_dependencies,
320-
environment,
321-
repos=tuple(),
322-
as_tree=False,
323-
max_rounds=200000,
324-
pdt_output=False,
325-
analyze_setup_py_insecurely=False,
326-
ignore_errors=False,
320+
direct_dependencies: List[DependentPackage],
321+
environment: Environment,
322+
repos: Sequence[utils_pypi.PypiSimpleRepository] = tuple(),
323+
as_tree: bool = False,
324+
max_rounds: int = 200000,
325+
pdt_output: bool = False,
326+
analyze_setup_py_insecurely: bool = False,
327+
ignore_errors: bool = False,
327328
):
328329
"""
329330
Resolve dependencies given a ``direct_dependencies`` list of

src/python_inspector/package_data.py

Lines changed: 35 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
# See https://aboutcode.org for more information about nexB OSS projects.
1010
#
1111

12-
from typing import List
12+
from typing import List, Iterable, Optional
1313

1414
from packageurl import PackageURL
1515

@@ -24,9 +24,9 @@
2424
from python_inspector.utils_pypi import PypiSimpleRepository
2525

2626

27-
def get_pypi_data_from_purl(
27+
async def get_pypi_data_from_purl(
2828
purl: str, environment: Environment, repos: List[PypiSimpleRepository], prefer_source: bool
29-
) -> PackageData:
29+
) -> Optional[PackageData]:
3030
"""
3131
Generate `Package` object from the `purl` string of pypi type
3232
@@ -36,18 +36,19 @@ def get_pypi_data_from_purl(
3636
``prefer_source`` is a boolean value to prefer source distribution over wheel,
3737
if no source distribution is available then wheel is used
3838
"""
39-
purl = PackageURL.from_string(purl)
40-
name = purl.name
41-
version = purl.version
39+
parsed_purl = PackageURL.from_string(purl)
40+
name = parsed_purl.name
41+
version = parsed_purl.version
4242
if not version:
4343
raise Exception("Version is not specified in the purl")
4444
base_path = "https://pypi.org/pypi"
4545
api_url = f"{base_path}/{name}/{version}/json"
46-
from python_inspector.resolution import get_response
4746

48-
response = get_response(api_url)
47+
from python_inspector.utils import get_response_async
48+
response = await get_response_async(api_url)
4949
if not response:
50-
return []
50+
return None
51+
5152
info = response.get("info") or {}
5253
homepage_url = info.get("home_page")
5354
project_urls = info.get("project_urls") or {}
@@ -56,13 +57,9 @@ def get_pypi_data_from_purl(
5657
python_version = get_python_version_from_env_tag(python_version=environment.python_version)
5758
valid_distribution_urls = []
5859

59-
valid_distribution_urls.append(
60-
get_sdist_download_url(
61-
purl=purl,
62-
repos=repos,
63-
python_version=python_version,
64-
)
65-
)
60+
sdist_url = get_sdist_download_url(purl=parsed_purl, repos=repos, python_version=python_version)
61+
if sdist_url:
62+
valid_distribution_urls.append(sdist_url)
6663

6764
valid_distribution_urls = [url for url in valid_distribution_urls if url]
6865

@@ -71,24 +68,27 @@ def get_pypi_data_from_purl(
7168
if not valid_distribution_urls or not prefer_source:
7269
wheel_urls = list(
7370
get_wheel_download_urls(
74-
purl=purl,
71+
purl=parsed_purl,
7572
repos=repos,
7673
environment=environment,
7774
python_version=python_version,
7875
)
7976
)
8077
wheel_url = choose_single_wheel(wheel_urls)
8178
if wheel_url:
82-
valid_distribution_urls.append(wheel_url)
79+
valid_distribution_urls.insert(0, wheel_url)
8380

84-
urls = response.get("urls") or []
85-
for url in urls:
86-
dist_url = url.get("url")
87-
if dist_url not in valid_distribution_urls:
81+
urls = {url.get("url"): url for url in response.get("urls", [])}
82+
# iterate over the valid distribution urls and return the first
83+
# one that is matching.
84+
for dist_url in valid_distribution_urls:
85+
if dist_url not in urls:
8886
continue
89-
digests = url.get("digests") or {}
9087

91-
yield PackageData(
88+
url_data = urls.get(dist_url)
89+
digests = url_data.get("digests", {})
90+
91+
return PackageData(
9292
primary_language="Python",
9393
description=get_description(info),
9494
homepage_url=homepage_url,
@@ -98,10 +98,10 @@ def get_pypi_data_from_purl(
9898
license_expression=info.get("license_expression"),
9999
declared_license=get_declared_license(info),
100100
download_url=dist_url,
101-
size=url.get("size"),
102-
md5=digests.get("md5") or url.get("md5_digest"),
101+
size=url_data.get("size"),
102+
md5=digests.get("md5") or url_data.get("md5_digest"),
103103
sha256=digests.get("sha256"),
104-
release_date=url.get("upload_time"),
104+
release_date=url_data.get("upload_time"),
105105
keywords=get_keywords(info),
106106
parties=get_parties(
107107
info,
@@ -110,9 +110,11 @@ def get_pypi_data_from_purl(
110110
maintainer_key="maintainer",
111111
maintainer_email_key="maintainer_email",
112112
),
113-
**purl.to_dict(),
113+
**parsed_purl.to_dict(),
114114
)
115115

116+
return None
117+
116118

117119
def choose_single_wheel(wheel_urls):
118120
"""
@@ -125,18 +127,18 @@ def choose_single_wheel(wheel_urls):
125127

126128
def get_pypi_bugtracker_url(project_urls):
127129
bug_tracking_url = project_urls.get("Tracker")
128-
if not (bug_tracking_url):
130+
if not bug_tracking_url:
129131
bug_tracking_url = project_urls.get("Issue Tracker")
130-
if not (bug_tracking_url):
132+
if not bug_tracking_url:
131133
bug_tracking_url = project_urls.get("Bug Tracker")
132134
return bug_tracking_url
133135

134136

135137
def get_pypi_codeview_url(project_urls):
136138
code_view_url = project_urls.get("Source")
137-
if not (code_view_url):
139+
if not code_view_url:
138140
code_view_url = project_urls.get("Code")
139-
if not (code_view_url):
141+
if not code_view_url:
140142
code_view_url = project_urls.get("Source Code")
141143
return code_view_url
142144

@@ -146,7 +148,7 @@ def get_wheel_download_urls(
146148
repos: List[PypiSimpleRepository],
147149
environment: Environment,
148150
python_version: str,
149-
) -> List[str]:
151+
) -> Iterable[str]:
150152
"""
151153
Return a list of download urls for the given purl.
152154
"""

src/python_inspector/resolution.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
import ast
1111
import operator
1212
import os
13-
import re
1413
import tarfile
1514
from typing import Dict
1615
from typing import Generator

src/python_inspector/utils.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,11 @@
1111

1212
import json
1313
import os
14-
from typing import Dict
14+
from typing import Dict, Optional
1515
from typing import List
1616
from typing import NamedTuple
1717

18+
import aiohttp
1819
import requests
1920

2021

@@ -67,13 +68,26 @@ class Candidate(NamedTuple):
6768
def get_response(url: str) -> Dict:
6869
"""
6970
Return a mapping of the JSON response from fetching ``url``
70-
or None if the ``url`` cannot be fetched..
71+
or None if the ``url`` cannot be fetched.
7172
"""
7273
resp = requests.get(url)
7374
if resp.status_code == 200:
7475
return resp.json()
7576

7677

78+
async def get_response_async(url: str) -> Optional[Dict]:
79+
"""
80+
Return a mapping of the JSON response from fetching ``url``
81+
or None if the ``url`` cannot be fetched.
82+
"""
83+
async with aiohttp.ClientSession() as session:
84+
async with session.get(url) as response:
85+
if response.status == 200:
86+
return await response.json()
87+
else:
88+
return None
89+
90+
7791
def remove_test_data_dir_variable_prefix(path, placeholder="<file>"):
7892
"""
7993
Return a clean path, removing variable test path prefix or using a ``placeholder``.

0 commit comments

Comments
 (0)