Skip to content

Commit 6a3c5b0

Browse files
committed
Update thirdparty fetching utilities
These were buggy in some corner cases. They have been updated such that: * --latest-version works. * we can reliably fetch combinations of wheels and sdists for multiple OS combos at once * we now support macOS universal wheels (for ARM CPUs) Caching is now simpler: we have essentially a single file-based cache under .cache. PyPI indexes are fetched and not cached, unless the new --use-cached-index option is used, which can be useful when fetching many thirdparty packages in a short timeframe. The first PyPI repository in a list has precedence and we never fetch from other repositories if we find wheels and sdists there. This avoids pounding too much on the self-hosted repo. Signed-off-by: Philippe Ombredanne <pombredanne@nexb.com>
1 parent 5d48c1c commit 6a3c5b0

File tree

6 files changed

+480
-659
lines changed

6 files changed

+480
-659
lines changed

etc/scripts/check_thirdparty.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
@click.command()
1717
@click.option(
1818
"-d",
19-
"--dest_dir",
19+
"--dest",
2020
type=click.Path(exists=True, readable=True, path_type=str, file_okay=False),
2121
required=True,
2222
help="Path to the thirdparty directory to check.",
@@ -35,7 +35,7 @@
3535
)
3636
@click.help_option("-h", "--help")
3737
def check_thirdparty_dir(
38-
dest_dir,
38+
dest,
3939
wheels,
4040
sdists,
4141
):
@@ -45,7 +45,7 @@ def check_thirdparty_dir(
4545
# check for problems
4646
print(f"==> CHECK FOR PROBLEMS")
4747
utils_thirdparty.find_problems(
48-
dest_dir=dest_dir,
48+
dest_dir=dest,
4949
report_missing_sources=sdists,
5050
report_missing_wheels=wheels,
5151
)

etc/scripts/fetch_thirdparty.py

Lines changed: 78 additions & 110 deletions
Original file line numberDiff line numberDiff line change
@@ -100,11 +100,17 @@
100100
"index_urls",
101101
type=str,
102102
metavar="INDEX",
103-
default=utils_thirdparty.PYPI_INDEXES,
103+
default=utils_thirdparty.PYPI_INDEX_URLS,
104104
show_default=True,
105105
multiple=True,
106106
help="PyPI index URL(s) to use for wheels and sources, in order of preferences.",
107107
)
108+
@click.option(
109+
"--use-cached-index",
110+
is_flag=True,
111+
help="Use on disk cached PyPI indexes list of packages and versions and do not refetch if present.",
112+
)
113+
108114
@click.help_option("-h", "--help")
109115
def fetch_thirdparty(
110116
requirements_files,
@@ -116,26 +122,34 @@ def fetch_thirdparty(
116122
wheels,
117123
sdists,
118124
index_urls,
125+
use_cached_index,
119126
):
120127
"""
121-
Download to --dest-dir THIRDPARTY_DIR the PyPI wheels, source distributions,
128+
Download to --dest THIRDPARTY_DIR the PyPI wheels, source distributions,
122129
and their ABOUT metadata, license and notices files.
123130
124131
Download the PyPI packages listed in the combination of:
125132
- the pip requirements --requirements REQUIREMENT-FILE(s),
126133
- the pip name==version --specifier SPECIFIER(s)
127134
- any pre-existing wheels or sdsists found in --dest-dir THIRDPARTY_DIR.
128135
129-
Download wheels with the --wheels option for the ``--python-version`` PYVER(s)
130-
and ``--operating_system`` OS(s) combinations defaulting to all supported combinations.
136+
Download wheels with the --wheels option for the ``--python-version``
137+
PYVER(s) and ``--operating_system`` OS(s) combinations defaulting to all
138+
supported combinations.
131139
132140
Download sdists tarballs with the --sdists option.
133141
134-
Generate or Download .ABOUT, .LICENSE and .NOTICE files for all the wheels and sources fetched.
142+
Generate or Download .ABOUT, .LICENSE and .NOTICE files for all the wheels
143+
and sources fetched.
135144
136-
Download wheels and sdists the provided PyPI simple --index-url INDEX(s) URLs.
145+
Download from the provided PyPI simple --index-url INDEX(s) URLs.
137146
"""
147+
if not (wheels or sdists):
148+
print("Error: one or both of --wheels and --sdists is required.")
149+
sys.exit(1)
150+
138151
print(f"COLLECTING REQUIRED NAMES & VERSIONS FROM {dest_dir}")
152+
139153
existing_packages_by_nv = {
140154
(package.name, package.version): package
141155
for package in utils_thirdparty.get_local_packages(directory=dest_dir)
@@ -151,134 +165,88 @@ def fetch_thirdparty(
151165
required_name_versions.update(nvs)
152166

153167
for specifier in specifiers:
154-
nv = utils_requirements.get_name_version(
168+
nv = utils_requirements.get_required_name_version(
155169
requirement=specifier,
156170
with_unpinned=latest_version,
157171
)
158172
required_name_versions.add(nv)
159173

174+
if latest_version:
175+
names = set(name for name, _version in sorted(required_name_versions))
176+
required_name_versions = {(n, None) for n in names}
177+
160178
if not required_name_versions:
161179
print("Error: no requirements requested.")
162180
sys.exit(1)
163181

164-
if not os.listdir(dest_dir) and not (wheels or sdists):
165-
print("Error: one or both of --wheels and --sdists is required.")
166-
sys.exit(1)
167-
168-
if latest_version:
169-
latest_name_versions = set()
170-
names = set(name for name, _version in sorted(required_name_versions))
171-
for name in sorted(names):
172-
latests = utils_thirdparty.PypiPackage.sorted(
173-
utils_thirdparty.get_package_versions(
174-
name=name, version=None, index_urls=index_urls
175-
)
176-
)
177-
if not latests:
178-
print(f"No distribution found for: {name}")
179-
continue
180-
latest = latests[-1]
181-
latest_name_versions.add((latest.name, latest.version))
182-
required_name_versions = latest_name_versions
183-
184-
if TRACE:
185-
print("required_name_versions:", required_name_versions)
182+
if TRACE_DEEP:
183+
print("required_name_versions:")
184+
for n, v in required_name_versions:
185+
print(f" {n} @ {v}")
186186

187+
# create the environments matrix we need for wheels
188+
environments = None
187189
if wheels:
188-
# create the environments matrix we need for wheels
189190
evts = itertools.product(python_versions, operating_systems)
190191
environments = [utils_thirdparty.Environment.from_pyver_and_os(pyv, os) for pyv, os in evts]
191192

192-
wheels_not_found = {}
193-
sdists_not_found = {}
194-
# iterate over requirements, one at a time
193+
# Collect PyPI repos
194+
repos = []
195+
for index_url in index_urls:
196+
index_url = index_url.strip("/")
197+
existing = utils_thirdparty.DEFAULT_PYPI_REPOS_BY_URL.get(index_url)
198+
if existing:
199+
existing.use_cached_index = use_cached_index
200+
repos.append(existing)
201+
else:
202+
repo = utils_thirdparty.PypiSimpleRepository(
203+
index_url=index_url,
204+
use_cached_index=use_cached_index,
205+
)
206+
repos.append(repo)
207+
208+
wheels_fetched = []
209+
wheels_not_found = []
210+
211+
sdists_fetched = []
212+
sdists_not_found = []
213+
195214
for name, version in sorted(required_name_versions):
196215
nv = name, version
197-
existing_package = existing_packages_by_nv.get(nv)
216+
print(f"Processing: {name} @ {version}")
198217
if wheels:
199218
for environment in environments:
200-
if existing_package:
201-
existing_wheels = list(
202-
existing_package.get_supported_wheels(environment=environment)
203-
)
204-
else:
205-
existing_wheels = None
206-
207-
if existing_wheels:
208-
if TRACE_DEEP:
209-
print(
210-
f"====> Wheels already available: {name}=={version} on: {environment}: {existing_package.wheels!r}"
211-
)
212-
if all(w.is_pure() for w in existing_wheels):
213-
break
214-
else:
215-
continue
216-
217-
if TRACE_DEEP:
218-
print(f"Fetching wheel for: {name}=={version} on: {environment}")
219-
220-
try:
221-
(
222-
fetched_wheel_filenames,
223-
existing_wheel_filenames,
224-
) = utils_thirdparty.download_wheel(
225-
name=name,
226-
version=version,
227-
environment=environment,
228-
dest_dir=dest_dir,
229-
index_urls=index_urls,
230-
)
231-
if TRACE:
232-
if existing_wheel_filenames:
233-
print(
234-
f" ====> Wheels already available: {name}=={version} on: {environment}"
235-
)
236-
for whl in existing_wheel_filenames:
237-
print(f" {whl}")
238-
if fetched_wheel_filenames:
239-
print(f" ====> Wheels fetched: {name}=={version} on: {environment}")
240-
for whl in fetched_wheel_filenames:
241-
print(f" {whl}")
242-
243-
fwfns = fetched_wheel_filenames + existing_wheel_filenames
244-
245-
if all(utils_thirdparty.Wheel.from_filename(f).is_pure() for f in fwfns):
246-
break
247-
248-
except utils_thirdparty.DistributionNotFound as e:
249-
wheels_not_found[f"{name}=={version}"] = str(e)
250-
251-
if sdists:
252-
if existing_package and existing_package.sdist:
253219
if TRACE:
254-
print(
255-
f" ====> Sdist already available: {name}=={version}: {existing_package.sdist!r}"
256-
)
257-
continue
258-
259-
if TRACE:
260-
print(f" Fetching sdist for: {name}=={version}")
261-
262-
try:
263-
fetched = utils_thirdparty.download_sdist(
220+
print(f" ==> Fetching wheel for envt: {environment}")
221+
fwfns = utils_thirdparty.download_wheel(
264222
name=name,
265223
version=version,
224+
environment=environment,
266225
dest_dir=dest_dir,
267-
index_urls=index_urls,
226+
repos=repos,
268227
)
228+
if fwfns:
229+
wheels_fetched.extend(fwfns)
230+
else:
231+
wheels_not_found.append(f"{name}=={version} for: {environment}")
232+
if TRACE:
233+
print(f" NOT FOUND")
269234

235+
if sdists:
236+
if TRACE:
237+
print(f" ==> Fetching sdist: {name}=={version}")
238+
fetched = utils_thirdparty.download_sdist(
239+
name=name,
240+
version=version,
241+
dest_dir=dest_dir,
242+
repos=repos,
243+
)
244+
if fetched:
245+
sdists_fetched.append(fetched)
246+
else:
247+
sdists_not_found.append(f"{name}=={version}")
270248
if TRACE:
271-
if not fetched:
272-
print(
273-
f" ====> Sdist already available: {name}=={version}"
274-
)
275-
else:
276-
print(
277-
f" ====> Sdist fetched: {fetched} for {name}=={version}"
278-
)
279-
280-
except utils_thirdparty.DistributionNotFound as e:
281-
sdists_not_found[f"{name}=={version}"] = str(e)
249+
print(f" NOT FOUND")
282250

283251
if wheels and wheels_not_found:
284252
print(f"==> MISSING WHEELS")
@@ -291,7 +259,7 @@ def fetch_thirdparty(
291259
print(f" {sd}")
292260

293261
print(f"==> FETCHING OR CREATING ABOUT AND LICENSE FILES")
294-
utils_thirdparty.fetch_abouts_and_licenses(dest_dir=dest_dir)
262+
utils_thirdparty.fetch_abouts_and_licenses(dest_dir=dest_dir, use_cached_index=use_cached_index)
295263
utils_thirdparty.clean_about_files(dest_dir=dest_dir)
296264

297265
# check for problems

etc/scripts/gen_pypi_simple.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -25,26 +25,26 @@ class InvalidDistributionFilename(Exception):
2525

2626
def get_package_name_from_filename(filename):
2727
"""
28-
Return the package name extracted from a package ``filename``.
29-
Optionally ``normalize`` the name according to distribution name rules.
28+
Return the normalized package name extracted from a package ``filename``.
29+
Normalization is done according to distribution name rules.
3030
Raise an ``InvalidDistributionFilename`` if the ``filename`` is invalid::
3131
3232
>>> get_package_name_from_filename("foo-1.2.3_rc1.tar.gz")
3333
'foo'
34-
>>> get_package_name_from_filename("foo-bar-1.2-py27-none-any.whl")
34+
>>> get_package_name_from_filename("foo_bar-1.2-py27-none-any.whl")
3535
'foo-bar'
3636
>>> get_package_name_from_filename("Cython-0.17.2-cp26-none-linux_x86_64.whl")
3737
'cython'
3838
>>> get_package_name_from_filename("python_ldap-2.4.19-cp27-none-macosx_10_10_x86_64.whl")
3939
'python-ldap'
40-
>>> get_package_name_from_filename("foo.whl")
41-
Traceback (most recent call last):
42-
...
43-
InvalidDistributionFilename: ...
44-
>>> get_package_name_from_filename("foo.png")
45-
Traceback (most recent call last):
46-
...
47-
InvalidFilePackageName: ...
40+
>>> try:
41+
... get_package_name_from_filename("foo.whl")
42+
... except InvalidDistributionFilename:
43+
... pass
44+
>>> try:
45+
... get_package_name_from_filename("foo.png")
46+
... except InvalidDistributionFilename:
47+
... pass
4848
"""
4949
if not filename or not filename.endswith(dist_exts):
5050
raise InvalidDistributionFilename(filename)

etc/scripts/requirements.txt

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,11 @@
11
aboutcode_toolkit
2-
github-release-retry2
32
attrs
43
commoncode
54
click
65
requests
76
saneyaml
8-
romp
97
pip
108
setuptools
119
twine
12-
wheel
10+
wheel
11+
build

etc/scripts/utils_requirements.py

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
# See https://github.com/nexB/skeleton for support or download.
99
# See https://aboutcode.org for more information about nexB OSS projects.
1010
#
11-
import os
1211
import re
1312
import subprocess
1413

@@ -42,23 +41,23 @@ def get_required_name_versions(requirement_lines, with_unpinned=False):
4241
if req_line.startswith("-") or (not with_unpinned and not "==" in req_line):
4342
print(f"Requirement line is not supported: ignored: {req_line}")
4443
continue
45-
yield get_name_version(requirement=req_line, with_unpinned=with_unpinned)
44+
yield get_required_name_version(requirement=req_line, with_unpinned=with_unpinned)
4645

4746

48-
def get_name_version(requirement, with_unpinned=False):
47+
def get_required_name_version(requirement, with_unpinned=False):
4948
"""
5049
Return a (name, version) tuple given a`requirement` specifier string.
5150
Requirement version must be pinned. If ``with_unpinned`` is True, unpinned
5251
requirements are accepted and only the name portion is returned.
5352
5453
For example:
55-
>>> assert get_name_version("foo==1.2.3") == ("foo", "1.2.3")
56-
>>> assert get_name_version("fooA==1.2.3.DEV1") == ("fooa", "1.2.3.dev1")
57-
>>> assert get_name_version("foo==1.2.3", with_unpinned=False) == ("foo", "1.2.3")
58-
>>> assert get_name_version("foo", with_unpinned=True) == ("foo", "")
59-
>>> assert get_name_version("foo>=1.2", with_unpinned=True) == ("foo", ""), get_name_version("foo>=1.2")
54+
>>> assert get_required_name_version("foo==1.2.3") == ("foo", "1.2.3")
55+
>>> assert get_required_name_version("fooA==1.2.3.DEV1") == ("fooa", "1.2.3.dev1")
56+
>>> assert get_required_name_version("foo==1.2.3", with_unpinned=False) == ("foo", "1.2.3")
57+
>>> assert get_required_name_version("foo", with_unpinned=True) == ("foo", "")
58+
>>> assert get_required_name_version("foo>=1.2", with_unpinned=True) == ("foo", ""), get_required_name_version("foo>=1.2")
6059
>>> try:
61-
... assert not get_name_version("foo", with_unpinned=False)
60+
... assert not get_required_name_version("foo", with_unpinned=False)
6261
... except Exception as e:
6362
... assert "Requirement version must be pinned" in str(e)
6463
"""
@@ -112,7 +111,7 @@ def get_installed_reqs(site_packages_dir):
112111
as a text.
113112
"""
114113
if not os.path.exists(site_packages_dir):
115-
raise Exception(f"site_packages directort: {site_packages_dir!r} does not exists")
114+
raise Exception(f"site_packages directory: {site_packages_dir!r} does not exists")
116115
# Also include these packages in the output with --all: wheel, distribute,
117116
# setuptools, pip
118117
args = ["pip", "freeze", "--exclude-editable", "--all", "--path", site_packages_dir]

0 commit comments

Comments
 (0)