Skip to content

Commit 16b267d

Browse files
authored
Merge pull request #112 from nexB/purl2sym-github-packages
Add metadata support for packages hosted on GitHub
2 parents b3b2052 + 210eec0 commit 16b267d

File tree

192 files changed

+80834
-311
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

192 files changed

+80834
-311
lines changed

setup.cfg

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ install_requires =
5858
packageurl-python
5959
requests
6060
python-dateutil
61+
python-dotenv
6162

6263

6364
[options.packages.find]

src/fetchcode/package.py

Lines changed: 33 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,16 @@
2121
from urllib.parse import urljoin
2222

2323
import htmllistparse
24-
import requests
2524
from packageurl import PackageURL
2625
from packageurl.contrib.route import NoRouteAvailable
2726
from packageurl.contrib.route import Router
2827

29-
from fetchcode.ipkg_release_info import IPKG_RELEASES
28+
from fetchcode.package_util import GITHUB_SOURCE_BY_PACKAGE
29+
from fetchcode.package_util import IPKG_RELEASES
30+
from fetchcode.package_util import GitHubSource
31+
from fetchcode.package_util import MiniupnpPackagesGitHubSource
3032
from fetchcode.packagedcode_models import Package
33+
from fetchcode.utils import get_response
3134

3235
router = Router()
3336

@@ -44,17 +47,6 @@ def info(url):
4447
return
4548

4649

47-
def get_response(url):
48-
"""
49-
Generate `Package` object for a `url` string
50-
"""
51-
resp = requests.get(url)
52-
if resp.status_code == 200:
53-
return resp.json()
54-
55-
raise Exception(f"Failed to fetch: {url}")
56-
57-
5850
def get_pypi_bugtracker_url(project_urls):
5951
bug_tracking_url = project_urls.get("Tracker")
6052
if not (bug_tracking_url):
@@ -216,53 +208,38 @@ def get_pypi_data_from_purl(purl):
216208
@router.route("pkg:github/.*")
def get_github_data_from_purl(purl):
    """
    Return an iterable of `Package` objects for a github-type `purl` string.

    The "namespace/name" of the parsed purl selects a source class from
    GITHUB_SOURCE_BY_PACKAGE, with GitHubSource as the fallback; the work is
    delegated to that class' `get_package_info`.
    """
    package_url = PackageURL.from_string(purl)
    repo_key = f"{package_url.namespace}/{package_url.name}"
    source_class = GITHUB_SOURCE_BY_PACKAGE.get(repo_key, GitHubSource)
    return source_class.get_package_info(package_url)
221+
222+
223+
@router.route(
    "pkg:generic/miniupnpc.*",
    "pkg:generic/miniupnpd.*",
    "pkg:generic/minissdpd.*",
)
def get_github_data_for_miniupnp(purl):
    """
    Return an iterable of `Package` objects for a generic miniupnp `purl`.

    The generic purl is mapped to the `miniupnp/miniupnp` GitHub repository,
    keeping the requested version; the generic package name is passed along
    so that MiniupnpPackagesGitHubSource can select the matching tags.
    """
    requested = PackageURL.from_string(purl)
    repo_purl = PackageURL(
        type="github",
        namespace="miniupnp",
        name="miniupnp",
        version=requested.version,
    )
    return MiniupnpPackagesGitHubSource.get_package_info(
        gh_purl=repo_purl,
        package_name=requested.name,
    )
245-
release_url = f"{api_url}/releases"
246-
releases = get_response(release_url)
247-
for release in releases:
248-
version = release.get("name")
249-
version_purl = PackageURL(
250-
type=purl.type, namespace=namespace, name=name, version=version
251-
)
252-
download_url = release.get("tarball_url")
253-
code_view_url = f"{github_url}/{namespace}/{name}/tree/{version}"
254-
version_vcs_url = f"{vcs_url}@{version}"
255-
yield Package(
256-
homepage_url=homepage_url,
257-
vcs_url=version_vcs_url,
258-
api_url=api_url,
259-
bug_tracking_url=bug_tracking_url,
260-
code_view_url=code_view_url,
261-
declared_license=declared_license,
262-
primary_language=primary_language,
263-
download_url=download_url,
264-
**version_purl.to_dict(),
265-
)
266243

267244

268245
@router.route("pkg:bitbucket/.*")
@@ -408,7 +385,7 @@ def get_package_info(cls, package_url):
408385
)
409386

410387
else:
411-
for version, data in archives.items():
388+
for version, data in IPKG_RELEASES.items():
412389
purl = PackageURL(type="generic", name="ipkg", version=version)
413390
yield Package(
414391
homepage_url=cls.source_url,

src/fetchcode/ipkg_release_info.py renamed to src/fetchcode/package_util.py

Lines changed: 223 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,229 @@
1616

1717
# Since there will be no new releases of ipkg, it's better to
1818
# store them in a dictionary rather than fetching them every time.
19+
20+
import dataclasses
21+
import re
22+
23+
import attr
24+
25+
from fetchcode import utils
26+
from fetchcode.packagedcode_models import Package
27+
28+
29+
def package_from_dict(package_data):
    """
    Return a Package built from a `package_data` mapping.

    Keys of `package_data` that are not attrs fields of `Package` are
    dropped, so the mapping may carry unknown or unsupported entries.
    """
    # The loop variable is deliberately not called ``attr``: that name would
    # shadow the imported ``attr`` module inside the comprehension.
    supported = {field.name for field in attr.fields(Package)}
    cleaned_package_data = {
        key: value for key, value in package_data.items() if key in supported
    }
    return Package(**cleaned_package_data)
39+
40+
41+
@dataclasses.dataclass
class GitHubSource:
    """
    Package data source backed by the tags of a GitHub repository.

    Both attributes are read from the class (not from an instance) by
    `get_package_info`, so subclasses customize the behavior by overriding
    them as class attributes.
    """

    version_regex: re.Pattern = dataclasses.field(
        default=None,
        metadata={
            "help_text": "Regular expression pattern to match and extract version from tag."
        },
    )
    ignored_tag_regex: re.Pattern = dataclasses.field(
        default=None,
        metadata={"help_text": "Regex to ignore tag."},
    )

    @classmethod
    def get_default_package(cls, purl):
        """
        Return a Package for `purl` filled with repository-level data.

        The repository record is fetched from the GitHub REST API with
        `utils.get_response`; the issues and code-view URLs are derived from
        the purl namespace and name.
        """
        namespace, name = purl.namespace, purl.name
        base_path = "https://api.github.com/repos"
        github_url = "https://github.com"

        api_url = f"{base_path}/{namespace}/{name}"
        repo_data = utils.get_response(api_url)
        # "license" may be present but null, hence the fallback to a dict.
        license_data = repo_data.get("license") or {}

        package_data = dict(
            homepage_url=repo_data.get("homepage"),
            vcs_url=repo_data.get("git_url"),
            api_url=api_url,
            bug_tracking_url=f"{github_url}/{namespace}/{name}/issues",
            code_view_url=f"{github_url}/{namespace}/{name}",
            declared_license=license_data.get("spdx_id"),
            primary_language=repo_data.get("language"),
        )
        return Package(**package_data, **purl.to_dict())

    @classmethod
    def get_package_info(cls, package_url):
        """
        Yield Package objects for `package_url` using this class' regexes.
        """
        default_package = cls.get_default_package(package_url)
        yield from get_github_packages(
            package_url,
            cls.version_regex,
            cls.ignored_tag_regex,
            default_package,
        )
89+
90+
91+
def get_github_packages(purl, version_regex, ignored_tag_regex, default_package):
    """
    Yield Package objects for the GitHub `purl`.

    Packages come from `_get_github_packages`. When `purl` carries a
    version, only the first package with that exact version is yielded and
    the iteration stops; otherwise every package is yielded.
    """
    candidates = _get_github_packages(
        purl, version_regex, ignored_tag_regex, default_package
    )
    wanted_version = purl.version

    if not wanted_version:
        yield from candidates
        return

    for candidate in candidates:
        if candidate.version == wanted_version:
            # A specific version was requested and found: nothing more to do.
            yield candidate
            return
108+
109+
110+
def _get_github_packages(purl, version_regex, ignored_tag_regex, default_package):
    """
    Yield a Package for each usable tag of the GitHub repository of `purl`.

    Tags come from `utils.fetch_github_tags_gql` as (tag, date) pairs.
    A tag is skipped when it matches `ignored_tag_regex`, when
    `version_regex` is given and does not match it, or when the resulting
    version is empty. With a `version_regex`, the version is its "version"
    named group; without one, the tag itself is the version.

    Each Package copies the data of `default_package` and overrides its
    download URL, release date and version.
    """
    archive_download_url = (
        "https://github.com/{org}/{name}/archive/refs/tags/{tag_name}.tar.gz"
    )

    base_package_data = default_package.to_dict()
    for tag, date in utils.fetch_github_tags_gql(purl):
        if ignored_tag_regex and ignored_tag_regex.match(tag):
            continue

        if version_regex:
            match = version_regex.match(tag)
            if not match:
                continue
            version = match.group("version")
        else:
            version = tag

        # NOTE(review): strip() removes "v"/"V" from both ends of the version,
        # not only a leading prefix -- confirm this is intended.
        version = version.strip("Vv").strip()
        if not version:
            continue

        download_url = archive_download_url.format(
            org=purl.namespace, name=purl.name, tag_name=tag
        )

        # Guard against a tag without a date instead of failing with an
        # AttributeError. Whether the tag source can yield None here cannot
        # be confirmed from this module -- verify against
        # utils.fetch_github_tags_gql.
        release_date = None
        if date is not None:
            release_date = date.strftime("%Y-%m-%dT%H:%M:%S")

        # Build a fresh mapping per tag so that nothing leaks from one tag
        # to the next through a shared, mutated dict.
        package_data = {
            **base_package_data,
            "download_url": download_url,
            "release_date": release_date,
            "version": version,
        }

        yield package_from_dict(package_data)
147+
148+
149+
class UBootGitHubSource(GitHubSource):
    """
    GitHub source registered for "u-boot/u-boot".

    A tag is kept when it starts with ``v``, four digits, a dot and two
    digits that are not followed by a word character, ``.`` or ``-``.
    """

    ignored_tag_regex = None
    version_regex = re.compile(r"(?P<version>v\d{4}\.\d{2})(?![\w.-])")
152+
153+
154+
class Genext2fsGitHubSource(GitHubSource):
    """
    GitHub source registered for "bestouff/genext2fs".

    Every tag is used as the version, except tags starting with
    ``debian_version...upstream_version...`` which are ignored.
    """

    ignored_tag_regex = re.compile(r"debian_version\S+upstream_version\S+")
    version_regex = None
157+
158+
159+
class SquashfsToolsGitHubSource(GitHubSource):
    """
    GitHub source registered for "plougher/squashfs-tools".

    A tag is kept when it starts with an optional ``v``/``V`` followed by a
    dotted number of two or three components, which becomes the version.
    """

    ignored_tag_regex = None
    version_regex = re.compile(r"\b[vV]?(?P<version>(?:\d+(\.\d+){1,2}))\b")
162+
163+
164+
class PupnpGitHubSource(GitHubSource):
    """
    GitHub source registered for "pupnp/pupnp".

    A tag is kept when it starts with ``release`` and an optional hyphen,
    followed by a dotted number of two or three components, which becomes
    the version.
    """

    ignored_tag_regex = None
    version_regex = re.compile(r"\brelease-?(?P<version>(?:\d+(\.\d+){1,2}))\b")
167+
168+
169+
class BrotliGitHubSource(GitHubSource):
    """
    GitHub source registered for "google/brotli".

    A tag is kept when it starts with an optional ``v``/``V`` followed by a
    dotted number of two or three components, which becomes the version.
    """

    ignored_tag_regex = None
    version_regex = re.compile(r"\b[vV]?(?P<version>(?:\d+(\.\d+){1,2}))\b")
172+
173+
174+
class BpftoolGitHubSource(GitHubSource):
    """
    GitHub source registered for "libbpf/bpftool".

    A tag is kept when it starts with an optional ``v``/``V`` followed by a
    dotted number of two or three components, which becomes the version.
    """

    ignored_tag_regex = None
    version_regex = re.compile(r"\b[vV]?(?P<version>(?:\d+(\.\d+){1,2}))\b")
177+
178+
179+
class SqliteGitHubSource(GitHubSource):
    """
    GitHub source registered for "sqlite/sqlite".

    A tag is kept when it starts with ``version`` and an optional hyphen,
    followed by a dotted number of two or three components, which becomes
    the version.
    """

    ignored_tag_regex = None
    version_regex = re.compile(r"\bversion-?(?P<version>(?:\d+(\.\d+){1,2}))\b")
182+
183+
184+
class LlvmGitHubSource(GitHubSource):
    """
    GitHub source registered for "llvm/llvm-project".

    A tag is kept when it starts with ``llvmorg-``; everything after that
    prefix becomes the version.
    """

    ignored_tag_regex = None
    version_regex = re.compile(r"llvmorg-(?P<version>.+)")
187+
188+
189+
class RpmGitHubSource(GitHubSource):
    """
    GitHub source registered for "rpm-software-management/rpm".

    A tag is kept when it starts with ``rpm-``. The version is either a
    hyphen-free run optionally followed by a ``-`` suffix that does not
    begin with ``release``, or the literal ``-release``.
    """

    ignored_tag_regex = None
    version_regex = re.compile(r"rpm-(?P<version>[^-]+(?:-(?!release).*)?|-release)")
192+
193+
194+
# Source class to use for a GitHub repository, keyed by "namespace/name".
# Repositories mapped to plain GitHubSource use their tags as versions
# without a version or ignore regex; the others use a subclass with
# repository-specific tag patterns.
GITHUB_SOURCE_BY_PACKAGE = {
    "avahi/avahi": GitHubSource,
    "bestouff/genext2fs": Genext2fsGitHubSource,
    "dosfstools/dosfstools": GitHubSource,
    "google/brotli": BrotliGitHubSource,
    "hewlettpackard/wireless-tools": GitHubSource,
    "inotify-tools/inotify-tools": GitHubSource,
    "libbpf/bpftool": BpftoolGitHubSource,
    "llvm/llvm-project": LlvmGitHubSource,
    "nixos/nix": GitHubSource,
    "plougher/squashfs-tools": SquashfsToolsGitHubSource,
    "pupnp/pupnp": PupnpGitHubSource,
    "python/cpython": GitHubSource,
    "rpm-software-management/rpm": RpmGitHubSource,
    "shadow-maint/shadow": GitHubSource,
    "sqlite/sqlite": SqliteGitHubSource,
    "u-boot/u-boot": UBootGitHubSource,
}
212+
213+
214+
class MiniupnpPackagesGitHubSource(GitHubSource):
    """
    GitHub source for packages released as ``<package_name>_<version>`` tags
    of a single shared repository.

    The version regex depends on the requested package name, so it is built
    per call from `version_regex_template`.
    """

    version_regex = None
    ignored_tag_regex = None
    version_regex_template = r"{}_(?P<version>.+)"

    @classmethod
    def get_package_info(cls, gh_purl, package_name):
        """
        Yield generic-type Package objects named `package_name` from the tags
        of the GitHub repository `gh_purl`.

        Underscores in the tag-derived version are replaced with dots. When
        `gh_purl` carries a version, only the first package whose version
        equals it (in either the tag form or the dotted form) is yielded.
        """
        # Keep the per-call pattern local: assigning it to ``cls.version_regex``
        # would mutate state shared by every caller of this class.
        version_regex = re.compile(
            cls.version_regex_template.format(re.escape(package_name))
        )
        requested_version = gh_purl.version

        # Filter on the requested version here rather than in
        # get_github_packages(): that helper compares against the raw tag
        # version, before "_" is rewritten to ".", so a dotted requested
        # version could never match an underscore-separated tag version.
        packages = _get_github_packages(
            gh_purl,
            version_regex,
            cls.ignored_tag_regex,
            cls.get_default_package(gh_purl),
        )

        for package in packages:
            package_dict = package.to_dict()
            tag_version = package_dict["version"]
            dotted_version = tag_version.replace("_", ".")

            if requested_version and requested_version not in (
                tag_version,
                dotted_version,
            ):
                continue

            package_dict["namespace"] = None
            package_dict["name"] = package_name
            package_dict["type"] = "generic"
            package_dict["version"] = dotted_version

            yield package_from_dict(package_dict)

            # A specific version was requested and found: stop searching.
            if requested_version:
                break
240+
241+
19242
IPKG_RELEASES = {
20243
"0.99.88": {
21244
"url": "https://web.archive.org/web/20090326020239/http:/handhelds.org/download/packages/ipkg/ipkg-0.99.88.tar.gz",

0 commit comments

Comments
 (0)