Skip to content

Commit f056c59

Browse files
authored
Merge pull request #3091 from nexB/3083-no-pkg-info-deps
Prefer using PKG-INFO from .egg-info in assemble #3083
2 parents 2e971c4 + 1539c49 commit f056c59

File tree

71 files changed

+10600
-897
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

71 files changed

+10600
-897
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
/src/*.egg-info
1212
*.egg-info
1313
!tests/packagedcode/data/pypi/source-package/pip-22.0.4/src/pip.egg-info
14+
!tests/packagedcode/data/pypi/unpacked_sdist/prefer-egg-info-pkg-info/celery/celery.egg-info
1415
/dist
1516
/build
1617
/bin

CHANGELOG.rst

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,23 @@ Package detection:
1414
- OpenWRT packages.
1515
- Yocto/BitBake .bb recipes.
1616

17+
- Update ``GemfileLockParser`` to track the gem which the Gemfile.lock is for,
18+
which we assign to the new ``GemfileLockParser.primary_gem`` field. Update
19+
``GemfileLockHandler.parse()`` to handle the case where there is a primary gem
20+
detected from a gemfile.lock. If there is a primary gem, a single ``Package``
21+
is created and the detected gem data within the gemfile.lock are assigned as
22+
dependencies. If there is no primary gem, then all of the dependencies are
23+
collected into a single ``Package`` with no name, which is then yielded.
24+
25+
https://github.com/nexB/scancode-toolkit/issues/3072
26+
27+
- Fix issue where dependencies were not reported when scanning an extracted
28+
Python project by modifying ``BaseExtractedPythonLayout.assemble()`` to favor
29+
using package data from a PKG-INFO file from an egg-info directory. Package
30+
data from a PKG-INFO file from an egg-info directory contains the dependency
31+
information collected from the requirements.txt file alongside PKG-INFO.
32+
33+
https://github.com/nexB/scancode-toolkit/issues/3083
1734

1835
License detection:
1936
~~~~~~~~~~~~~~~~~~~

src/packagedcode/pypi.py

Lines changed: 60 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import pkginfo2
3030
from commoncode import fileutils
3131
from commoncode.fileutils import as_posixpath
32+
from commoncode.resource import Resource
3233
from packaging.specifiers import SpecifierSet
3334
from packageurl import PackageURL
3435
from packaging import markers
@@ -133,6 +134,16 @@ def create_package_from_package_data(package_data, datafile_path):
133134
return package
134135

135136

137+
def is_egg_info_directory(resource):
138+
"""
139+
Return True if `resource` is a Python .egg-info directory
140+
"""
141+
return (
142+
isinstance(resource, Resource)
143+
and resource.path.endswith('.egg-info')
144+
)
145+
146+
136147
class BaseExtractedPythonLayout(BasePypiHandler):
137148
"""
138149
Base class for development repos, sdist tarballs and other related extracted
@@ -152,7 +163,33 @@ def assemble(cls, package_data, resource, codebase, package_adder):
152163

153164
package_resource = None
154165
if resource.name == 'PKG-INFO':
166+
# Initially use current Resource as `package_resource`.
167+
# We'll want to update `package_resource` with the Resource of a
168+
# PKG-INFO file that's in an .egg-info Directory.
155169
package_resource = resource
170+
# We want to use the PKG-INFO file from an .egg-info directory, as
171+
# the package info collected from a *.egg-info/PKG-INFO file has
172+
# dependency information that a PKG-INFO from the root of a Python
173+
# project lacks.
174+
parent_resource = resource.parent(codebase)
175+
if not is_egg_info_directory(parent_resource):
176+
# If we are not in an .egg-info directory, we assume we are at
177+
# the root of a Python codebase and we want to find the
178+
# .egg-info dir
179+
egg_info_dir = None
180+
for sibling in resource.siblings(codebase):
181+
if sibling.path.endswith('.egg-info'):
182+
egg_info_dir = sibling
183+
break
184+
185+
# If we find the .egg-info dir, then we look for the PKG-INFO
186+
# file in it and use that as our package_resource
187+
if egg_info_dir:
188+
for child in egg_info_dir.children(codebase):
189+
if not child.name == 'PKG-INFO':
190+
continue
191+
package_resource = child
192+
break
156193
elif resource.name in datafile_name_patterns:
157194
if resource.has_parent():
158195
siblings = resource.siblings(codebase)
@@ -221,7 +258,14 @@ def assemble(cls, package_data, resource, codebase, package_adder):
221258
package.license_expression = compute_normalized_license(package.declared_license)
222259
package_uid = package.package_uid
223260

224-
root = package_resource.parent(codebase)
261+
package_resource_parent = package_resource.parent(codebase)
262+
if is_egg_info_directory(package_resource_parent):
263+
root = package_resource_parent.parent(codebase)
264+
else:
265+
# We're assuming that our package resource is already at the
266+
# root
267+
root = package_resource_parent
268+
225269
if root:
226270
for py_res in cls.walk_pypi(resource=root, codebase=codebase):
227271
if py_res.is_dir:
@@ -724,14 +768,16 @@ def parse(cls, location):
724768
name="python",
725769
)
726770
resolved_purl = get_resolved_purl(purl=purl, specifiers=SpecifierSet(python_requires_specifier))
727-
dependent_packages.append(models.DependentPackage(
728-
purl=str(resolved_purl.purl),
729-
scope=scope,
730-
is_runtime=True,
731-
is_optional=False,
732-
is_resolved=resolved_purl.is_resolved,
733-
extracted_requirement=f"python_requires{python_requires_specifier}",
734-
))
771+
dependent_packages.append(
772+
models.DependentPackage(
773+
purl=str(resolved_purl.purl),
774+
scope=scope,
775+
is_runtime=True,
776+
is_optional=False,
777+
is_resolved=resolved_purl.is_resolved,
778+
extracted_requirement=f"python_requires{python_requires_specifier}",
779+
)
780+
)
735781

736782
if section.name == "options.extras_require":
737783
for sub_section in section:
@@ -931,11 +977,7 @@ def get_requirements_txt_dependencies(location, include_nested=False):
931977
purl = None
932978

933979
purl = purl and purl.to_string() or None
934-
935-
if req.is_editable:
936-
requirement = req.dumps()
937-
else:
938-
requirement = req.dumps(with_name=False)
980+
requirement = req.dumps()
939981

940982
if location.endswith(
941983
(
@@ -1311,15 +1353,16 @@ def get_requires_dependencies(requires, default_scope='install'):
13111353
is_resolved = True
13121354
purl = purl._replace(version=specifier.version)
13131355

1314-
# we use the extra as scope if avialble
1315-
scope = get_extra(req.marker) or default_scope
1356+
# we use the extra as scope if available
1357+
extra = get_extra(req.marker)
1358+
scope = extra or default_scope
13161359

13171360
dependent_packages.append(
13181361
models.DependentPackage(
13191362
purl=purl.to_string(),
13201363
scope=scope,
13211364
is_runtime=True,
1322-
is_optional=False,
1365+
is_optional=True if bool(extra) else False,
13231366
is_resolved=is_resolved,
13241367
extracted_requirement=str(req),
13251368
))

tests/cluecode/data/plugin_email_url/emails-threshold.expected.json

Lines changed: 0 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,4 @@
11
{
2-
"headers": [
3-
{
4-
"tool_name": "scancode-toolkit",
5-
"options": {
6-
"input": "<path>",
7-
"--email": true,
8-
"--json": "<file>",
9-
"--max-email": "2",
10-
"--strip-root": true
11-
},
12-
"notice": "Generated with ScanCode and provided on an \"AS IS\" BASIS, WITHOUT WARRANTIES\nOR CONDITIONS OF ANY KIND, either express or implied. No content created from\nScanCode should be considered or used as legal advice. Consult an Attorney\nfor any legal advice.\nScanCode is a free software code scanning tool from nexB Inc. and others.\nVisit https://github.com/nexB/scancode-toolkit/ for support and download.",
13-
"output_format_version": "2.0.0",
14-
"message": null,
15-
"errors": [],
16-
"warnings": [],
17-
"extra_data": {
18-
"system_environment": {
19-
"operating_system": "linux",
20-
"cpu_architecture": "64",
21-
"platform": "Linux-5.4.0-109-generic-x86_64-with-Ubuntu-18.04-bionic",
22-
"platform_version": "#123~18.04.1-Ubuntu SMP Fri Apr 8 09:48:52 UTC 2022",
23-
"python_version": "3.6.9 (default, Mar 15 2022, 13:55:28) \n[GCC 8.4.0]"
24-
},
25-
"spdx_license_list_version": "3.16",
26-
"files_count": 2
27-
}
28-
}
29-
],
302
"files": [
313
{
324
"path": "3w-xxxx.c",

tests/cluecode/data/plugin_email_url/emails.expected.json

Lines changed: 0 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,4 @@
11
{
2-
"headers": [
3-
{
4-
"tool_name": "scancode-toolkit",
5-
"options": {
6-
"input": "<path>",
7-
"--email": true,
8-
"--json": "<file>",
9-
"--strip-root": true
10-
},
11-
"notice": "Generated with ScanCode and provided on an \"AS IS\" BASIS, WITHOUT WARRANTIES\nOR CONDITIONS OF ANY KIND, either express or implied. No content created from\nScanCode should be considered or used as legal advice. Consult an Attorney\nfor any legal advice.\nScanCode is a free software code scanning tool from nexB Inc. and others.\nVisit https://github.com/nexB/scancode-toolkit/ for support and download.",
12-
"output_format_version": "2.0.0",
13-
"message": null,
14-
"errors": [],
15-
"warnings": [],
16-
"extra_data": {
17-
"system_environment": {
18-
"operating_system": "linux",
19-
"cpu_architecture": "64",
20-
"platform": "Linux-5.4.0-109-generic-x86_64-with-Ubuntu-18.04-bionic",
21-
"platform_version": "#123~18.04.1-Ubuntu SMP Fri Apr 8 09:48:52 UTC 2022",
22-
"python_version": "3.6.9 (default, Mar 15 2022, 13:55:28) \n[GCC 8.4.0]"
23-
},
24-
"spdx_license_list_version": "3.16",
25-
"files_count": 2
26-
}
27-
}
28-
],
292
"files": [
303
{
314
"path": "3w-xxxx.c",

tests/cluecode/data/plugin_email_url/urls-threshold.expected.json

Lines changed: 0 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,4 @@
11
{
2-
"headers": [
3-
{
4-
"tool_name": "scancode-toolkit",
5-
"options": {
6-
"input": "<path>",
7-
"--json": "<file>",
8-
"--max-url": "2",
9-
"--strip-root": true,
10-
"--url": true
11-
},
12-
"notice": "Generated with ScanCode and provided on an \"AS IS\" BASIS, WITHOUT WARRANTIES\nOR CONDITIONS OF ANY KIND, either express or implied. No content created from\nScanCode should be considered or used as legal advice. Consult an Attorney\nfor any legal advice.\nScanCode is a free software code scanning tool from nexB Inc. and others.\nVisit https://github.com/nexB/scancode-toolkit/ for support and download.",
13-
"output_format_version": "2.0.0",
14-
"message": null,
15-
"errors": [],
16-
"warnings": [],
17-
"extra_data": {
18-
"system_environment": {
19-
"operating_system": "linux",
20-
"cpu_architecture": "64",
21-
"platform": "Linux-5.4.0-109-generic-x86_64-with-Ubuntu-18.04-bionic",
22-
"platform_version": "#123~18.04.1-Ubuntu SMP Fri Apr 8 09:48:52 UTC 2022",
23-
"python_version": "3.6.9 (default, Mar 15 2022, 13:55:28) \n[GCC 8.4.0]"
24-
},
25-
"spdx_license_list_version": "3.16",
26-
"files_count": 2
27-
}
28-
}
29-
],
302
"files": [
313
{
324
"path": "3w-xxxx.c",

tests/cluecode/data/plugin_email_url/urls.expected.json

Lines changed: 0 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,4 @@
11
{
2-
"headers": [
3-
{
4-
"tool_name": "scancode-toolkit",
5-
"options": {
6-
"input": "<path>",
7-
"--json": "<file>",
8-
"--strip-root": true,
9-
"--url": true
10-
},
11-
"notice": "Generated with ScanCode and provided on an \"AS IS\" BASIS, WITHOUT WARRANTIES\nOR CONDITIONS OF ANY KIND, either express or implied. No content created from\nScanCode should be considered or used as legal advice. Consult an Attorney\nfor any legal advice.\nScanCode is a free software code scanning tool from nexB Inc. and others.\nVisit https://github.com/nexB/scancode-toolkit/ for support and download.",
12-
"output_format_version": "2.0.0",
13-
"message": null,
14-
"errors": [],
15-
"warnings": [],
16-
"extra_data": {
17-
"system_environment": {
18-
"operating_system": "linux",
19-
"cpu_architecture": "64",
20-
"platform": "Linux-5.4.0-109-generic-x86_64-with-Ubuntu-18.04-bionic",
21-
"platform_version": "#123~18.04.1-Ubuntu SMP Fri Apr 8 09:48:52 UTC 2022",
22-
"python_version": "3.6.9 (default, Mar 15 2022, 13:55:28) \n[GCC 8.4.0]"
23-
},
24-
"spdx_license_list_version": "3.16",
25-
"files_count": 2
26-
}
27-
}
28-
],
292
"files": [
303
{
314
"path": "3w-xxxx.c",

tests/cluecode/data/plugin_filter_clues/filtered-expected.json

Lines changed: 0 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,4 @@
11
{
2-
"headers": [
3-
{
4-
"tool_name": "scancode-toolkit",
5-
"options": {
6-
"input": "<path>",
7-
"--copyright": true,
8-
"--email": true,
9-
"--filter-clues": true,
10-
"--info": true,
11-
"--json": "<file>",
12-
"--license": true,
13-
"--url": true
14-
},
15-
"notice": "Generated with ScanCode and provided on an \"AS IS\" BASIS, WITHOUT WARRANTIES\nOR CONDITIONS OF ANY KIND, either express or implied. No content created from\nScanCode should be considered or used as legal advice. Consult an Attorney\nfor any legal advice.\nScanCode is a free software code scanning tool from nexB Inc. and others.\nVisit https://github.com/nexB/scancode-toolkit/ for support and download.",
16-
"output_format_version": "2.0.0",
17-
"message": null,
18-
"errors": [],
19-
"warnings": [],
20-
"extra_data": {
21-
"system_environment": {
22-
"operating_system": "linux",
23-
"cpu_architecture": "64",
24-
"platform": "Linux-5.4.0-109-generic-x86_64-with-Ubuntu-18.04-bionic",
25-
"platform_version": "#123~18.04.1-Ubuntu SMP Fri Apr 8 09:48:52 UTC 2022",
26-
"python_version": "3.6.9 (default, Mar 15 2022, 13:55:28) \n[GCC 8.4.0]"
27-
},
28-
"spdx_license_list_version": "3.16",
29-
"files_count": 1
30-
}
31-
}
32-
],
332
"files": [
343
{
354
"path": "LICENSE",

tests/cluecode/data/plugin_filter_clues/filtered-expected2.json

Lines changed: 0 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,4 @@
11
{
2-
"headers": [
3-
{
4-
"tool_name": "scancode-toolkit",
5-
"options": {
6-
"input": "<path>",
7-
"--copyright": true,
8-
"--email": true,
9-
"--filter-clues": true,
10-
"--info": true,
11-
"--json": "<file>",
12-
"--license": true,
13-
"--url": true
14-
},
15-
"notice": "Generated with ScanCode and provided on an \"AS IS\" BASIS, WITHOUT WARRANTIES\nOR CONDITIONS OF ANY KIND, either express or implied. No content created from\nScanCode should be considered or used as legal advice. Consult an Attorney\nfor any legal advice.\nScanCode is a free software code scanning tool from nexB Inc. and others.\nVisit https://github.com/nexB/scancode-toolkit/ for support and download.",
16-
"output_format_version": "2.0.0",
17-
"message": null,
18-
"errors": [],
19-
"warnings": [],
20-
"extra_data": {
21-
"system_environment": {
22-
"operating_system": "linux",
23-
"cpu_architecture": "64",
24-
"platform": "Linux-5.4.0-109-generic-x86_64-with-Ubuntu-18.04-bionic",
25-
"platform_version": "#123~18.04.1-Ubuntu SMP Fri Apr 8 09:48:52 UTC 2022",
26-
"python_version": "3.6.9 (default, Mar 15 2022, 13:55:28) \n[GCC 8.4.0]"
27-
},
28-
"spdx_license_list_version": "3.16",
29-
"files_count": 1
30-
}
31-
}
32-
],
332
"files": [
343
{
354
"path": "LICENSE2",

0 commit comments

Comments
 (0)