Skip to content

Commit d6b68d2

Browse files
committed
Prefer using PKG-INFO from .egg-info in assemble #3083
* Add test for checking that the .egg-info PKG-INFO is the only Package source reported * Update test expectations Signed-off-by: Jono Yang <jyang@nexb.com>
1 parent 2e971c4 commit d6b68d2

File tree

19 files changed

+10158
-183
lines changed

19 files changed

+10158
-183
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
/src/*.egg-info
1212
*.egg-info
1313
!tests/packagedcode/data/pypi/source-package/pip-22.0.4/src/pip.egg-info
14+
!tests/packagedcode/data/pypi/unpacked_sdist/prefer-egg-info-pkg-info/celery/celery.egg-info
1415
/dist
1516
/build
1617
/bin

src/packagedcode/pypi.py

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import pkginfo2
3030
from commoncode import fileutils
3131
from commoncode.fileutils import as_posixpath
32+
from commoncode.resource import Resource
3233
from packaging.specifiers import SpecifierSet
3334
from packageurl import PackageURL
3435
from packaging import markers
@@ -133,6 +134,16 @@ def create_package_from_package_data(package_data, datafile_path):
133134
return package
134135

135136

137+
def is_egg_info_directory(resource):
    """
    Tell whether `resource` is a Python .egg-info directory.

    Return False when `resource` is not a Resource instance (for example
    None); otherwise return True only when its path ends with '.egg-info'.
    """
    # Guard first so that non-Resource values never have `.path` accessed.
    if not isinstance(resource, Resource):
        return False
    return resource.path.endswith('.egg-info')
145+
146+
136147
class BaseExtractedPythonLayout(BasePypiHandler):
137148
"""
138149
Base class for development repos, sdist tarballs and other related extracted
@@ -152,7 +163,33 @@ def assemble(cls, package_data, resource, codebase, package_adder):
152163

153164
package_resource = None
154165
if resource.name == 'PKG-INFO':
166+
# Initially use current Resource as `package_resource`.
167+
# We'll want to update `package_resource` with the Resource of a
168+
# PKG-INFO file that's in an .egg-info Directory.
155169
package_resource = resource
170+
# We want to use the PKG-INFO file from an .egg-info directory, as
171+
# the package info collected from a *.egg-info/PKG-INFO file has
172+
# dependency information that a PKG-INFO from the root of a Python
173+
# project lacks.
174+
parent_resource = resource.parent(codebase)
175+
if not is_egg_info_directory(parent_resource):
176+
# If we are not in an .egg-info directory, we assume we are at
177+
# the root of a Python codebase and we want to find the
178+
# .egg-info dir
179+
egg_info_dir = None
180+
for sibling in resource.siblings(codebase):
181+
if sibling.path.endswith('.egg-info'):
182+
egg_info_dir = sibling
183+
break
184+
185+
# If we find the .egg-info dir, then we look for the PKG-INFO
186+
# file in it and use that as our package_resource
187+
if egg_info_dir:
188+
for child in egg_info_dir.children(codebase):
189+
if not child.name == 'PKG-INFO':
190+
continue
191+
package_resource = child
192+
break
156193
elif resource.name in datafile_name_patterns:
157194
if resource.has_parent():
158195
siblings = resource.siblings(codebase)
@@ -221,7 +258,14 @@ def assemble(cls, package_data, resource, codebase, package_adder):
221258
package.license_expression = compute_normalized_license(package.declared_license)
222259
package_uid = package.package_uid
223260

224-
root = package_resource.parent(codebase)
261+
package_resource_parent = package_resource.parent(codebase)
262+
if is_egg_info_directory(package_resource_parent):
263+
root = package_resource_parent.parent(codebase)
264+
else:
265+
# We're assuming that our package resource is already at the
266+
# root
267+
root = package_resource_parent
268+
225269
if root:
226270
for py_res in cls.walk_pypi(resource=root, codebase=codebase):
227271
if py_res.is_dir:

tests/packagedcode/data/about/aboutfiles.expected.json

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,4 @@
11
{
2-
"headers": [
3-
{
4-
"tool_name": "scancode-toolkit",
5-
"options": {
6-
"input": "<path>",
7-
"--json": "<file>",
8-
"--package": true
9-
},
10-
"notice": "Generated with ScanCode and provided on an \"AS IS\" BASIS, WITHOUT WARRANTIES\nOR CONDITIONS OF ANY KIND, either express or implied. No content created from\nScanCode should be considered or used as legal advice. Consult an Attorney\nfor any legal advice.\nScanCode is a free software code scanning tool from nexB Inc. and others.\nVisit https://github.com/nexB/scancode-toolkit/ for support and download.",
11-
"output_format_version": "2.0.0",
12-
"message": null,
13-
"errors": [],
14-
"warnings": [],
15-
"extra_data": {
16-
"system_environment": {
17-
"operating_system": "linux",
18-
"cpu_architecture": "64",
19-
"platform": "Linux-5.4.0-109-generic-x86_64-with-Ubuntu-18.04-bionic",
20-
"platform_version": "#123~18.04.1-Ubuntu SMP Fri Apr 8 09:48:52 UTC 2022",
21-
"python_version": "3.6.9 (default, Mar 15 2022, 13:55:28) \n[GCC 8.4.0]"
22-
},
23-
"spdx_license_list_version": "3.16",
24-
"files_count": 3
25-
}
26-
}
27-
],
282
"dependencies": [],
293
"packages": [
304
{

tests/packagedcode/data/build/bazel/end2end-expected.json

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,4 @@
11
{
2-
"headers": [
3-
{
4-
"tool_name": "scancode-toolkit",
5-
"options": {
6-
"input": "<path>",
7-
"--json-pp": "<file>",
8-
"--package": true
9-
},
10-
"notice": "Generated with ScanCode and provided on an \"AS IS\" BASIS, WITHOUT WARRANTIES\nOR CONDITIONS OF ANY KIND, either express or implied. No content created from\nScanCode should be considered or used as legal advice. Consult an Attorney\nfor any legal advice.\nScanCode is a free software code scanning tool from nexB Inc. and others.\nVisit https://github.com/nexB/scancode-toolkit/ for support and download.",
11-
"output_format_version": "2.0.0",
12-
"message": null,
13-
"errors": [],
14-
"warnings": [],
15-
"extra_data": {
16-
"system_environment": {
17-
"operating_system": "linux",
18-
"cpu_architecture": "64",
19-
"platform": "Linux-5.4.0-109-generic-x86_64-with-Ubuntu-18.04-bionic",
20-
"platform_version": "#123~18.04.1-Ubuntu SMP Fri Apr 8 09:48:52 UTC 2022",
21-
"python_version": "3.6.9 (default, Mar 15 2022, 13:55:28) \n[GCC 8.4.0]"
22-
},
23-
"spdx_license_list_version": "3.16",
24-
"files_count": 6
25-
}
26-
}
27-
],
282
"dependencies": [],
293
"packages": [
304
{

tests/packagedcode/data/build/buck/end2end-expected.json

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,4 @@
11
{
2-
"headers": [
3-
{
4-
"tool_name": "scancode-toolkit",
5-
"options": {
6-
"input": "<path>",
7-
"--json-pp": "<file>",
8-
"--package": true
9-
},
10-
"notice": "Generated with ScanCode and provided on an \"AS IS\" BASIS, WITHOUT WARRANTIES\nOR CONDITIONS OF ANY KIND, either express or implied. No content created from\nScanCode should be considered or used as legal advice. Consult an Attorney\nfor any legal advice.\nScanCode is a free software code scanning tool from nexB Inc. and others.\nVisit https://github.com/nexB/scancode-toolkit/ for support and download.",
11-
"output_format_version": "2.0.0",
12-
"message": null,
13-
"errors": [],
14-
"warnings": [],
15-
"extra_data": {
16-
"system_environment": {
17-
"operating_system": "linux",
18-
"cpu_architecture": "64",
19-
"platform": "Linux-5.4.0-109-generic-x86_64-with-Ubuntu-18.04-bionic",
20-
"platform_version": "#123~18.04.1-Ubuntu SMP Fri Apr 8 09:48:52 UTC 2022",
21-
"python_version": "3.6.9 (default, Mar 15 2022, 13:55:28) \n[GCC 8.4.0]"
22-
},
23-
"spdx_license_list_version": "3.16",
24-
"files_count": 7
25-
}
26-
}
27-
],
282
"dependencies": [],
293
"packages": [
304
{

tests/packagedcode/data/cocoapods/assemble/solo/Podfile-expected.json

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,4 @@
11
{
2-
"headers": [
3-
{
4-
"tool_name": "scancode-toolkit",
5-
"options": {
6-
"input": "<path>",
7-
"--json": "<file>",
8-
"--package": true
9-
},
10-
"notice": "Generated with ScanCode and provided on an \"AS IS\" BASIS, WITHOUT WARRANTIES\nOR CONDITIONS OF ANY KIND, either express or implied. No content created from\nScanCode should be considered or used as legal advice. Consult an Attorney\nfor any legal advice.\nScanCode is a free software code scanning tool from nexB Inc. and others.\nVisit https://github.com/nexB/scancode-toolkit/ for support and download.",
11-
"output_format_version": "2.0.0",
12-
"message": null,
13-
"errors": [],
14-
"warnings": [],
15-
"extra_data": {
16-
"system_environment": {
17-
"operating_system": "linux",
18-
"cpu_architecture": "64",
19-
"platform": "Linux-5.4.0-109-generic-x86_64-with-Ubuntu-18.04-bionic",
20-
"platform_version": "#123~18.04.1-Ubuntu SMP Fri Apr 8 09:48:52 UTC 2022",
21-
"python_version": "3.6.9 (default, Mar 15 2022, 13:55:28) \n[GCC 8.4.0]"
22-
},
23-
"spdx_license_list_version": "3.16",
24-
"files_count": 1
25-
}
26-
}
27-
],
282
"dependencies": [],
293
"packages": [],
304
"files": [

tests/packagedcode/data/cocoapods/assemble/solo/RxDataSources.podspec-expected.json

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,4 @@
11
{
2-
"headers": [
3-
{
4-
"tool_name": "scancode-toolkit",
5-
"options": {
6-
"input": "<path>",
7-
"--json": "<file>",
8-
"--package": true
9-
},
10-
"notice": "Generated with ScanCode and provided on an \"AS IS\" BASIS, WITHOUT WARRANTIES\nOR CONDITIONS OF ANY KIND, either express or implied. No content created from\nScanCode should be considered or used as legal advice. Consult an Attorney\nfor any legal advice.\nScanCode is a free software code scanning tool from nexB Inc. and others.\nVisit https://github.com/nexB/scancode-toolkit/ for support and download.",
11-
"output_format_version": "2.0.0",
12-
"message": null,
13-
"errors": [],
14-
"warnings": [],
15-
"extra_data": {
16-
"system_environment": {
17-
"operating_system": "linux",
18-
"cpu_architecture": "64",
19-
"platform": "Linux-5.4.0-109-generic-x86_64-with-Ubuntu-18.04-bionic",
20-
"platform_version": "#123~18.04.1-Ubuntu SMP Fri Apr 8 09:48:52 UTC 2022",
21-
"python_version": "3.6.9 (default, Mar 15 2022, 13:55:28) \n[GCC 8.4.0]"
22-
},
23-
"spdx_license_list_version": "3.16",
24-
"files_count": 1
25-
}
26-
}
27-
],
282
"dependencies": [],
293
"packages": [
304
{

tests/packagedcode/data/npm/get_package_resources.scan.expected.json

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,4 @@
11
{
2-
"headers": [
3-
{
4-
"tool_name": "scancode-toolkit",
5-
"options": {
6-
"input": "<path>",
7-
"--json": "<file>",
8-
"--package": true
9-
},
10-
"notice": "Generated with ScanCode and provided on an \"AS IS\" BASIS, WITHOUT WARRANTIES\nOR CONDITIONS OF ANY KIND, either express or implied. No content created from\nScanCode should be considered or used as legal advice. Consult an Attorney\nfor any legal advice.\nScanCode is a free software code scanning tool from nexB Inc. and others.\nVisit https://github.com/nexB/scancode-toolkit/ for support and download.",
11-
"output_format_version": "2.0.0",
12-
"message": null,
13-
"errors": [],
14-
"warnings": [],
15-
"extra_data": {
16-
"system_environment": {
17-
"operating_system": "linux",
18-
"cpu_architecture": "64",
19-
"platform": "Linux-5.4.0-109-generic-x86_64-with-Ubuntu-18.04-bionic",
20-
"platform_version": "#123~18.04.1-Ubuntu SMP Fri Apr 8 09:48:52 UTC 2022",
21-
"python_version": "3.6.9 (default, Mar 15 2022, 13:55:28) \n[GCC 8.4.0]"
22-
},
23-
"spdx_license_list_version": "3.16",
24-
"files_count": 3
25-
}
26-
}
27-
],
282
"dependencies": [],
293
"packages": [
304
{

0 commit comments

Comments
 (0)