Skip to content

Commit 44b2734

Browse files
Initial implementation of --package-only
Signed-off-by: Ayan Sinha Mahapatra <ayansmahapatra@gmail.com>
1 parent 66d7166 commit 44b2734

File tree

7 files changed

+180
-20
lines changed

7 files changed

+180
-20
lines changed

src/packagedcode/bower.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ class BowerJsonHandler(models.DatafileHandler):
2525
documentation_url = 'https://bower.io'
2626

2727
@classmethod
28-
def parse(cls, location):
28+
def parse(cls, location, package_only=False):
2929
with io.open(location, encoding='utf-8') as loc:
3030
package_data = json.load(loc)
3131

@@ -87,7 +87,12 @@ def parse(cls, location):
8787
)
8888
)
8989

90-
yield models.PackageData(
90+
if package_only:
91+
package_klass = models.PackageDataOnly
92+
else:
93+
package_klass = models.PackageData
94+
95+
yield package_klass(
9196
datasource_id=cls.datasource_id,
9297
type=cls.default_package_type,
9398
name=name,
@@ -98,5 +103,5 @@ def parse(cls, location):
98103
parties=parties,
99104
homepage_url=homepage_url,
100105
vcs_url=vcs_url,
101-
dependencies=dependencies
106+
dependencies=dependencies,
102107
)

src/packagedcode/models.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -905,6 +905,17 @@ def get_license_detections_and_expression(self):
905905
)
906906

907907

908+
class PackageDataOnly(PackageData):
    """
    PackageData variant that skips the license/copyright detection during
    instance creation.
    """

    def __attrs_post_init__(self):
        """
        Replace a non-string ``extracted_license_statement`` with the string
        returned by ``saneyaml.dump()``. The parent ``__attrs_post_init__`` is
        not called here.
        """
        statement = self.extracted_license_statement
        if not statement or isinstance(statement, str):
            # Nothing to convert: empty value or already a string.
            return
        self.extracted_license_statement = saneyaml.dump(statement)
917+
918+
908919
def get_default_relation_license(datasource_id):
909920
from packagedcode import HANDLER_BY_DATASOURCE_ID
910921
handler = HANDLER_BY_DATASOURCE_ID[datasource_id]

src/packagedcode/plugin_package.py

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,6 @@ def get_available_package_parsers(docs=False):
112112
return all_data_packages
113113

114114

115-
116115
@scan_impl
117116
class PackageScanner(ScanPlugin):
118117
"""
@@ -161,7 +160,19 @@ class PackageScanner(ScanPlugin):
161160
help_group=SCAN_GROUP,
162161
sort_order=21,
163162
),
164-
163+
PluggableCommandLineOption(
164+
(
165+
'--package-only',
166+
),
167+
is_flag=True,
168+
default=False,
169+
help=(
170+
'Only detect package information and skip license/copyright detection steps, '
171+
'in application package and dependency manifests, lockfiles and related data.'
172+
),
173+
help_group=SCAN_GROUP,
174+
sort_order=22,
175+
),
165176
PluggableCommandLineOption(
166177
('--list-packages',),
167178
is_flag=True,
@@ -172,10 +183,10 @@ class PackageScanner(ScanPlugin):
172183
),
173184
]
174185

175-
def is_enabled(self, package, system_package, **kwargs):
176-
return package or system_package
186+
def is_enabled(self, package, system_package, package_only=False, **kwargs):
    """
    Return a truthy value when any of the ``package``, ``system_package`` or
    ``package_only`` options is set.

    ``package_only`` defaults to False, like in ``get_scanner`` and
    ``process_codebase``, so that callers that only pass ``package`` and
    ``system_package`` keep working.
    """
    return package or system_package or package_only
177188

178-
def get_scanner(self, package=True, system_package=False, **kwargs):
189+
def get_scanner(self, package=True, system_package=False, package_only=False, **kwargs):
179190
"""
180191
Return a scanner callable to scan a file for package data.
181192
"""
@@ -185,9 +196,10 @@ def get_scanner(self, package=True, system_package=False, **kwargs):
185196
get_package_data,
186197
application=package,
187198
system=system_package,
199+
package_only=package_only,
188200
)
189201

190-
def process_codebase(self, codebase, strip_root=False, **kwargs):
202+
def process_codebase(self, codebase, strip_root=False, package_only=False, **kwargs):
191203
"""
192204
Populate the ``codebase`` top level ``packages`` and ``dependencies``
193205
with package and dependency instances, assembling parsed package data
@@ -196,6 +208,11 @@ def process_codebase(self, codebase, strip_root=False, **kwargs):
196208
Also perform additional package license detection that depends on either
197209
file license detection or the package detections.
198210
"""
211+
# With --package-only, we only want the parsed package data (such as purls),
212+
# so skip both the package assembly and the extra package license detection steps.
213+
if package_only:
214+
return
215+
199216
has_licenses = hasattr(codebase.root, 'license_detections')
200217

201218
# These steps add proper license detections to package_data and hence

src/packagedcode/recognize.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ def recognize_package_data(
4444
location,
4545
application=True,
4646
system=False,
47+
package_only=False,
4748
):
4849
"""
4950
Return a list of Package objects if any package_data were recognized for
@@ -55,19 +56,24 @@ def recognize_package_data(
5556
if not filetype.is_file(location):
5657
return []
5758

58-
assert application or system
59-
if application and system:
59+
assert application or system or package_only
60+
if package_only or (application and system):
6061
datafile_handlers = ALL_DATAFILE_HANDLERS
6162
elif application:
6263
datafile_handlers = APPLICATION_PACKAGE_DATAFILE_HANDLERS
6364
elif system:
6465
datafile_handlers = SYSTEM_PACKAGE_DATAFILE_HANDLERS
6566

66-
return list(_parse(location, datafile_handlers=datafile_handlers))
67+
return list(_parse(
68+
location=location,
69+
package_only=package_only,
70+
datafile_handlers=datafile_handlers,
71+
))
6772

6873

6974
def _parse(
7075
location,
76+
package_only=False,
7177
datafile_handlers=APPLICATION_PACKAGE_DATAFILE_HANDLERS,
7278
):
7379
"""
@@ -85,7 +91,7 @@ def _parse(
8591
logger_debug(f'_parse:.is_datafile: {location}')
8692

8793
try:
88-
for parsed in handler.parse(location):
94+
for parsed in handler.parse(location=location, package_only=package_only):
8995
if TRACE:
9096
logger_debug(f' _parse: parsed: {parsed!r}')
9197
yield parsed

src/scancode/api.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -248,20 +248,21 @@ def get_licenses(
248248
SCANCODE_DEBUG_PACKAGE_API = os.environ.get('SCANCODE_DEBUG_PACKAGE_API', False)
249249

250250

251-
def _get_package_data(location, application=True, system=False, **kwargs):
251+
def _get_package_data(location, application=True, system=False, package_only=False, **kwargs):
252252
"""
253253
Return a mapping of package manifest information detected in the file at ``location``.
254254
Include ``application`` packages (such as pypi) and/or ``system`` packages.
255255
Note that all exceptions are caught if there are any errors while parsing a
256256
package manifest.
257257
"""
258-
assert application or system
258+
assert application or system or package_only
259259
from packagedcode.recognize import recognize_package_data
260260
try:
261261
return recognize_package_data(
262262
location=location,
263263
application=application,
264-
system=system
264+
system=system,
265+
package_only=package_only,
265266
) or []
266267

267268
except Exception as e:
@@ -291,7 +292,7 @@ def get_package_info(location, **kwargs):
291292
return dict(packages=[p.to_dict() for p in packages])
292293

293294

294-
def get_package_data(location, application=True, system=False, **kwargs):
295+
def get_package_data(location, application=True, system=False, package_only=False, **kwargs):
295296
"""
296297
Return a mapping of package manifest information detected in the file at
297298
`location`.
@@ -304,6 +305,7 @@ def get_package_data(location, application=True, system=False, **kwargs):
304305
location=location,
305306
application=application,
306307
system=system,
308+
package_only=package_only,
307309
**kwargs,
308310
) or []
309311

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
{
2+
"packages": [],
3+
"dependencies": [],
4+
"files": [
5+
{
6+
"path": "scan",
7+
"type": "directory",
8+
"package_data": [],
9+
"for_packages": [],
10+
"scan_errors": []
11+
},
12+
{
13+
"path": "scan/bower.json",
14+
"type": "file",
15+
"package_data": [
16+
{
17+
"type": "bower",
18+
"namespace": null,
19+
"name": "John Doe",
20+
"version": null,
21+
"qualifiers": {},
22+
"subpath": null,
23+
"primary_language": null,
24+
"description": "Physics-like animations for pretty particles",
25+
"release_date": null,
26+
"parties": [
27+
{
28+
"type": null,
29+
"role": "author",
30+
"name": "Betty Beta <bbeta@example.com>",
31+
"email": null,
32+
"url": null
33+
},
34+
{
35+
"type": null,
36+
"role": "author",
37+
"name": "John Doe",
38+
"email": "john@doe.com",
39+
"url": "http://johndoe.com"
40+
}
41+
],
42+
"keywords": [
43+
"motion",
44+
"physics",
45+
"particles"
46+
],
47+
"homepage_url": null,
48+
"download_url": null,
49+
"size": null,
50+
"sha1": null,
51+
"md5": null,
52+
"sha256": null,
53+
"sha512": null,
54+
"bug_tracking_url": null,
55+
"code_view_url": null,
56+
"vcs_url": null,
57+
"copyright": null,
58+
"holder": null,
59+
"declared_license_expression": null,
60+
"declared_license_expression_spdx": null,
61+
"license_detections": [],
62+
"other_license_expression": null,
63+
"other_license_expression_spdx": null,
64+
"other_license_detections": [],
65+
"extracted_license_statement": "- MIT\n- Apache 2.0\n- BSD-3-Clause\n",
66+
"notice_text": null,
67+
"source_packages": [],
68+
"file_references": [],
69+
"extra_data": {},
70+
"dependencies": [
71+
{
72+
"purl": "pkg:bower/get-size",
73+
"extracted_requirement": "~1.2.2",
74+
"scope": "dependencies",
75+
"is_runtime": true,
76+
"is_optional": false,
77+
"is_resolved": false,
78+
"resolved_package": {},
79+
"extra_data": {}
80+
},
81+
{
82+
"purl": "pkg:bower/eventEmitter",
83+
"extracted_requirement": "~4.2.11",
84+
"scope": "dependencies",
85+
"is_runtime": true,
86+
"is_optional": false,
87+
"is_resolved": false,
88+
"resolved_package": {},
89+
"extra_data": {}
90+
},
91+
{
92+
"purl": "pkg:bower/qunit",
93+
"extracted_requirement": "~1.16.0",
94+
"scope": "devDependencies",
95+
"is_runtime": false,
96+
"is_optional": true,
97+
"is_resolved": false,
98+
"resolved_package": {},
99+
"extra_data": {}
100+
}
101+
],
102+
"repository_homepage_url": null,
103+
"repository_download_url": null,
104+
"api_data_url": null,
105+
"datasource_id": "bower_json",
106+
"purl": "pkg:bower/John%20Doe"
107+
}
108+
],
109+
"for_packages": [],
110+
"scan_errors": []
111+
}
112+
]
113+
}

tests/packagedcode/test_bower.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
from packagedcode import bower
1313
from packages_test_utils import PackageTester
1414
from scancode_config import REGEN_TEST_FIXTURES
15+
from scancode.cli_test_utils import check_json_scan
16+
from scancode.cli_test_utils import run_scan_click
1517

1618

1719
class TestBower(PackageTester):
@@ -40,11 +42,15 @@ def test_parse_bower_json_author_objects(self):
4042
self.check_packages_data(package, expected_loc, regen=REGEN_TEST_FIXTURES)
4143

4244
def test_end2end_bower_scan_is_moved_to_parent(self):
    """
    Run a ``--package`` scan on the ``bower/scan`` test location and compare
    the JSON results with the expected fixture.
    """
    scan_loc = self.get_test_loc('bower/scan')
    expected_loc = self.get_test_loc('bower/scan-expected.json')
    output_loc = self.get_temp_file('results.json')
    scan_args = ['--package', scan_loc, '--json-pp', output_loc]
    run_scan_click(scan_args)
    check_json_scan(expected_loc, output_loc, regen=REGEN_TEST_FIXTURES)
50+
51+
def test_end2end_bower_scan_is_moved_to_parent_package_only(self):
    """
    Run a ``--package-only`` scan on the ``bower/scan`` test location and
    compare the JSON results with the package-only expected fixture.
    """
    scan_loc = self.get_test_loc('bower/scan')
    expected_loc = self.get_test_loc('bower/scan-package-only-expected.json')
    output_loc = self.get_temp_file('results.json')
    scan_args = ['--package-only', scan_loc, '--json-pp', output_loc]
    run_scan_click(scan_args)
    check_json_scan(expected_loc, output_loc, regen=REGEN_TEST_FIXTURES)

0 commit comments

Comments
 (0)