Skip to content

Commit 85b0491

Browse files
Use SCTK package_only in inspect_packages pipeline (#1118)
* Update inspect_packages pipeline * Split package/dependencies creation in a separate step * Only create packages/dependencies from Assemblable PackageData Signed-off-by: Ayan Sinha Mahapatra <ayansmahapatra@gmail.com> * Support fast package-only scan in inspect_packages Support the new package_only argument in the scancode get_package_data API, to only scan for package data and skip license and copyright detection. Signed-off-by: Ayan Sinha Mahapatra <ayansmahapatra@gmail.com> * Add CHANGELOG entry Signed-off-by: Ayan Sinha Mahapatra <ayansmahapatra@gmail.com> * Make resolve_dependencies integration test faster Reference: #1087 Signed-off-by: Ayan Sinha Mahapatra <ayansmahapatra@gmail.com> --------- Signed-off-by: Ayan Sinha Mahapatra <ayansmahapatra@gmail.com>
1 parent 3097fc8 commit 85b0491

File tree

5 files changed

+36
-10
lines changed

5 files changed

+36
-10
lines changed

CHANGELOG.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,11 @@ v34.1.0 (unreleased)
2323
https://github.com/nexB/scancode.io/issues/1121
2424
https://github.com/nexB/scancode.io/issues/1122
2525

26+
- Use the `package_only` option of the scancode `get_package_data` API in the
27+
`inspect_packages` pipeline, to skip license and copyright detection in
28+
extracted license and copyright statements found in package metadata.
29+
https://github.com/nexB/scancode-toolkit/pull/3689
30+
2631
- Rename the ``match_to_purldb`` pipeline to ``match_to_matchcode``, and add
2732
MatchCode.io API settings to ScanCode.io settings.
2833

scanpipe/pipelines/inspect_packages.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ def steps(cls):
4949
cls.flag_empty_files,
5050
cls.flag_ignored_resources,
5151
cls.scan_for_application_packages,
52+
cls.create_packages_and_dependencies,
5253
)
5354

5455
def scan_for_application_packages(self):
@@ -60,5 +61,11 @@ def scan_for_application_packages(self):
6061
# only detect package_data in resources and create
6162
# Package/Dependency instances directly instead of assembling
6263
# the packages and assigning files to them
63-
scancode.scan_for_application_packages(self.project, assemble=False)
64+
scancode.scan_for_application_packages(
65+
project=self.project,
66+
assemble=False,
67+
package_only=True,
68+
)
69+
70+
def create_packages_and_dependencies(self):
6471
scancode.process_package_data(self.project)

scanpipe/pipes/scancode.py

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -219,14 +219,18 @@ def scan_file(location, with_threading=True, min_license_score=0, **kwargs):
219219
return _scan_resource(location, scanners, with_threading=with_threading)
220220

221221

222-
def scan_for_package_data(location, with_threading=True, **kwargs):
222+
def scan_for_package_data(location, with_threading=True, package_only=False, **kwargs):
223223
"""
224224
Run a package scan on provided `location` using the scancode-toolkit direct API.
225225
226226
Return a dict of scan `results` and a list of `errors`.
227227
"""
228+
scancode_get_packages = partial(
229+
scancode_api.get_package_data,
230+
package_only=package_only,
231+
)
228232
scanners = [
229-
Scanner("package_data", scancode_api.get_package_data),
233+
Scanner("package_data", scancode_get_packages),
230234
]
231235
return _scan_resource(location, scanners, with_threading=with_threading)
232236

@@ -343,7 +347,9 @@ def scan_for_files(project, resource_qs=None, progress_logger=None):
343347
)
344348

345349

346-
def scan_for_application_packages(project, assemble=True, progress_logger=None):
350+
def scan_for_application_packages(
351+
project, assemble=True, package_only=False, progress_logger=None
352+
):
347353
"""
348354
Run a package scan on resources without a status for a `project`,
349355
and add them in their respective `package_data` attribute.
@@ -359,13 +365,18 @@ def scan_for_application_packages(project, assemble=True, progress_logger=None):
359365
"""
360366
resource_qs = project.codebaseresources.no_status()
361367

368+
scan_func_kwargs = {
369+
"package_only": package_only,
370+
}
371+
362372
# Collect detected Package data and save it to the CodebaseResource it was
363373
# detected from.
364374
scan_resources(
365375
resource_qs=resource_qs,
366376
scan_func=scan_for_package_data,
367377
save_func=save_scan_package_results,
368378
progress_logger=progress_logger,
379+
scan_func_kwargs=scan_func_kwargs,
369380
)
370381

371382
# Iterate through CodebaseResources with Package data and handle them using
@@ -460,15 +471,19 @@ def process_package_data(project):
460471
logger.info(f" Processing: {resource.path}")
461472
for package_mapping in resource.package_data:
462473
pd = packagedcode_models.PackageData.from_dict(mapping=package_mapping)
474+
if not pd.can_assemble:
475+
continue
476+
463477
logger.info(f" Package data: {pd.purl}")
464478

465479
package_data = pd.to_dict()
466480
dependencies = package_data.pop("dependencies")
467-
pipes.update_or_create_package(project, package_data)
468-
469481
for dep in dependencies:
470482
pipes.update_or_create_dependency(project, dep)
471483

484+
if pd.purl:
485+
pipes.update_or_create_package(project, package_data)
486+
472487

473488
def get_packages_with_purl_from_resources(project):
474489
"""
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
click==8.1.3

scanpipe/tests/test_pipelines.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -922,17 +922,15 @@ def test_scanpipe_resolve_dependencies_pipeline_integration_misc(self):
922922
pipeline_name = "resolve_dependencies"
923923
project1 = Project.objects.create(name="Analysis")
924924

925-
input_location = (
926-
self.data_location / "manifests" / "python-inspector-0.10.0.zip"
927-
)
925+
input_location = self.data_location / "manifests" / "requirements.txt"
928926
project1.copy_input_from(input_location)
929927

930928
run = project1.add_pipeline(pipeline_name)
931929
pipeline = run.make_pipeline_instance()
932930

933931
exitcode, out = pipeline.execute()
934932
self.assertEqual(0, exitcode, msg=out)
935-
self.assertEqual(26, project1.discoveredpackages.count())
933+
self.assertEqual(1, project1.discoveredpackages.count())
936934

937935
@mock.patch("scanpipe.pipes.resolve.resolve_dependencies")
938936
def test_scanpipe_resolve_dependencies_pipeline_pypi_integration(

0 commit comments

Comments
 (0)