Skip to content

Commit 3ca21c2

Browse files
authored
Add full test coverage for the enrich_with_purldb Pipeline (#1331)
Signed-off-by: tdruez <tdruez@nexb.com>
1 parent c22d10e commit 3ca21c2

File tree

7 files changed

+173
-9
lines changed

7 files changed

+173
-9
lines changed

CHANGELOG.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@ Changelog
44
v34.7.2 (unreleased)
55
--------------------
66

7+
- Add a new ``enrich_with_purldb`` add-on pipeline to enrich the discovered packages
8+
with data available in the PurlDB.
9+
https://github.com/nexB/scancode.io/issues/1182
10+
711
- Add the ability to define a results_url on the Pipeline class.
812
When available, that link is displayed in the UI to easily reach the results view
913
related to the Pipeline run.

docs/built-in-pipelines.rst

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,20 @@ Collect symbols and string with Tree-Sitter (addon)
7474
:members:
7575
:member-order: bysource
7676

77+
.. _pipeline_enrich_with_purldb:
78+
79+
Enrich With PurlDB (addon)
80+
--------------------------
81+
82+
.. warning::
83+
This pipeline requires access to a PurlDB service.
84+
Refer to :ref:`scancodeio_settings_purldb` to configure access to PurlDB in your
85+
ScanCode.io instance.
86+
87+
.. autoclass:: scanpipe.pipelines.enrich_with_purldb.EnrichWithPurlDB()
88+
:members:
89+
:member-order: bysource
90+
7791
.. _pipeline_find_vulnerabilities:
7892

7993
Find Vulnerabilities (addon)

scanpipe/pipelines/enrich_with_purldb.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ class EnrichWithPurlDB(Pipeline):
2929

3030
download_inputs = False
3131
is_addon = True
32+
results_url = "/project/{slug}/packages/?extra_data=" + purldb.ENRICH_EXTRA_DATA_KEY
3233

3334
@classmethod
3435
def steps(cls):
@@ -39,4 +40,4 @@ def steps(cls):
3940

4041
def enrich_discovered_packages_with_purldb(self):
    """Enrich the project discovered packages with the PurlDB data."""
    purldb.enrich_discovered_packages(project=self.project, logger=self.log)

scanpipe/pipes/purldb.py

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import logging
2525

2626
from django.conf import settings
27+
from django.template.defaultfilters import pluralize
2728
from django.utils.text import slugify
2829

2930
import requests
@@ -461,25 +462,28 @@ def get_run_status(run, **kwargs):
461462
return run.status
462463

463464

464-
def enrich_package(package):
    """
    Enrich the provided ``package`` with the PurlDB data.

    The PurlDB entry is looked up from ``package.package_url``. When an entry
    is found, its cleaned data is applied through ``package.update_from_data``.
    When that call returns updated fields, they are also passed to
    ``package.update_extra_data`` under the ``ENRICH_EXTRA_DATA_KEY`` key.

    Return the updated fields, or ``None`` when no PurlDB entry is found or
    when no field was updated.
    """
    purldb_entry = get_package_by_purl(package.package_url)
    if not purldb_entry:
        return None

    package_data = _clean_package_data(purldb_entry)
    updated_fields = package.update_from_data(package_data)
    if not updated_fields:
        # Any falsy result is reported as None so every exit path is explicit.
        return None

    package.update_extra_data({ENRICH_EXTRA_DATA_KEY: updated_fields})
    return updated_fields
471473

472474

473-
def enrich_discovered_packages(project, logger=logger.info):
    """
    Enrich all project discovered packages with the PurlDB data.

    Each enriched package is reported through ``logger`` together with its
    updated fields. A summary message is sent to ``logger`` once all packages
    have been processed.

    Return the number of packages that were enriched.
    """
    packages = project.discoveredpackages.all()

    updated_package_count = 0
    for package in packages:
        if updated_fields := enrich_package(package):
            logger(f"{package} {updated_fields}")
            updated_package_count += 1

    # ``pluralize`` provides the plural suffix for the summary:
    # "0 discovered packages", "1 discovered package".
    logger(
        f"{updated_package_count} discovered package{pluralize(updated_package_count)} "
        "enriched with the PurlDB."
    )
    return updated_package_count
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
{
2+
"url": "https://purldb.io/api/packages/f4300f8b-e5a8-4258-8eb1-3d01aeae8623/?format=json",
3+
"uuid": "f4300f8b-e5a8-4258-8eb1-3d01aeae8623",
4+
"filename": null,
5+
"package_sets": [],
6+
"package_content": null,
7+
"purl": "pkg:npm/csvtojson@2.0.10",
8+
"type": "npm",
9+
"namespace": "",
10+
"name": "csvtojson",
11+
"version": "2.0.10",
12+
"qualifiers": "",
13+
"subpath": "",
14+
"primary_language": null,
15+
"description": null,
16+
"release_date": "2019-06-26T00:00:00Z",
17+
"parties": [],
18+
"keywords": [],
19+
"homepage_url": "https://github.com/Keyang/node-csvtojson",
20+
"download_url": "https://registry.npmjs.com/csvtojson/-/csvtojson-2.0.10.tgz",
21+
"bug_tracking_url": null,
22+
"code_view_url": null,
23+
"vcs_url": null,
24+
"repository_homepage_url": null,
25+
"repository_download_url": null,
26+
"api_data_url": null,
27+
"size": null,
28+
"md5": null,
29+
"sha1": "11e7242cc630da54efce7958a45f443210357574",
30+
"sha256": "41ab7fecdc9cf7007696196d927560741cecdf7fc28c47565221178bfb3ae592",
31+
"sha512": null,
32+
"copyright": "Copyright (c) 2013 Keyang Xiang\nCopyright Joyent, Inc. and other Node contributors.\nCopyright JS Foundation and other contributors https://js.foundation\nCopyright Jeremy Ashkenas, DocumentCloud and Investigative Reporters Editors",
33+
"holder": null,
34+
"declared_license_expression": "mit",
35+
"declared_license_expression_spdx": "MIT",
36+
"license_detections": [],
37+
"other_license_expression": null,
38+
"other_license_expression_spdx": null,
39+
"other_license_detections": [],
40+
"extracted_license_statement": "MIT",
41+
"notice_text": null,
42+
"source_packages": [],
43+
"extra_data": {},
44+
"package_uid": "pkg:npm/csvtojson@2.0.10?uuid=f4300f8b-e5a8-4258-8eb1-3d01aeae8623",
45+
"datasource_id": null,
46+
"file_references": [],
47+
"dependencies": [],
48+
"resources": "https://purldb.io/api/packages/f4300f8b-e5a8-4258-8eb1-3d01aeae8623/resources/?format=json",
49+
"history": "https://purldb.io/api/packages/f4300f8b-e5a8-4258-8eb1-3d01aeae8623/history/?format=json"
50+
}

scanpipe/tests/pipes/test_purldb.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
# Visit https://github.com/nexB/scancode.io for support and download.
2222

2323
import io
24+
import json
2425
from pathlib import Path
2526
from unittest import mock
2627

@@ -35,6 +36,7 @@
3536
from scanpipe.pipes import purldb
3637
from scanpipe.tests import dependency_data2
3738
from scanpipe.tests import dependency_data3
39+
from scanpipe.tests import make_package
3840
from scanpipe.tests import package_data1
3941

4042

@@ -234,3 +236,59 @@ def test_scanpipe_pipes_purldb_create_project_name(self):
234236
scannable_uri_uuid = "52b2930d-6e85-4b3e-ba3e-17dd9a618650"
235237
project_name = purldb.create_project_name(download_url, scannable_uri_uuid)
236238
self.assertEqual("httpsregistrynpmjsorgasdf-asdf-101tgz-52b2930d", project_name)
239+
240+
@mock.patch("scanpipe.pipes.purldb.get_package_by_purl")
def test_scanpipe_pipes_purldb_enrich_package(self, mock_get_package_by_purl):
    """Check ``enrich_package`` without, then with, a PurlDB entry."""
    package1 = make_package(self.project1, package_url="pkg:npm/csvtojson@2.0.10")

    # The lookup yields nothing: no field is updated.
    mock_get_package_by_purl.return_value = {}
    self.assertIsNone(purldb.enrich_package(package=package1))

    # The lookup yields the fixture entry: the package gets enriched.
    entry_path = self.data / "purldb" / "csvtojson-2.0.10.json"
    entry = json.loads(entry_path.read_text())
    mock_get_package_by_purl.return_value = entry
    updated_fields = purldb.enrich_package(package=package1)
    self.assertTrue(updated_fields)
    self.assertIn("homepage_url", updated_fields)

    package1.refresh_from_db()
    self.assertTrue(package1.extra_data.get("enrich_with_purldb"))
    for field_name in ("sha1", "sha256", "copyright"):
        self.assertEqual(entry.get(field_name), getattr(package1, field_name))
261+
@mock.patch("scanpipe.pipes.purldb.get_package_by_purl")
def test_scanpipe_pipes_purldb_enrich_discovered_packages(
    self, mock_get_package_by_purl
):
    """Check the count and log output of ``enrich_discovered_packages``."""
    package1 = make_package(self.project1, package_url="pkg:npm/csvtojson@2.0.10")

    # The lookup yields nothing: no package is enriched.
    mock_get_package_by_purl.return_value = {}
    log_buffer = io.StringIO()
    enriched_count = purldb.enrich_discovered_packages(
        project=self.project1,
        logger=log_buffer.write,
    )
    self.assertEqual(0, enriched_count)
    log_output = log_buffer.getvalue()
    self.assertIn("0 discovered packages enriched with the PurlDB.", log_output)

    # The lookup yields the fixture entry: the single package is enriched.
    entry_path = self.data / "purldb" / "csvtojson-2.0.10.json"
    entry = json.loads(entry_path.read_text())
    mock_get_package_by_purl.return_value = entry
    log_buffer = io.StringIO()
    enriched_count = purldb.enrich_discovered_packages(
        project=self.project1,
        logger=log_buffer.write,
    )
    self.assertEqual(1, enriched_count)
    package1.refresh_from_db()
    for field_name in ("sha1", "sha256", "copyright"):
        self.assertEqual(entry.get(field_name), getattr(package1, field_name))
    self.assertTrue(package1.extra_data.get("enrich_with_purldb"))

    log_output = log_buffer.getvalue()
    self.assertIn("pkg:npm/csvtojson@2.0.10 ['release_date'", log_output)
    self.assertIn("1 discovered package enriched with the PurlDB.", log_output)

scanpipe/tests/test_pipelines.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
from scanpipe.pipes import scancode
5050
from scanpipe.pipes.input import copy_input
5151
from scanpipe.tests import FIXTURES_REGEN
52+
from scanpipe.tests import make_package
5253
from scanpipe.tests import package_data1
5354
from scanpipe.tests.pipelines.do_nothing import DoNothing
5455
from scanpipe.tests.pipelines.profile_step import ProfileStep
@@ -845,6 +846,7 @@ def test_scanpipe_inspect_packages_creates_packages_pypi(self):
845846
self.assertEqual(0, project1.discoveredpackages.count())
846847
self.assertEqual(26, project1.discovereddependencies.count())
847848

849+
@skipIf(sys.platform == "darwin", "Not supported on macOS")
848850
def test_scanpipe_inspect_packages_with_resolved_dependencies_npm(self):
849851
pipeline_name = "inspect_packages"
850852
project1 = Project.objects.create(name="Analysis")
@@ -872,6 +874,7 @@ def test_scanpipe_inspect_packages_with_resolved_dependencies_npm(self):
872874
)
873875
self.assertPipelineResultEqual(expected_file, result_file)
874876

877+
@skipIf(sys.platform == "darwin", "Not supported on macOS")
875878
def test_scanpipe_inspect_packages_with_resolved_dependencies_poetry(self):
876879
pipeline_name = "inspect_packages"
877880
project1 = Project.objects.create(name="Analysis")
@@ -1711,3 +1714,33 @@ def test_scanpipe_collect_symbols_tree_sitter_pipeline_integration(self):
17111714
expected_extra_data = json.load(f)
17121715

17131716
self.assertDictEqual(expected_extra_data, result_extra_data)
1717+
1718+
@mock.patch("scanpipe.pipes.purldb.is_available")
@mock.patch("scanpipe.pipes.purldb.is_configured")
@mock.patch("scanpipe.pipes.purldb.get_package_by_purl")
def test_scanpipe_enrich_with_purldb_pipeline_integration(
    self, mock_get_package, mock_is_configured, mock_is_available
):
    """Run the ``enrich_with_purldb`` pipeline against a mocked PurlDB."""
    project1 = Project.objects.create(name="Analysis")
    package1 = make_package(project1, package_url="pkg:npm/csvtojson@2.0.10")

    # Report the PurlDB service as configured and available.
    mock_is_configured.return_value = True
    mock_is_available.return_value = True

    entry_path = self.data / "purldb" / "csvtojson-2.0.10.json"
    mock_get_package.return_value = json.loads(entry_path.read_text())

    run = project1.add_pipeline("enrich_with_purldb")
    pipeline = run.make_pipeline_instance()

    exitcode, out = pipeline.execute()
    self.assertEqual(0, exitcode, msg=out)

    package1.refresh_from_db()
    self.assertTrue(package1.extra_data.get("enrich_with_purldb"))

    run.refresh_from_db()
    self.assertIn("pkg:npm/csvtojson@2.0.10 ['release_date'", run.log)
    self.assertIn("1 discovered package enriched with the PurlDB.", run.log)

0 commit comments

Comments
 (0)