Skip to content

Commit 71f3d45

Browse files
authored
Load CycloneDX SBOMs dependencies #1145 (#1344)
Signed-off-by: tdruez <tdruez@nexb.com>
1 parent 0654894 commit 71f3d45

17 files changed

+377
-12
lines changed

CHANGELOG.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,13 @@ v34.7.2 (unreleased)
1919
"delivery".
2020
https://github.com/nexB/scancode.io/issues/1325
2121

22+
- Add support for creating dependencies using the ``load_sboms`` pipeline on CycloneDX
23+
SBOM inputs.
24+
https://github.com/nexB/scancode.io/issues/1145
25+
26+
- Add a new Dependency view that renders the project dependencies as a tree.
27+
https://github.com/nexB/scancode.io/issues/1145
28+
2229
v34.7.1 (2024-07-15)
2330
--------------------
2431

Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,10 +68,10 @@ doc8:
6868
@${ACTIVATE} doc8 --max-line-length 100 --ignore-path docs/_build/ --quiet docs/
6969

7070
valid:
71-
@echo "-> Run Ruff linter"
72-
@${ACTIVATE} ruff check --fix
7371
@echo "-> Run Ruff format"
7472
@${ACTIVATE} ruff format
73+
@echo "-> Run Ruff linter"
74+
@${ACTIVATE} ruff check --fix
7575

7676
check:
7777
@echo "-> Run Ruff linter validation (pycodestyle, bandit, isort, and more)"

scanpipe/models.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2976,6 +2976,20 @@ def filter(self, *args, **kwargs):
29762976

29772977
return super().filter(*args, **kwargs)
29782978

2979+
def non_root_packages(self):
    """
    Return the packages that have at least one package parent.

    A package qualifies when at least one entry exists on its
    ``resolved_from_dependencies`` relation (presumably the reverse side of the
    dependency "resolved to" foreign key -- confirm against the model).

    Filtering across that multi-valued relation joins one row per matching
    dependency, so ``distinct()`` is applied to return each package only once,
    even when it has several parents.
    """
    return self.filter(resolved_from_dependencies__isnull=False).distinct()
2985+
2986+
def root_packages(self):
    """
    Return the packages that are directly related to the Project.

    These are the packages with no entry on their ``resolved_from_dependencies``
    relation, meaning no dependency resolves to them.
    """
    without_parent = {"resolved_from_dependencies__isnull": True}
    return self.filter(**without_parent)
2992+
29792993

29802994
class AbstractPackage(models.Model):
29812995
"""These fields should be kept in line with `packagedcode.models.PackageData`."""

scanpipe/pipelines/load_sbom.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ def steps(cls):
4646
cls.get_sbom_inputs,
4747
cls.get_packages_from_sboms,
4848
cls.create_packages_from_sboms,
49+
cls.create_dependencies_from_sboms,
4950
)
5051

5152
def get_sbom_inputs(self):
@@ -62,8 +63,12 @@ def get_packages_from_sboms(self):
6263
)
6364

6465
def create_packages_from_sboms(self):
    """Create, in the database, the packages declared in the SBOMs."""
    creation_kwargs = {
        "project": self.project,
        "packages": self.packages,
    }
    resolve.create_packages_and_dependencies(**creation_kwargs)
71+
72+
def create_dependencies_from_sboms(self):
    """Create the dependency relationships declared in the SBOMs."""
    current_project = self.project
    resolve.create_dependencies_from_packages_extra_data(project=current_project)

scanpipe/pipes/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,7 @@ def update_or_create_dependency(
261261
where Dependency data is imported from a scancode-toolkit scan, where the
262262
root path segments are not stripped for `datafile_path`.
263263
If the dependency is resolved and a resolved package is created, we have the
264-
corresponsing package_uid at `resolved_to`.
264+
corresponding package_uid at `resolved_to`.
265265
"""
266266
if ignore_dependency_scope(project, dependency_data):
267267
return # Do not create the DiscoveredDependency record.

scanpipe/pipes/cyclonedx.py

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -150,10 +150,18 @@ def is_cyclonedx_bom(input_location):
150150
return False
151151

152152

153-
def cyclonedx_component_to_package_data(cdx_component):
153+
def cyclonedx_component_to_package_data(cdx_component, dependencies=None):
154154
"""Return package_data from CycloneDX component."""
155+
dependencies = dependencies or {}
155156
extra_data = {}
156157

158+
# Store the original bom_ref and dependencies for future processing.
159+
bom_ref = str(cdx_component.bom_ref)
160+
if bom_ref:
161+
extra_data["bom_ref"] = bom_ref
162+
if depends_on := dependencies.get(bom_ref):
163+
extra_data["depends_on"] = depends_on
164+
157165
package_url_dict = {}
158166
if cdx_component.purl:
159167
package_url_dict = cdx_component.purl.to_dict(encode=True)
@@ -271,14 +279,15 @@ def is_empty(value):
271279
return cyclonedx_document_json
272280

273281

274-
def resolve_cyclonedx_packages(input_location):
275-
"""Resolve the packages from the `input_location` CycloneDX document file."""
282+
def get_bom_instance_from_file(input_location):
283+
"""Return a Bom instance from the `input_location` CycloneDX document file."""
276284
input_path = Path(input_location)
277285
document_data = input_path.read_text()
278286

279287
if str(input_location).endswith(".xml"):
280288
cyclonedx_document = SafeElementTree.fromstring(document_data)
281289
cyclonedx_bom = Bom.from_xml(cyclonedx_document)
290+
return cyclonedx_bom
282291

283292
elif str(input_location).endswith(".json"):
284293
cyclonedx_document = json.loads(document_data)
@@ -294,9 +303,25 @@ def resolve_cyclonedx_packages(input_location):
294303
raise ValueError(error_msg)
295304

296305
cyclonedx_bom = Bom.from_json(data=cyclonedx_document)
306+
return cyclonedx_bom
297307

298-
else:
308+
309+
def resolve_cyclonedx_packages(input_location):
    """
    Resolve the packages from the `input_location` CycloneDX document file.

    Return a list of package data, one entry per component of the document,
    or an empty list when no Bom instance could be loaded from the file.
    """
    bom = get_bom_instance_from_file(input_location)
    if not bom:
        return []

    components = get_components(bom)

    # Map each ``bom_ref`` to the refs it depends on. The mapping is handed to the
    # component conversion, which stores those values on the ``extra_data`` field
    # for the dependency resolution that takes place after the package creation.
    depends_on_by_ref = defaultdict(list)
    for entry in bom.dependencies:
        child_refs = [str(child.ref) for child in entry.dependencies]
        if child_refs:
            depends_on_by_ref[str(entry.ref)].extend(child_refs)

    packages_data = []
    for component in components:
        package_data = cyclonedx_component_to_package_data(
            component, depends_on_by_ref
        )
        packages_data.append(package_data)
    return packages_data

scanpipe/pipes/resolve.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,14 +22,19 @@
2222

2323
import json
2424
import sys
25+
import uuid
2526
from pathlib import Path
2627

28+
from django.core.exceptions import MultipleObjectsReturned
29+
from django.core.exceptions import ObjectDoesNotExist
30+
2731
from attributecode.model import About
2832
from packagedcode import APPLICATION_PACKAGE_DATAFILE_HANDLERS
2933
from packagedcode.licensing import get_license_detections_and_expression
3034
from packageurl import PackageURL
3135
from python_inspector.api import resolve_dependencies
3236

37+
from scanpipe.models import DiscoveredDependency
3338
from scanpipe.models import DiscoveredPackage
3439
from scanpipe.pipes import cyclonedx
3540
from scanpipe.pipes import flag
@@ -108,6 +113,52 @@ def create_packages_and_dependencies(project, packages, resolved=False):
108113
update_or_create_dependency(project, dependency_data)
109114

110115

116+
def create_dependencies_from_packages_extra_data(project):
    """
    Create Dependency objects from the Package ``extra_data`` values.

    The Package instances need to be saved first in the database before creating
    the Dependency objects.
    The dependencies declared in the SBOM are stored on the ``Package.extra_data``
    field, under the ``bom_ref`` and ``depends_on`` keys, and resolved as
    Dependency objects in this function.

    A ``depends_on`` reference that matches no package of the project, or more
    than one, is recorded as a project error and skipped.

    Return the number of Dependency objects created.
    """
    project_packages = project.discoveredpackages.all()
    created_count = 0

    packages_with_depends_on = project_packages.filter(
        extra_data__has_key="depends_on"
    ).prefetch_related("codebase_resources")

    for for_package in packages_with_depends_on:
        # The datafile resource is only set when it is unambiguous, that is when
        # the package is related to exactly one codebase resource.
        datafile_resource = None
        codebase_resources = for_package.codebase_resources.all()
        if len(codebase_resources) == 1:
            datafile_resource = codebase_resources[0]

        for bom_ref in for_package.extra_data.get("depends_on", []):
            try:
                resolved_to_package = project_packages.get(extra_data__bom_ref=bom_ref)
            except ObjectDoesNotExist:
                project.add_error(
                    description=f"Could not find resolved_to package entry: {bom_ref}.",
                    model="create_dependencies",
                )
                continue
            except MultipleObjectsReturned:
                # Several packages share this bom_ref: report the ambiguity as
                # such instead of claiming that the package is missing.
                project.add_error(
                    description=(
                        f"Multiple packages found for resolved_to package entry: "
                        f"{bom_ref}."
                    ),
                    model="create_dependencies",
                )
                continue

            DiscoveredDependency.objects.create(
                project=project,
                # No identifier is read from the stored values; generate one.
                dependency_uid=str(uuid.uuid4()),
                for_package=for_package,
                resolved_to_package=resolved_to_package,
                datafile_resource=datafile_resource,
                is_runtime=True,
                is_resolved=True,
                is_direct=True,
            )
            created_count += 1

    return created_count
160+
161+
111162
def get_packages_from_manifest(input_location, package_registry=None):
112163
"""
113164
Resolve packages or get packages data from a package manifest file/

scanpipe/templates/scanpipe/dependency_list.html

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,12 @@
1616
</div>
1717

1818
<div class="container is-fluid mb-3">
19+
<a href="{% url 'project_dependency_tree' project.slug %}" class="is-pulled-right">
20+
<span class="icon">
21+
<i class="fa-solid fa-sitemap"></i>
22+
</span>
23+
<span>View the dependency tree</span>
24+
</a>
1925
<table class="table is-bordered is-striped is-narrow is-hoverable is-fullwidth">
2026
{% include 'scanpipe/includes/list_view_thead.html' %}
2127
<tbody>
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
{% extends "scanpipe/base.html" %}
2+
3+
{% block title %}ScanCode.io: {{ project.name }} - Dependency tree{% endblock %}
4+
5+
{% block content %}
6+
<div id="content-header" class="container is-max-widescreen mb-3">
7+
{% include 'scanpipe/includes/navbar_header.html' %}
8+
<section class="mx-5">
9+
<div class="is-flex is-justify-content-space-between">
10+
{% include 'scanpipe/includes/breadcrumb.html' with linked_project=True current="Dependency tree" %}
11+
</div>
12+
</section>
13+
</div>
14+
15+
<div class="container is-max-widescreen mb-3">
16+
{% if recursion_error %}
17+
<article class="message is-danger">
18+
<div class="message-body">
19+
The dependency tree cannot be rendered as it contains circular references.
20+
{{ message|linebreaksbr }}
21+
</div>
22+
</article>
23+
{% endif %}
24+
<div id="tree"></div>
25+
</div>
26+
{% endblock %}
27+
28+
{% block scripts %}
29+
<script src="https://d3js.org/d3.v7.min.js"></script>
30+
<script src="https://cdn.jsdelivr.net/npm/@observablehq/plot@0.6"></script>
31+
{{ dependency_tree|json_script:"dependency_tree" }}
32+
{{ row_count|json_script:"row_count" }}
33+
{{ max_depth|json_script:"max_depth" }}
34+
<script>
35+
const data = JSON.parse(document.getElementById("dependency_tree").textContent);
36+
const hierarchyData = d3.hierarchy(data);
37+
const columnWidth = 110;
38+
const rowWidth = 25;
39+
const columnCount = hierarchyData.height;
40+
const rowCount = hierarchyData.links().length;
41+
const width = columnWidth * (columnCount + 1);
42+
const height = rowWidth * (rowCount + 1);
43+
44+
// Return a tree layout function that positions each node of the hierarchy as an
// indented list: `y` is the depth of the node and `x` is its index in the
// pre-order traversal performed by `eachBefore`.
function indent() {
  const applyIndentedLayout = function (root) {
    root.eachBefore(function (node, index) {
      node.y = node.depth;
      node.x = index;
    });
  };
  return applyIndentedLayout;
}
52+
53+
// https://observablehq.com/plot/marks/tree
54+
const plot = Plot.plot({
55+
axis: null,
56+
margin: 10,
57+
marginLeft: 40,
58+
marginRight: 160,
59+
width: width,
60+
height: height,
61+
marks: [
62+
Plot.tree(hierarchyData.leaves(), {
63+
path: (node) => node.ancestors().reverse().map(({ data: { name } }) => name).join("|"),
64+
delimiter: "|",
65+
treeLayout: indent,
66+
strokeWidth: 1,
67+
curve: "step-before",
68+
fontSize: 14,
69+
textStroke: "none"
70+
})
71+
]
72+
});
73+
74+
document.getElementById("tree").appendChild(plot);
75+
</script>
76+
{% endblock %}

scanpipe/tests/pipes/test_cyclonedx.py

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,7 @@ def test_scanpipe_cyclonedx_component_to_package_data(self):
167167
"extracted_license_statement": "OFL-1.1\nApache-2.0",
168168
"version": "0.10.2",
169169
"extra_data": {
170+
"bom_ref": "pkg:pypi/toml@0.10.2?extension=tar.gz",
170171
"externalReferences": {
171172
"advisories": ["https://cyclonedx.org/advisories"],
172173
"bom": ["https://cyclonedx.org/bom"],
@@ -198,6 +199,22 @@ def test_scanpipe_cyclonedx_component_to_package_data_encoded_purl_name(self):
198199
expected = {"name": "a:/b:name", "version": "1.0", "type": "type"}
199200
self.assertEqual(expected, package_data)
200201

202+
def test_scanpipe_cyclonedx_get_bom_instance_from_file(self):
    """Check the failure on an invalid document, then a successful Bom load."""
    invalid_location = self.data / "missing_schema.json"
    with self.assertRaises(ValueError) as raised:
        cyclonedx.get_bom_instance_from_file(invalid_location)
    expected_error = (
        'CycloneDX document "missing_schema.json" is not valid:\n'
        "Additional properties are not allowed ('invalid_entry' was unexpected)"
    )
    self.assertIn(expected_error, str(raised.exception))

    valid_location = self.data / "laravel-7.12.0" / "bom.1.4.json"
    loaded_bom = cyclonedx.get_bom_instance_from_file(valid_location)
    self.assertIsInstance(loaded_bom, Bom)
    component_count = len(loaded_bom.components)
    self.assertEqual(62, component_count)
    dependency_count = len(loaded_bom.dependencies)
    self.assertEqual(63, dependency_count)
217+
201218
def test_scanpipe_cyclonedx_resolve_cyclonedx_packages(self):
202219
input_location = self.data / "missing_schema.json"
203220
with self.assertRaises(ValueError) as cm:
@@ -236,12 +253,28 @@ def test_scanpipe_cyclonedx_resolve_cyclonedx_packages(self):
236253
packages = cyclonedx.resolve_cyclonedx_packages(input_location)
237254
self.assertEqual(62, len(packages))
238255

256+
def test_scanpipe_cyclonedx_resolve_cyclonedx_packages_dependencies(self):
    """Check that ``bom_ref`` and ``depends_on`` are stored in ``extra_data``."""
    sbom_location = self.data / "laravel-7.12.0" / "bom.1.4.json"
    resolved_packages = cyclonedx.resolve_cyclonedx_packages(sbom_location)
    self.assertEqual(62, len(resolved_packages))

    first_extra_data = resolved_packages[0]["extra_data"]
    self.assertEqual("asm89/stack-cors-1.3.0.0", first_extra_data["bom_ref"])
    self.assertEqual(
        ["symfony/http-foundation-5.4.16.0", "symfony/http-kernel-5.4.16.0"],
        first_extra_data["depends_on"],
    )
268+
239269
def test_scanpipe_cyclonedx_resolve_cyclonedx_packages_pre_validation(self):
    # This SBOM includes multiple deserialization issues that are "fixed"
    # by the pre-validation cleanup.
    broken_sbom_location = self.data / "broken_sbom.json"
    resolved = cyclonedx.resolve_cyclonedx_packages(broken_sbom_location)
    expected_package = {
        "extra_data": {"bom_ref": "pkg:pypi/asgiref@3.3.0"},
        "name": "asgiref",
    }
    self.assertEqual([expected_package], resolved)
245278

246279
def test_scanpipe_cyclonedx_cleanup_components_properties(self):
247280
cyclonedx_document_json = {

0 commit comments

Comments
 (0)