Skip to content

Commit f42522b

Browse files
authored
Migrate the cyclonedx resolve to native app for 1.5 support #1056 (#1086)
Signed-off-by: tdruez <tdruez@nexb.com>
1 parent 6f2b653 commit f42522b

16 files changed

+10305
-2741
lines changed

CHANGELOG.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@ Changelog
44
v34.1.0 (unreleased)
55
--------------------
66

7+
- Add support for importing CycloneDX SBOM 1.2, 1.3, 1.4 and 1.5 spec formats.
8+
https://github.com/nexB/scancode.io/issues/1045
9+
710
- The pipeline help modal is now available from all project views: form, list, details.
811
The docstrings are converted from Markdown to HTML for proper rendering.
912
https://github.com/nexB/scancode.io/pull/1105

scanpipe/pipes/cyclonedx.py

Lines changed: 88 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -27,82 +27,28 @@
2727

2828
from django.core.validators import EMPTY_VALUES
2929

30-
import jsonschema
31-
from hoppr_cyclonedx_models.cyclonedx_1_4 import (
32-
CyclonedxSoftwareBillOfMaterialsStandard as Bom_1_4,
33-
)
34-
35-
SCHEMAS_PATH = Path(__file__).parent / "schemas"
36-
37-
CYCLONEDX_SPEC_VERSION = "1.4"
38-
CYCLONEDX_SCHEMA_NAME = "bom-1.4.schema.json"
39-
CYCLONEDX_SCHEMA_PATH = SCHEMAS_PATH / CYCLONEDX_SCHEMA_NAME
40-
CYCLONEDX_SCHEMA_URL = (
41-
"https://raw.githubusercontent.com/"
42-
"CycloneDX/specification/master/schema/bom-1.4.schema.json"
43-
)
44-
45-
SPDX_SCHEMA_NAME = "spdx.schema.json"
46-
SPDX_SCHEMA_PATH = SCHEMAS_PATH / SPDX_SCHEMA_NAME
47-
48-
JSF_SCHEMA_NAME = "jsf-0.82.schema.json"
49-
JSF_SCHEMA_PATH = SCHEMAS_PATH / JSF_SCHEMA_NAME
50-
51-
52-
def get_bom(cyclonedx_document):
53-
"""Return CycloneDX BOM object."""
54-
return Bom_1_4(**cyclonedx_document)
55-
56-
57-
def get_components(bom):
58-
"""Return list of components from CycloneDX BOM."""
59-
return recursive_component_collector(bom.components, [])
60-
61-
62-
def bom_attributes_to_dict(cyclonedx_attributes):
63-
"""Return list of dict from a list of CycloneDX attributes."""
64-
if not cyclonedx_attributes:
65-
return []
66-
67-
return [
68-
json.loads(attribute.json(exclude_unset=True, by_alias=True))
69-
for attribute in cyclonedx_attributes
70-
]
71-
72-
73-
def recursive_component_collector(root_component_list, collected):
74-
"""Return list of components including the nested components."""
75-
if not root_component_list:
76-
return []
77-
78-
for component in root_component_list:
79-
extra_data = {}
80-
if component.components is not None:
81-
extra_data = bom_attributes_to_dict(component.components)
82-
83-
collected.append({"cdx_package": component, "nested_components": extra_data})
84-
recursive_component_collector(component.components, collected)
85-
return collected
30+
from cyclonedx.model import license as cdx_license_model
31+
from cyclonedx.model.bom import Bom
32+
from cyclonedx.schema import SchemaVersion
33+
from cyclonedx.validation import ValidationError
34+
from cyclonedx.validation.json import JsonStrictValidator
35+
from packageurl import PackageURL
8636

8737

8838
def resolve_license(license):
8939
"""Return license expression/id/name from license item."""
90-
if "expression" in license:
91-
return license["expression"]
92-
elif "id" in license["license"]:
93-
return license["license"]["id"]
94-
else:
95-
return license["license"]["name"]
40+
if isinstance(license, cdx_license_model.LicenseExpression):
41+
return license.value
42+
elif isinstance(license, cdx_license_model.License):
43+
return license.id or license.name
9644

9745

9846
def get_declared_licenses(licenses):
9947
"""Return resolved license from list of LicenseChoice."""
10048
if not licenses:
10149
return ""
10250

103-
resolved_licenses = [
104-
resolve_license(license) for license in bom_attributes_to_dict(licenses)
105-
]
51+
resolved_licenses = [resolve_license(license) for license in licenses]
10652
return "\n".join(resolved_licenses)
10753

10854

@@ -126,14 +72,14 @@ def get_checksums(component):
12672

12773

12874
def get_external_references(component):
129-
"""Return dict of reference urls from list of `component.externalReferences`."""
130-
external_references = component.externalReferences
75+
"""Return dict of reference urls from list of `component.external_references`."""
76+
external_references = component.external_references
13177
if not external_references:
13278
return {}
13379

13480
references = defaultdict(list)
13581
for reference in external_references:
136-
references[reference.type].append(reference.url)
82+
references[reference.type.value].append(reference.url.uri)
13783

13884
return dict(references)
13985

@@ -154,38 +100,90 @@ def get_properties_data(component):
154100
return properties_data
155101

156102

157-
def validate_document(document, schema=CYCLONEDX_SCHEMA_PATH):
158-
"""Check the validity of this CycloneDX document."""
103+
def validate_document(document):
104+
"""
105+
Check the validity of this CycloneDX document.
106+
107+
The validator is loaded from the document specVersion property.
108+
"""
159109
if isinstance(document, str):
160110
document = json.loads(document)
161111

162-
if isinstance(schema, Path):
163-
schema = schema.read_text()
164-
165-
if isinstance(schema, str):
166-
schema = json.loads(schema)
112+
spec_version = document.get("specVersion")
113+
if not spec_version:
114+
return ValidationError("'specVersion' is a required property")
167115

168-
spdx_schema = SPDX_SCHEMA_PATH.read_text()
169-
jsf_schema = JSF_SCHEMA_PATH.read_text()
170-
171-
store = {
172-
"http://cyclonedx.org/schema/spdx.schema.json": json.loads(spdx_schema),
173-
"http://cyclonedx.org/schema/jsf-0.82.schema.json": json.loads(jsf_schema),
174-
}
116+
schema_version = SchemaVersion.from_version(spec_version)
175117

176-
resolver = jsonschema.RefResolver.from_schema(schema, store=store)
177-
validator = jsonschema.Draft7Validator(schema=schema, resolver=resolver)
178-
validator.validate(instance=document)
118+
json_validator = JsonStrictValidator(schema_version)
119+
return json_validator._validata_data(document)
179120

180121

181122
def is_cyclonedx_bom(input_location):
182123
"""Return True if the file at `input_location` is a CycloneDX BOM."""
183124
with suppress(Exception):
184125
data = json.loads(Path(input_location).read_text())
185-
conditions = (
186-
data.get("$schema", "").endswith(CYCLONEDX_SCHEMA_NAME),
187-
data.get("bomFormat") == "CycloneDX",
188-
)
189-
if any(conditions):
126+
if data.get("bomFormat") == "CycloneDX":
190127
return True
191128
return False
129+
130+
131+
def cyclonedx_component_to_package_data(cdx_component):
132+
"""Return package_data from CycloneDX component."""
133+
extra_data = {}
134+
135+
package_url_dict = {}
136+
if cdx_component.purl:
137+
package_url_dict = PackageURL.from_string(str(cdx_component.purl)).to_dict(
138+
encode=True
139+
)
140+
141+
declared_license = get_declared_licenses(licenses=cdx_component.licenses)
142+
143+
if external_references := get_external_references(cdx_component):
144+
extra_data["externalReferences"] = external_references
145+
146+
if nested_components := cdx_component.get_all_nested_components(include_self=False):
147+
nested_purls = [component.bom_ref.value for component in nested_components]
148+
extra_data["nestedComponents"] = sorted(nested_purls)
149+
150+
package_data = {
151+
"name": cdx_component.name,
152+
"extracted_license_statement": declared_license,
153+
"copyright": cdx_component.copyright,
154+
"version": cdx_component.version,
155+
"description": cdx_component.description,
156+
"extra_data": extra_data,
157+
**package_url_dict,
158+
**get_checksums(cdx_component),
159+
**get_properties_data(cdx_component),
160+
}
161+
162+
return {
163+
key: value for key, value in package_data.items() if value not in EMPTY_VALUES
164+
}
165+
166+
167+
def get_bom(cyclonedx_document):
168+
"""Return CycloneDX BOM object."""
169+
return Bom.from_json(data=cyclonedx_document)
170+
171+
172+
def get_components(bom):
173+
"""Return list of components from CycloneDX BOM."""
174+
return list(bom._get_all_components())
175+
176+
177+
def resolve_cyclonedx_packages(input_location):
178+
"""Resolve the packages from the `input_location` CycloneDX document file."""
179+
input_path = Path(input_location)
180+
cyclonedx_document = json.loads(input_path.read_text())
181+
182+
if errors := validate_document(cyclonedx_document):
183+
error_msg = f'CycloneDX document "{input_path.name}" is not valid:\n{errors}'
184+
raise ValueError(error_msg)
185+
186+
cyclonedx_bom = get_bom(cyclonedx_document)
187+
components = get_components(cyclonedx_bom)
188+
189+
return [cyclonedx_component_to_package_data(component) for component in components]

scanpipe/pipes/resolve.py

Lines changed: 1 addition & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,6 @@
2424
import sys
2525
from pathlib import Path
2626

27-
from django.core.validators import EMPTY_VALUES
28-
2927
from attributecode.model import About
3028
from packagedcode import APPLICATION_PACKAGE_DATAFILE_HANDLERS
3129
from packagedcode.licensing import get_license_detections_and_expression
@@ -258,56 +256,6 @@ def resolve_spdx_packages(input_location):
258256
]
259257

260258

261-
def cyclonedx_component_to_package_data(component_data):
262-
"""Return package_data from CycloneDX component."""
263-
extra_data = {}
264-
component = component_data["cdx_package"]
265-
266-
package_url_dict = {}
267-
if component.purl:
268-
package_url_dict = PackageURL.from_string(component.purl).to_dict(encode=True)
269-
270-
declared_license = cyclonedx.get_declared_licenses(licenses=component.licenses)
271-
272-
if external_references := cyclonedx.get_external_references(component):
273-
extra_data["externalReferences"] = external_references
274-
275-
if nested_components := component_data.get("nested_components"):
276-
extra_data["nestedComponents"] = nested_components
277-
278-
package_data = {
279-
"name": component.name,
280-
"extracted_license_statement": declared_license,
281-
"copyright": component.copyright,
282-
"version": component.version,
283-
"description": component.description,
284-
"extra_data": extra_data,
285-
**package_url_dict,
286-
**cyclonedx.get_checksums(component),
287-
**cyclonedx.get_properties_data(component),
288-
}
289-
290-
return {
291-
key: value for key, value in package_data.items() if value not in EMPTY_VALUES
292-
}
293-
294-
295-
def resolve_cyclonedx_packages(input_location):
296-
"""Resolve the packages from the `input_location` CycloneDX document file."""
297-
input_path = Path(input_location)
298-
cyclonedx_document = json.loads(input_path.read_text())
299-
300-
try:
301-
cyclonedx.validate_document(cyclonedx_document)
302-
except Exception as e:
303-
raise Exception(f'CycloneDX document "{input_path.name}" is not valid: {e}')
304-
305-
cyclonedx_bom = cyclonedx.get_bom(cyclonedx_document)
306-
components = cyclonedx.get_components(cyclonedx_bom)
307-
308-
return [cyclonedx_component_to_package_data(component) for component in components]
309-
310-
311259
def get_default_package_type(input_location):
312260
"""
313261
Return the package type associated with the provided `input_location`.
@@ -344,7 +292,7 @@ def get_default_package_type(input_location):
344292
sbom_registry = {
345293
"about": resolve_about_packages,
346294
"spdx": resolve_spdx_packages,
347-
"cyclonedx": resolve_cyclonedx_packages,
295+
"cyclonedx": cyclonedx.resolve_cyclonedx_packages,
348296
}
349297

350298

0 commit comments

Comments
 (0)