Skip to content

Commit e9b15e3

Browse files
authored
Upgrade CycloneDX output to schema v1.5 #807 (#1057)
Signed-off-by: tdruez <tdruez@nexb.com>
1 parent e4dab19 commit e9b15e3

File tree

12 files changed

+544
-86
lines changed

12 files changed

+544
-86
lines changed

CHANGELOG.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,10 @@ Unreleased
1717
compatible with the old names.
1818
https://github.com/nexB/scancode.io/issues/1044
1919

20+
- Generate the CycloneDX SBOM output in the 1.5 spec format (previously 1.4).
21+
Package vulnerabilities are now included in the CycloneDX SBOM when available.
22+
https://github.com/nexB/scancode.io/issues/807
23+
2024
v33.0.0 (2024-01-16)
2125
--------------------
2226

scanpipe/models.py

Lines changed: 40 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,8 @@
6666
from commoncode.fileutils import parent_directory
6767
from cyclonedx import model as cyclonedx_model
6868
from cyclonedx.model import component as cyclonedx_component
69+
from cyclonedx.model import license as cyclonedx_license
6970
from extractcode import EXTRACT_SUFFIX
70-
from formattedcode.output_cyclonedx import CycloneDxExternalRef
7171
from licensedcode.cache import build_spdx_license_expression
7272
from licensedcode.cache import get_licensing
7373
from matchcode_toolkit.fingerprinting import IGNORED_DIRECTORY_FINGERPRINTS
@@ -3062,9 +3062,9 @@ def as_cyclonedx(self):
30623062
"""Return this DiscoveredPackage as an CycloneDX Component entry."""
30633063
licenses = []
30643064
if expression_spdx := self.get_declared_license_expression_spdx():
3065-
licenses = [
3066-
cyclonedx_model.LicenseChoice(license_expression=expression_spdx),
3067-
]
3065+
# Using the LicenseExpression directly as the make_with_expression method
3066+
# does not support the "LicenseRef-" keys.
3067+
licenses = [cyclonedx_license.LicenseExpression(value=expression_spdx)]
30683068

30693069
hash_fields = {
30703070
"md5": cyclonedx_model.HashAlgorithm.MD5,
@@ -3073,7 +3073,7 @@ def as_cyclonedx(self):
30733073
"sha512": cyclonedx_model.HashAlgorithm.SHA_512,
30743074
}
30753075
hashes = [
3076-
cyclonedx_model.HashType(algorithm=algorithm, hash_value=hash_value)
3076+
cyclonedx_model.HashType(alg=algorithm, content=hash_value)
30773077
for field_name, algorithm in hash_fields.items()
30783078
if (hash_value := getattr(self, field_name))
30793079
]
@@ -3097,25 +3097,53 @@ def as_cyclonedx(self):
30973097
if (value := getattr(self, field_name)) not in EMPTY_VALUES
30983098
]
30993099

3100-
cyclonedx_url_to_type = CycloneDxExternalRef.cdx_url_type_by_scancode_field
3100+
reference_type = cyclonedx_model.ExternalReferenceType
3101+
url_field_to_cdx_type = {
3102+
"api_data_url": reference_type.BOM,
3103+
"bug_tracking_url": reference_type.ISSUE_TRACKER,
3104+
"code_view_url": reference_type.OTHER,
3105+
"download_url": reference_type.DISTRIBUTION,
3106+
"homepage_url": reference_type.WEBSITE,
3107+
"repository_download_url": reference_type.DISTRIBUTION,
3108+
"repository_homepage_url": reference_type.WEBSITE,
3109+
"vcs_url": reference_type.VCS,
3110+
}
31013111
external_references = [
3102-
cyclonedx_model.ExternalReference(reference_type=reference_type, url=url)
3103-
for field_name, reference_type in cyclonedx_url_to_type.items()
3112+
cyclonedx_model.ExternalReference(type=reference_type, url=url)
3113+
for field_name, reference_type in url_field_to_cdx_type.items()
31043114
if (url := getattr(self, field_name)) and field_name not in property_fields
31053115
]
31063116

3107-
purl = self.package_url
3117+
# Always use the package_uid when available to ensure having unique
3118+
# package_url in the BOM when several instances of the same DiscoveredPackage
3119+
# (i.e. same purl) are present in the project.
3120+
try:
3121+
package_url = PackageURL.from_string(self.package_uid)
3122+
except ValueError:
3123+
package_url = self.get_package_url()
3124+
3125+
evidence = None
3126+
if self.other_license_expression_spdx:
3127+
evidence = cyclonedx_component.ComponentEvidence(
3128+
licenses=[
3129+
cyclonedx_license.LicenseExpression(
3130+
value=self.other_license_expression_spdx
3131+
)
3132+
],
3133+
)
3134+
31083135
return cyclonedx_component.Component(
31093136
name=self.name,
31103137
version=self.version,
3111-
bom_ref=self.package_uid or str(self.uuid),
3112-
purl=purl,
3138+
bom_ref=str(package_url),
3139+
purl=package_url,
31133140
licenses=licenses,
3114-
copyright_=self.copyright,
3141+
copyright=self.copyright,
31153142
description=self.description,
31163143
hashes=hashes,
31173144
properties=properties,
31183145
external_references=external_references,
3146+
evidence=evidence,
31193147
)
31203148

31213149

scanpipe/pipes/cyclonedx.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -117,10 +117,11 @@ def get_checksums(component):
117117
"SHA-256": "sha256",
118118
"SHA-512": "sha512",
119119
}
120+
120121
return {
121-
algorithm_map_cdx_scio[algo_hash.alg.value]: algo_hash.content.__root__
122+
algorithm_map_cdx_scio[algo_hash.alg]: algo_hash.content
122123
for algo_hash in component.hashes
123-
if algo_hash.alg.value in algorithm_map_cdx_scio
124+
if algo_hash.alg in algorithm_map_cdx_scio
124125
}
125126

126127

@@ -132,7 +133,7 @@ def get_external_references(component):
132133

133134
references = defaultdict(list)
134135
for reference in external_references:
135-
references[reference.type.value].append(reference.url)
136+
references[reference.type].append(reference.url)
136137

137138
return dict(references)
138139

scanpipe/pipes/output.py

Lines changed: 124 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
# Visit https://github.com/nexB/scancode.io for support and download.
2222

2323
import csv
24+
import decimal
2425
import json
2526
import re
2627
from operator import attrgetter
@@ -34,9 +35,13 @@
3435

3536
import saneyaml
3637
import xlsxwriter
37-
from cyclonedx import output as cyclonedx_output
38-
from cyclonedx.model import bom as cyclonedx_bom
39-
from cyclonedx.model import component as cyclonedx_component
38+
from cyclonedx.model import bom as cdx_bom
39+
from cyclonedx.model import component as cdx_component
40+
from cyclonedx.model import vulnerability as cdx_vulnerability
41+
from cyclonedx.output import OutputFormat
42+
from cyclonedx.output import make_outputter
43+
from cyclonedx.schema import SchemaVersion
44+
from cyclonedx.validation.json import JsonStrictValidator
4045
from license_expression import Licensing
4146
from license_expression import ordered_unique
4247
from licensedcode.cache import build_spdx_license_expression
@@ -579,65 +584,156 @@ def to_spdx(project, include_files=False):
579584
return output_file
580585

581586

587+
def vulnerability_as_cyclonedx(vulnerability_data, component_bom_ref):
    """
    Return a CycloneDX ``Vulnerability`` built from ``vulnerability_data``.

    ``vulnerability_data`` is a mapping from which the following keys are read:
    "vulnerability_id", "summary", "url", "references" and "weaknesses".
    Each reference entry provides "reference_id", "reference_url" and an
    optional "scores" list whose entries provide "value" and
    "scoring_elements".

    ``component_bom_ref`` is the bom-ref of the affected component; it is
    rendered as a ``urn:cdx:`` target in the ``affects`` list.
    """
    affects = [cdx_vulnerability.BomTarget(ref=f"urn:cdx:{component_bom_ref}")]

    # Source of the vulnerability record itself.
    source = cdx_vulnerability.VulnerabilitySource(
        name="VulnerableCode",
        url=vulnerability_data.get("url"),
    )

    references = []
    ratings = []
    for reference in vulnerability_data.get("references", []):
        # Use a dedicated name: re-binding ``source`` here would replace the
        # "VulnerableCode" source attached to the returned Vulnerability with
        # the source of the last reference.
        reference_source = cdx_vulnerability.VulnerabilitySource(
            url=reference.get("reference_url"),
        )

        references.append(
            cdx_vulnerability.VulnerabilityReference(
                id=reference.get("reference_id"),
                source=reference_source,
            )
        )

        for score_entry in reference.get("scores", []):
            # CycloneDX only supports a numeric value for the score field,
            # whereas in the VulnerableCode data it can be either a numeric
            # score or a severity string value.
            score_value = score_entry.get("value")
            score = None
            severity = None
            try:
                score = decimal.Decimal(score_value)
            except (decimal.DecimalException, TypeError, ValueError):
                # Not a number: a missing value (None) raises TypeError, a
                # severity label such as "high" raises InvalidOperation.
                if isinstance(score_value, str):
                    severity = getattr(
                        cdx_vulnerability.VulnerabilitySeverity,
                        score_value.upper(),
                        None,
                    )

            ratings.append(
                cdx_vulnerability.VulnerabilityRating(
                    source=reference_source,
                    score=score,
                    severity=severity,
                    # Providing a value for method raises an AssertionError
                    # method=score_entry.get("scoring_system"),
                    vector=score_entry.get("scoring_elements"),
                )
            )

    cwes = [
        weakness.get("cwe_id") for weakness in vulnerability_data.get("weaknesses", [])
    ]

    return cdx_vulnerability.Vulnerability(
        id=vulnerability_data.get("vulnerability_id"),
        source=source,
        description=vulnerability_data.get("summary"),
        affects=affects,
        references=references,
        cwes=cwes,
        ratings=ratings,
    )
649+
650+
582651
def get_cyclonedx_bom(project):
583652
"""
584653
Return a CycloneDX `Bom` object filled with provided `project` data.
585654
See https://cyclonedx.org/use-cases/#dependency-graph
586655
"""
587-
components = [
588-
*get_queryset(project, "discoveredpackage"),
589-
]
590-
591-
cyclonedx_components = [component.as_cyclonedx() for component in components]
592-
593-
bom = cyclonedx_bom.Bom(components=cyclonedx_components)
594-
595-
project_as_cyclonedx = cyclonedx_component.Component(
656+
project_as_root_component = cdx_component.Component(
596657
name=project.name,
597658
bom_ref=str(project.uuid),
598659
)
599660

600-
project_as_cyclonedx.dependencies.update(
601-
[component.bom_ref for component in cyclonedx_components]
602-
)
603-
604-
bom.metadata = cyclonedx_bom.BomMetaData(
605-
component=project_as_cyclonedx,
661+
bom = cdx_bom.Bom()
662+
bom.metadata = cdx_bom.BomMetaData(
663+
component=project_as_root_component,
606664
tools=[
607-
cyclonedx_bom.Tool(
665+
cdx_bom.Tool(
608666
name="ScanCode.io",
609667
version=scancodeio_version,
610668
)
611669
],
612670
properties=[
613-
cyclonedx_bom.Property(
671+
cdx_bom.Property(
614672
name="notice",
615673
value=SCAN_NOTICE,
616674
)
617675
],
618676
)
619677

678+
components = []
679+
vulnerabilities = []
680+
for package in get_queryset(project, "discoveredpackage"):
681+
component = package.as_cyclonedx()
682+
components.append(component)
683+
684+
for vulnerability_data in package.affected_by_vulnerabilities:
685+
vulnerabilities.append(
686+
vulnerability_as_cyclonedx(
687+
vulnerability_data=vulnerability_data,
688+
component_bom_ref=component.bom_ref,
689+
)
690+
)
691+
692+
for component in components:
693+
bom.components.add(component)
694+
bom.register_dependency(project_as_root_component, [component])
695+
696+
bom.vulnerabilities = vulnerabilities
697+
620698
return bom
621699

622700

623-
def to_cyclonedx(project):
701+
def sort_bom_with_schema_ordering(bom_as_dict, schema_version):
    """
    Return ``bom_as_dict`` serialized as a JSON string, with its top-level
    keys ordered as the "properties" of the JSON schema file for
    ``schema_version``.

    Keys of ``bom_as_dict`` that are not listed in the schema "properties"
    are not included in the output.
    """
    validator = JsonStrictValidator(schema_version)
    with open(validator._schema_file) as schema_handle:
        schema = json.load(schema_handle)

    sorted_bom = {}
    for property_name in schema.get("properties", {}):
        if property_name in bom_as_dict:
            sorted_bom[property_name] = bom_as_dict.get(property_name)

    return json.dumps(sorted_bom, indent=2)
713+
714+
715+
def to_cyclonedx(project, schema_version=SchemaVersion.V1_5):
624716
"""
625717
Generate output for the provided ``project`` in CycloneDX BOM format.
626718
The output file is created in the ``project`` "output/" directory.
627719
Return the path of the generated output file.
628720
"""
629721
output_file = project.get_output_file_path("results", "cdx.json")
630722

631-
cyclonedx_bom = get_cyclonedx_bom(project)
723+
bom = get_cyclonedx_bom(project)
724+
json_outputter = make_outputter(bom, OutputFormat.JSON, schema_version)
632725

633-
outputter = cyclonedx_output.get_instance(
634-
bom=cyclonedx_bom,
635-
output_format=cyclonedx_output.OutputFormat.JSON,
636-
)
726+
# Using the internal API in place of the output_as_string() method to avoid
727+
# a round of deserialization/serialization while fixing the field ordering.
728+
json_outputter.generate()
729+
bom_as_dict = json_outputter._bom_json
730+
731+
# The default order out of the outputter is not great, the following sorts the
732+
# bom using the order from the schema.
733+
sorted_json = sort_bom_with_schema_ordering(bom_as_dict, schema_version)
637734

638-
bom_json = outputter.output_as_string()
639735
with output_file.open("w") as file:
640-
file.write(bom_json)
736+
file.write(sorted_json)
641737

642738
return output_file
643739

scanpipe/tests/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,8 @@ def make_resource_directory(project, path, **extra):
137137
),
138138
"declared_license_expression": "gpl-2.0 AND gpl-2.0-plus",
139139
"declared_license_expression_spdx": "GPL-2.0-only AND GPL-2.0-or-later",
140+
"other_license_expression": "apache-2.0 AND (mpl-1.1 OR gpl-2.0)",
141+
"other_license_expression_spdx": "Apache-2.0 AND (MPL-1.1 OR GPL-2.0)",
140142
"extracted_license_statement": "",
141143
"notice_text": "Notice\nText",
142144
"root_path": None,

0 commit comments

Comments
 (0)