Skip to content

Commit 68291b1

Browse files
author
Shane Wright
committed
Add KB match on BD IDs
- Avoids adding unnecessary custom components - Various comment fixup - Removed stray unused variable - Make the stats a little more clear
1 parent 2f6a8f9 commit 68291b1

File tree

1 file changed

+89
-35
lines changed

1 file changed

+89
-35
lines changed

examples/client/parse_spdx.py

Lines changed: 89 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,6 @@
22
Created on August 15, 2023
33
@author: swright
44
5-
##################### DISCLAIMER ##########################
6-
## This script was created for a specific purpose and ##
7-
## SHOULD NOT BE USED as a general purpose utility. ##
8-
## For general purpose utility use ##
9-
## /examples/client/generate_sbom.py ##
10-
###########################################################
11-
125
Copyright (C) 2023 Synopsys, Inc.
136
http://www.blackducksoftware.com/
147
@@ -38,6 +31,10 @@
3831
3932
All missing components are saved to a file in JSON format for future reference.
4033
34+
Version History
35+
1.0 2023-09-26 Initial Release
36+
1.1 2023-10-13 Updates to improve component matching of BD Component IDs
37+
4138
Requirements
4239
4340
- python3 version 3.8 or newer recommended
@@ -394,6 +391,39 @@ def find_comp_in_kb(extref):
394391
# Fall through -- lookup failed
395392
return(None)
396393

394+
# Look up the given BD Component Version in the BD KB.
395+
# Note: Match source will be one of: KB, CUSTOM, or KB_MODIFIED
396+
# Any of these should be acceptable
397+
#
398+
# Inputs:
399+
# Component UUID
400+
# Component Version UUID
401+
#
402+
# Returns:
403+
# kb_match dictionary that mimics the format returned by find_comp_in_kb:
404+
# keys are: componentName, versionName, version (url of component version)
405+
# No match returns None
406+
def find_comp_id_in_kb(comp, ver):
    """Look up a component version by component ID and version ID.

    Issues two GET requests through the module-level ``bd`` client (defined
    outside this function): first for the component, then for the component
    version.

    Args:
        comp: Component identifier substituted into ``/api/components/{comp}``.
        ver: Component version identifier substituted into
            ``/api/components/{comp}/versions/{ver}``.

    Returns:
        A dict with the keys ``componentName``, ``versionName`` and
        ``version`` (the ``_meta.href`` URL of the component version), or
        ``None`` if either request raises.
    """
    kb_match = {}

    try:
        comp_data = bd.get_json(f"/api/components/{comp}")
    except Exception:
        # No component match. Catch Exception rather than using a bare
        # except so Ctrl-C / SystemExit are not mistaken for "no match".
        return None
    kb_match['componentName'] = comp_data['name']

    try:
        ver_data = bd.get_json(f"/api/components/{comp}/versions/{ver}")
    except Exception:
        # No component version match
        return None
    kb_match['versionName'] = ver_data['versionName']

    # Add the url of the component-version
    kb_match['version'] = ver_data['_meta']['href']

    return kb_match
426+
397427
# Locate component name + version in BOM
398428
# Inputs:
399429
# compname - Component name to locate
@@ -586,6 +616,16 @@ def spdx_main_parse_args():
586616
import_sbom(bdobj, args.project_name, args.version_name, args.spdx_file, \
587617
args.out_file, args.license_name, args.spdx_validate)
588618

619+
# Normalize a BD UUID or URL in the extrefs section to be consistently formatted
620+
# Input: Black Duck component or version ID string from SPDX file
621+
# Output: UUID
622+
def normalize_id(id):
    """Reduce a bare ID or a URL ending in an ID to just the final segment.

    Args:
        id: ID string, optionally given as a URL/path and optionally
            followed by one or more trailing '/' characters.

    Returns:
        The text after the last '/' once surrounding whitespace and any
        trailing '/' characters have been removed. A string without '/'
        is returned unchanged apart from the whitespace trim.
    """
    # Trim whitespace first; otherwise a trailing space or newline hides the
    # trailing '/' from rstrip and the "last segment" ends up being blank.
    trimmed = id.strip().rstrip('/')
    # Ensure only the final path segment (the UUID) remains
    return trimmed.rpartition('/')[2]
628+
589629
# Main entry point
590630
#
591631
# Inputs:
@@ -635,7 +675,9 @@ def import_sbom(bdobj, projname, vername, spdxfile, outfile=None, \
635675
bom_matches = 0
636676
kb_matches = 0
637677
nopurl = 0
638-
nomatch = 0
678+
not_in_bom = 0
679+
cust_added_to_bom = 0
680+
kb_match_added_to_bom = 0
639681
package_count = 0
640682
cust_comp_count = 0
641683
cust_ver_count = 0
@@ -647,9 +689,9 @@ def import_sbom(bdobj, projname, vername, spdxfile, outfile=None, \
647689
# Walk through each component in the SPDX file
648690
for package in document.packages:
649691
package_count += 1
650-
# We hope we'll have an external reference (pURL), but we might not.
692+
# We hope we'll have an external reference (pURL or KBID), but it
693+
# is possible to have neither.
651694
extref = None
652-
purlmatch = False
653695

654696
if package.name == "":
655697
# Strange case where the package name is empty. Skip it.
@@ -671,27 +713,37 @@ def import_sbom(bdobj, projname, vername, spdxfile, outfile=None, \
671713
print(f"Processing SPDX package: {matchname} version: {matchver}...")
672714

673715
# Tracking unique package name + version combos from spdx file
716+
# This is only used for debugging and stats purposes
674717
packages[matchname+matchver] = packages.get(matchname+matchver, 0) + 1
675718

676719
kb_match = None
677-
bd_proj = False
678720
if package.external_references:
679-
foundpurl = False
721+
# Build dictionary of extrefs for easy access
722+
extrefs = {}
680723
for ref in package.external_references:
681-
# There can be multiple extrefs; try to locate a pURL.
682-
# If there are multiple pURLs, use the first one.
683-
if (ref.reference_type == "purl"):
684-
foundpurl = True
685-
kb_match = find_comp_in_kb(ref.locator)
686-
extref = ref.locator
687-
break
724+
# Older BD releases prepend this string; strip it
725+
reftype = ref.reference_type.lstrip("LocationRef-")
726+
extrefs[reftype] = ref.locator
727+
728+
if "purl" in extrefs:
729+
# purl is the preferred lookup
730+
kb_match = find_comp_in_kb(ref.locator)
731+
extref = ref.locator
732+
elif "BlackDuck-Component" in extrefs:
733+
compid = normalize_id(extrefs['BlackDuck-Component'])
734+
verid = normalize_id(extrefs['BlackDuck-ComponentVersion'])
735+
736+
# Lookup by KB ID
737+
kb_match = find_comp_id_in_kb(compid, verid)
738+
extref = extrefs['BlackDuck-Component']
739+
elif "BlackDuck-Version" in extrefs:
688740
# Skip BD project/versions. These occur in BD-generated BOMs.
689-
if (ref.reference_type == "BlackDuck-Version"):
690-
bd_proj = True
691-
break
692-
if not foundpurl:
741+
print(f" Skipping BD project/version in BOM: {package.name} {package.version}")
742+
continue
743+
else:
693744
nopurl += 1
694-
print(f" No pURL provided for {package.name} {package.version}")
745+
print(f" No pURL or KB ID provided for {package.name} {package.version}")
746+
695747
if (kb_match):
696748
# Update package name and version to reflect the KB name/ver
697749
print(f" KB match for {package.name} {package.version}")
@@ -701,25 +753,21 @@ def import_sbom(bdobj, projname, vername, spdxfile, outfile=None, \
701753
else:
702754
print(f" No KB match for {package.name} {package.version}")
703755
else:
704-
# No external references field was provided
756+
# No external references field was provided
705757
nopurl += 1
706758
print(f" No pURL provided for {package.name} {package.version}")
707759

708-
if bd_proj:
709-
print(f" Skipping BD project/version in BOM: {package.name} {package.version}")
710-
continue
711-
712760
if find_comp_in_bom(matchname, matchver, version):
713761
bom_matches += 1
714762
print(f" Found component in BOM: {matchname} {matchver}")
715763
continue
716764

717765
# If we've gotten this far, the package is not in the BOM.
718766
# Now we need to figure out:
719-
# - Is it already in the KB and we need to add it? (should be rare)
767+
# - Is it already in the KB and we need to add it?
720768
# - Do we need to add a custom component?
721769
# - Do we need to add a version to an existing custom component?
722-
nomatch += 1
770+
not_in_bom += 1
723771
print(f" Not present in BOM: {matchname} {matchver}")
724772

725773
# Missing component data to write to a file for reference
@@ -733,7 +781,9 @@ def import_sbom(bdobj, projname, vername, spdxfile, outfile=None, \
733781

734782
# KB match was successful, but it wasn't in the BOM for some reason
735783
if kb_match:
736-
print(f" WARNING: {matchname} {matchver} in KB but not in SBOM")
784+
kb_match_added_to_bom += 1
785+
print(f" WARNING: {matchname} {matchver} found in KB but not in SBOM - adding it")
786+
# kb_match['version'] contains the url of the component-version to add
737787
add_to_sbom(proj_version_url, kb_match['version'])
738788
# short-circuit the rest
739789
continue
@@ -759,6 +809,7 @@ def import_sbom(bdobj, projname, vername, spdxfile, outfile=None, \
759809
assert(comp_ver_url), f"No component URL found for {package.name} {package.version}"
760810

761811
print(f" Adding component to SBOM: {package.name} aka {matchname} {package.version}")
812+
cust_added_to_bom += 1
762813
add_to_sbom(proj_version_url, comp_ver_url)
763814

764815
# Save unmatched components
@@ -769,15 +820,18 @@ def import_sbom(bdobj, projname, vername, spdxfile, outfile=None, \
769820
print("\nStats: ")
770821
print("------")
771822
print(f" SPDX packages processed: {package_count}")
772-
print(f" Packages missing from BOM: {nomatch}")
823+
# package_count above could have repeated packages in it
824+
print(f" Unique packages processed: {len(packages)}")
825+
print(f" Packages missing purl or KBID: {nopurl}")
773826
print(f" BOM matches: {bom_matches}")
774827
print(f" KB matches: {kb_matches}")
775-
print(f" Packages missing purl: {nopurl}")
776828
print(f" Custom components created: {cust_comp_count}")
777829
print(f" Custom component versions created: {cust_ver_count}")
830+
print(f" Packages missing from BOM: {not_in_bom}")
831+
print(f" Custom components added to BOM: {cust_added_to_bom}")
832+
print(f" KB matches added to BOM: {kb_match_added_to_bom}")
778833
#for debugging
779834
#pprint(packages)
780-
print(f" {len(packages)} unique packages processed")
781835

782836
if __name__ == "__main__":
783837
sys.exit(spdx_main_parse_args())

0 commit comments

Comments
 (0)