Skip to content

Commit 10fc4dc

Browse files
author
Shane Wright
committed
several bug fixes related to counting and component accuracy:
- Force encoding utf8 when opening file - Use the component URL from API call in the version query - resolves some situations where the KB version lookup fails - Update find_comp_in_bom to return the matching URL instead of True/False - Track unique BOM matches by tracking the matched component URL returned by find_comp_in_bom - Track the count of skipped items from the SPDX - Make the unique package tracking more accurate - do not include skipped items - Create fall-through matching. First check BD component, then the purl info (rather than only checking the purl)
1 parent 7942d15 commit 10fc4dc

File tree

1 file changed

+45
-20
lines changed

1 file changed

+45
-20
lines changed

examples/client/parse_spdx.py

Lines changed: 45 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -39,9 +39,19 @@
3939
- Check if project ever had a "non-SBOM" scan and exit if so
4040
- Fix some invalid sort parameter formatting
4141
- Limit notification checking to last 24 hours
42+
1.3 2023-11-14 - Force encoding utf8 when opening file
43+
- Use the component URL from API call in the version query - resolves
44+
some situations where the KB version lookup fails
45+
- Update find_comp_in_bom to return the matching URL instead of
46+
True/False
47+
- Track unique BOM matches by tracking the matched component URL
48+
returned by find_comp_in_bom
49+
- Track the count of skipped items from the SPDX
50+
- Make the unique package tracking more accurate - do not include skipped items
51+
- Create fall-through matching. First check BD component, then the purl info
52+
(rather than only checking the purl)
4253
4354
Requirements
44-
4555
- python3 version 3.8 or newer recommended
4656
- The following packages are used by the script and should be installed
4757
prior to use:
@@ -184,7 +194,7 @@ def spdx_validate(document):
184194
# Returns MIME type to provide to scan API
185195
# Input: filename to check
186196
def get_sbom_mime_type(filename):
187-
with open(filename, 'r') as f:
197+
with open(filename, 'r', encoding="utf8") as f:
188198
data = f.readlines()
189199
content = " ".join(data)
190200
if 'CycloneDX' in content:
@@ -448,8 +458,10 @@ def find_comp_id_in_kb(comp, ver):
448458
kb_match['versionName'] = "UNKNOWN"
449459
return kb_match
450460

461+
# Update the component url to match the one returned by API
462+
comp_url = json_data['_meta']['href']
451463
try:
452-
json_data = bd.get_json(f"/api/components/{comp}/versions/{ver}")
464+
json_data = bd.get_json(f"{comp_url}/versions/{ver}")
453465
except:
454466
# No component version match
455467
return None
@@ -466,7 +478,7 @@ def find_comp_id_in_kb(comp, ver):
466478
# compver - Component version to locate
467479
# projver - Project version to locate component in BOM
468480
#
469-
# Returns: True on success, False on failure
481+
# Returns: Component match URL on success, None on failure
470482
def find_comp_in_bom(compname, compver, projver):
471483
have_match = False
472484
num_match = 0
@@ -485,16 +497,16 @@ def find_comp_in_bom(compname, compver, projver):
485497
continue
486498
if compver == "UNKNOWN":
487499
# We did not have a version specified in the first place
488-
return True
500+
return comp['component']
489501
# Check component name + version name
490502
try:
491503
if comp['componentVersionName'].lower() == compver.lower():
492-
return True
504+
return comp['componentVersion']
493505
except:
494506
# Handle situation where it's missing the version name for some reason
495507
print(f"comp {compname} in BOM has no version!")
496-
return False
497-
return False
508+
return None
509+
return None
498510

499511
# Verifies if a custom component and version already exist in the system.
500512
#
@@ -718,8 +730,11 @@ def import_sbom(bdobj, projname, vername, spdxfile, outfile=None, \
718730
package_count = 0
719731
cust_comp_count = 0
720732
cust_ver_count = 0
721-
# Used for tracking repeated package data
733+
skip_count = 0
734+
# Used for tracking repeated package data, not including skips
722735
packages = {}
736+
# Used for tracking unique BOM matches
737+
bom_packages = {}
723738
# Saved component data to write to file
724739
comps_out = []
725740

@@ -733,6 +748,7 @@ def import_sbom(bdobj, projname, vername, spdxfile, outfile=None, \
733748
if package.name == "":
734749
# Strange case where the package name is empty. Skip it.
735750
logging.warning("WARNING: Skipping empty package name. Package info:")
751+
skip_count += 1
736752
pprint(package)
737753
continue
738754

@@ -749,10 +765,6 @@ def import_sbom(bdobj, projname, vername, spdxfile, outfile=None, \
749765
matchver = package.version
750766
print(f"Processing SPDX package: {matchname} version: {matchver}...")
751767

752-
# Tracking unique package name + version combos from spdx file
753-
# This is only used for debugging and stats purposes
754-
packages[matchname+matchver] = packages.get(matchname+matchver, 0) + 1
755-
756768
kb_match = None
757769
if package.external_references:
758770
# Build dictionary of extrefs for easy access
@@ -762,11 +774,8 @@ def import_sbom(bdobj, projname, vername, spdxfile, outfile=None, \
762774
reftype = ref.reference_type.lstrip("LocationRef-")
763775
extrefs[reftype] = ref.locator
764776

765-
if "purl" in extrefs:
766-
# purl is the preferred lookup
767-
kb_match = find_comp_in_kb(extrefs['purl'])
768-
extref = extrefs['purl']
769-
elif "BlackDuck-Component" in extrefs:
777+
if "BlackDuck-Component" in extrefs:
778+
# Prefer BD component lookup if available
770779
compid = normalize_id(extrefs['BlackDuck-Component'])
771780
try:
772781
verid = normalize_id(extrefs['BlackDuck-ComponentVersion'])
@@ -776,8 +785,18 @@ def import_sbom(bdobj, projname, vername, spdxfile, outfile=None, \
776785
# Lookup by KB ID
777786
kb_match = find_comp_id_in_kb(compid, verid)
778787
extref = extrefs['BlackDuck-Component']
788+
if not kb_match:
789+
# BD comp lookup failed, so try purl instead
790+
if "purl" in extrefs:
791+
kb_match = find_comp_in_kb(extrefs['purl'])
792+
extref = extrefs['purl']
793+
elif "purl" in extrefs:
794+
# If no BD component details are available
795+
kb_match = find_comp_in_kb(extrefs['purl'])
796+
extref = extrefs['purl']
779797
elif "BlackDuck-Version" in extrefs:
780798
# Skip BD project/versions. These occur in BD-generated BOMs.
799+
skip_count += 1
781800
print(f" Skipping BD project/version in BOM: {package.name} {package.version}")
782801
continue
783802
else:
@@ -797,7 +816,10 @@ def import_sbom(bdobj, projname, vername, spdxfile, outfile=None, \
797816
nopurl += 1
798817
print(f" No pURL provided for {package.name} {package.version}")
799818

800-
if find_comp_in_bom(matchname, matchver, version):
819+
bom_comp = find_comp_in_bom(matchname, matchver, version)
820+
if bom_comp:
821+
bom_packages[bom_comp] = bom_packages.get(bom_comp, 0) + 1
822+
packages[matchname+matchver] = packages.get(matchname+matchver, 0) + 1
801823
bom_matches += 1
802824
print(f" Found component in BOM: {matchname} {matchver}")
803825
continue
@@ -809,6 +831,7 @@ def import_sbom(bdobj, projname, vername, spdxfile, outfile=None, \
809831
# - Do we need to add a version to an existing custom component?
810832
not_in_bom += 1
811833
print(f" Not present in BOM: {matchname} {matchver}")
834+
packages[matchname+matchver] = packages.get(matchname+matchver, 0) + 1
812835

813836
# Missing component data to write to a file for reference
814837
comp_data = {
@@ -862,16 +885,18 @@ def import_sbom(bdobj, projname, vername, spdxfile, outfile=None, \
862885
print(f" SPDX packages processed: {package_count}")
863886
# package_count above could have repeated packages in it
864887
print(f" Unique packages processed: {len(packages)}")
888+
print(f" Skipped: {skip_count}")
865889
print(f" Packages missing purl or KBID: {nopurl}")
866890
print(f" BOM matches: {bom_matches}")
891+
print(f" Unique BOM matches: {len(bom_packages)}")
867892
print(f" KB matches: {kb_matches}")
868893
print(f" Custom components created: {cust_comp_count}")
869894
print(f" Custom component versions created: {cust_ver_count}")
870895
print(f" Packages missing from BOM: {not_in_bom}")
871896
print(f" Custom components added to BOM: {cust_added_to_bom}")
872897
print(f" KB matches added to BOM: {kb_match_added_to_bom}")
873-
#for debugging
874898
#pprint(packages)
899+
#pprint(bom_packages)
875900

876901
if __name__ == "__main__":
877902
sys.exit(spdx_main_parse_args())

0 commit comments

Comments
 (0)