Skip to content

Commit dbe89da

Browse files
committed
v1.4 - new matching model and options
1 parent 14c8ee2 commit dbe89da

File tree

8 files changed

+195
-72
lines changed

8 files changed

+195
-72
lines changed

bd_sig_filter/BOMClass.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,3 +71,19 @@ def report_full(self):
7171
print(tabulate(table, headers=["Component", "Match Type", "Ignored", "Reviewed", "To be Ignored",
7272
"To be Reviewed", "Action"]))
7373
print()
74+
if global_values.report_file != '':
75+
with open(global_values.report_file, "a") as rfile:
76+
# Writing data to a file
77+
rfile.writelines(tabulate(table, headers=["Component", "Match Type", "Ignored", "Reviewed", "To be Ignored",
78+
"To be Reviewed", "Action"]))
79+
rfile.writelines("")
80+
81+
def report_unmatched(self):
    """Print the unmatched-component report and append it to the report file.

    The report text is built from ``self.complist.get_unmatched_list()`` with
    an ``UNMATCHED COMPONENTS:`` heading. It is always printed to stdout and,
    when ``global_values.report_file`` is not the empty string, also appended
    to that file.
    """
    data = "UNMATCHED COMPONENTS:\n" + self.complist.get_unmatched_list()
    print(data)
    if global_values.report_file != '':
        with open(global_values.report_file, "a") as rfile:
            # write(), not writelines(): data is one string, and writelines()
            # would iterate it and emit it one character at a time.
            rfile.write(data)
89+

bd_sig_filter/ComponentClass.py

Lines changed: 66 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,10 @@ def __init__(self, name, version, data):
1818
self.sig_match_result = -1
1919
self.compname_found = False
2020
self.compver_found = False
21-
self.reason = 'No Action - compname or version not found in Sig paths'
21+
self.reason = 'No Action'
2222
self.best_sigpath = ''
23+
self.oriname_arr = self.get_origin_compnames()
24+
self.unmatched = False
2325

2426
def get_compverid(self):
2527
try:
@@ -54,7 +56,7 @@ def is_signature(self):
5456
return False
5557

5658
def is_only_signature(self):
57-
return (not self.is_dependency() and self.is_signature())
59+
return not self.is_dependency() and self.is_signature()
5860

5961
def set_ignore(self):
6062
self.ignore = True
@@ -80,6 +82,7 @@ def is_ignored(self):
8082

8183
def process_signatures(self):
8284
all_paths_ignoreable = True
85+
unmatched = False
8386
reason = ''
8487
for sigentry in self.sigentry_arr:
8588
ignore, reason = sigentry.filter_folders()
@@ -100,23 +103,33 @@ def process_signatures(self):
100103
self.sig_match_result = 0
101104
set_reviewed = False
102105
ignore = True
106+
unmatched = True
107+
reason = f"No Action - component name '{self.oriname_arr}' not found in signature paths"
103108
for sigentry in self.sigentry_arr:
109+
# compname_found, compver_found,\
110+
# new_match_result = sigentry.search_component(self.filter_name, self.filter_version)
104111
compname_found, compver_found,\
105-
new_match_result = sigentry.search_component(self.filter_name, self.filter_version)
112+
new_match_result = sigentry.search_component(self.oriname_arr, self.filter_version)
106113
logging.debug(f"Compname in path {compname_found}, Version in path {compver_found}, "
107114
f"Match result {new_match_result}, Path '{sigentry.path}'")
115+
108116
if compver_found:
109117
self.compver_found = True
110118
ignore = False
119+
unmatched = False
111120
if compname_found:
112121
self.compname_found = True
113122
if global_values.version_match_reqd:
114123
if compver_found:
115124
set_reviewed = True
116125
ignore = False
126+
unmatched = False
127+
else:
128+
reason = f"No Action - component version {self.filter_version} not found (and required because --version_match_reqd set)"
117129
elif compname_found:
118130
set_reviewed = True
119131
ignore = False
132+
unmatched = False
120133
if new_match_result > self.sig_match_result:
121134
self.sig_match_result = new_match_result
122135
self.best_sigpath = sigentry.path
@@ -125,14 +138,17 @@ def process_signatures(self):
125138
if self.compver_found:
126139
reason = f"Mark REVIEWED - Compname & version in path '{self.best_sigpath}', Match result {self.sig_match_result}"
127140
elif self.compname_found:
128-
reason = f"Mark REVIEWED - Compname in path '{self.best_sigpath}', Match result {self.sig_match_result}"
141+
reason = f"Mark REVIEWED - Compname {self.oriname_arr} in path '{self.best_sigpath}', Match result {self.sig_match_result}"
129142

130-
self.reason = reason
131143
logging.debug(f"- Component {self.name}/{self.version}: {reason}")
132144
self.set_reviewed()
133-
if ignore and global_values.ignore_no_path_matches:
134-
self.set_ignore()
135-
self.reason = f"Mark IGNORED - compname or version not found in paths & --ignore_no_path_matches set"
145+
unmatched = False
146+
if ignore and global_values.ignore_no_path_matches:
147+
self.set_ignore()
148+
reason = f"Mark IGNORED - compname or version not found in paths & --ignore_no_path_matches set"
149+
150+
self.reason = reason
151+
self.unmatched = unmatched
136152

137153
@staticmethod
138154
def filter_name_string(name):
@@ -148,7 +164,7 @@ def filter_name_string(name):
148164
ret_name = re.sub(r"[/@#:]", " ", ret_name)
149165
ret_name = re.sub(r" \w$| \w |^\w ", r" ", ret_name)
150166
ret_name = ret_name.replace("::", " ")
151-
ret_name = re.sub(r" *", r" ", ret_name)
167+
ret_name = re.sub(r" +", r" ", ret_name)
152168
ret_name = re.sub(r"^ ", r"", ret_name)
153169
ret_name = re.sub(r" $", r"", ret_name)
154170

@@ -160,9 +176,11 @@ def filter_version_string(version):
160176
# Remove +git*
161177
# Remove -snapshot*
162178
# Replace / with space
163-
ret_version = re.sub(r"\+git.*", r"", version, re.IGNORECASE)
164-
ret_version = re.sub(r"-snapshot.*", r"", ret_version, re.IGNORECASE)
179+
ret_version = re.sub(r"\+git.*", r"", version, flags=re.IGNORECASE)
180+
ret_version = re.sub(r"-snapshot.*", r"", ret_version, flags=re.IGNORECASE)
165181
ret_version = re.sub(r"/", r" ", ret_version)
182+
ret_version = re.sub(r"^v", r"", ret_version, flags=re.IGNORECASE)
183+
ret_version = re.sub(r"\+*", r"", ret_version, flags=re.IGNORECASE)
166184
return ret_version
167185

168186
def get_compid(self):
@@ -171,3 +189,40 @@ def get_compid(self):
171189
return compurl.split('/')[-1]
172190
except KeyError:
173191
return ''
192+
193+
def print_origins(self):
    """Print one line per origin of this component (debug helper).

    Prints a single "No Origin" line when ``self.data`` has no ``origins``
    key. A missing ``externalId`` or ``name`` field inside an individual
    origin is printed as an empty value, so one incomplete origin no longer
    mislabels the component as having no origin or hides the remaining ones.
    """
    try:
        origins = self.data['origins']
    except KeyError:
        print(f"Comp '{self.name}/{self.version}' No Origin")
        return

    for ori in origins:
        ext_id = ori.get('externalId', '')
        ori_name = ori.get('name', '')
        print(f"Comp '{self.name}/{self.version}' Origin '{ext_id}' Name '{ori_name}'")
199+
200+
def get_origin_compnames(self):
    """Build the list of candidate component names for signature-path matching.

    For every entry in ``self.data['origins']`` the origin version (``name``)
    is removed from ``externalId``, the remainder is split on ``:``, ``/`` and
    ``#``, and the second-to-last field is taken as a component name. When
    ``self.filter_name`` is a single word it is added as an extra candidate.

    ``self.filter_name`` is also used as a fallback whenever the origin data
    is missing, empty or partly malformed, so the returned list is never
    empty. A malformed origin is skipped; it does not stop the remaining
    origins from being processed.

    Returns:
        list[str]: unique candidate names, in discovery order.
    """
    compnames_arr = []
    malformed = False
    # Loop-invariant: only a single-word component name is a useful extra candidate.
    single_word = ' ' not in self.filter_name

    try:
        origins = self.data['origins']
    except KeyError:
        origins = []
        malformed = True

    for ori_entry in origins:
        try:
            ori = ori_entry['externalId']
            ori_ver = ori_entry['name']
            ori_string = ori.replace(f"{ori_ver}", '')
            # Stripping the version leaves a trailing empty field, so the
            # name is expected in the second-to-last position.
            new_name = re.split(r"[:/#]", ori_string)[-2]
        except (KeyError, IndexError):
            malformed = True
            continue
        if not new_name:
            # An empty name can never match a path; treat it as malformed.
            malformed = True
            continue

        if new_name not in compnames_arr:
            logging.debug(
                f"Comp '{self.name}/{self.version}' Compname calculate from origin '{new_name}' - origin='{ori}'")
            compnames_arr.append(new_name)
        if single_word and self.filter_name not in compnames_arr:
            compnames_arr.append(self.filter_name)

    if malformed or not compnames_arr:
        logging.debug(f"Comp '{self.name}/{self.version}' Compname calculate from compname only '{self.name}'")
        if self.filter_name not in compnames_arr:
            compnames_arr.append(self.filter_name)
    return compnames_arr
221+
222+
def get_sigpaths(self):
    """Return the signature paths of this component, one per line.

    Each entry of ``self.sigentry_arr`` contributes the text returned by its
    ``get_sigpath()`` followed by a newline. Returns an empty string when
    there are no signature entries.
    """
    return ''.join(f"{sigentry.get_sigpath()}\n" for sigentry in self.sigentry_arr)

bd_sig_filter/ComponentListClass.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,11 +87,20 @@ def process(self):
8787
for comp in self.components:
8888
if comp.is_ignored():
8989
continue
90+
# DEBUG
91+
# if comp.is_only_signature():
92+
# arr = comp.get_origin_compnames()
93+
# print(f"names '{arr}")
94+
# comp.print_sigpaths()
95+
# continue
96+
# END DEBUG
9097
if comp.is_dependency():
9198
comp.set_reviewed()
9299
comp.reason = "Mark REVIEWED - Dependency"
93100
elif comp.is_only_signature():
94101
comp.process_signatures()
102+
else:
103+
comp.reason = 'No action - not Signature match'
95104

96105
# look for duplicate components (same compid) and ignore
97106
logging.debug("\nDUPLICATE SIGNATURE MATCHES FILTER PHASE")
@@ -281,3 +290,11 @@ def get_component_report_data(self):
281290
comp.ignore, comp.mark_reviewed, comp.reason])
282291
return data
283292

293+
def get_unmatched_list(self):
    """Return a text report of every component flagged as unmatched.

    For each component in ``self.components`` whose ``unmatched`` attribute is
    true, the report holds a header line with the component name/version and
    its comma-separated origin names, followed by the text returned by the
    component's ``get_sigpaths()``. Returns an empty string when no component
    is unmatched.
    """
    entries = []
    for comp in self.components:
        if not comp.unmatched:
            continue
        paths = comp.get_sigpaths()
        orinames = ','.join(comp.oriname_arr)
        entries.append(f"Comp: {comp.name}/{comp.version} (Origin names={orinames}):\n{paths}")
    return ''.join(entries)

bd_sig_filter/SigEntryClass.py

Lines changed: 86 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,9 @@ def __init__(self, src_entry):
1616
except KeyError:
1717
return
1818

19-
def search_component(self, compname, compver):
20-
logging.debug("")
21-
logging.debug(f"search_component() Checking Comp '{compname}/{compver}' - {self.path}:")
19+
def search_component(self, compname_arr, compver):
20+
# logging.debug("")
21+
# logging.debug(f"search_component() Checking Comp '{compname}/{compver}' - {self.path}:")
2222
# If component_version_reqd:
2323
# - folder matches compname and compver
2424
# - folder1 matches compname and folder2 matches compver
@@ -29,76 +29,103 @@ def search_component(self, compname, compver):
2929
# Bool2 - version found
3030
# Match_value - search result against both
3131

32-
compstring = f"{compname} {compver}"
33-
element_in_compname = 0
34-
compver_in_element = 0
3532

36-
# test of path search
37-
newpath = self.path.replace(os.sep, " ")
38-
newpath = re.sub(r"([a-zA-Z-]*)[0-9] ", "\1 ", newpath)
39-
comp_in_path = fuzz.token_set_ratio(compstring, newpath)
40-
logging.debug(f"search_component(): TEST comp_in_path is {comp_in_path}: path='{self.path}")
33+
best_match_name = 0
34+
best_match_ver = 0
35+
# match_path = ''
36+
for cname in compname_arr:
37+
# compstring = f"{cname} {compver}"
4138

42-
found_compname_only = False
43-
for element in self.elements:
44-
pos = re.search(r"\.dll|\.obj|\.o|\.a|\.lib|\.iso|\.qcow2|\.vmdk|\.vdi|\.ova|\.nbi|\.vib|\.exe|\.img|"
45-
"\.bin|\.apk|\.aac|\.ipa|\.msi|\.zip|\.gz|\.tar|\.xz|\.lz|\.bz2|\.7z|\.rar|"
46-
"\.cpio|\.Z|\.lz4|\.lha|\.arj|\.jar|\.ear|\.war|\.rpm|\.deb|\.dmg|\.pki", element)
47-
if pos is not None:
48-
element = element[:pos.start()]
49-
# How much of the element string is from the compname and version?
50-
# - for example acl-1.3.0.jar
51-
# - Value of 100 indicates either compname or version exists in element
52-
element_in_compstring = fuzz.token_set_ratio(element, compstring)
53-
element_in_compname = fuzz.token_set_ratio(element, compname)
54-
compver_in_element = fuzz.token_set_ratio(compver, element)
39+
# test of path search
40+
newpath = self.path.replace(os.sep, " ")
41+
# comp_in_path = fuzz.token_set_ratio(compstring, newpath)
42+
compname_in_path = fuzz.token_set_ratio(cname, newpath)
43+
compver_in_path = fuzz.token_set_ratio(compver, newpath)
44+
if compname_in_path + compver_in_path > 90:
45+
if compname_in_path + compver_in_path > best_match_name + best_match_ver:
46+
best_match_name = compname_in_path
47+
best_match_ver = compver_in_path
48+
# match_path = self.path
49+
logging.debug(f"search_component(): TEST '{cname}/{compver}' - {compname_in_path,compver_in_path}: path='{self.path}")
5550

56-
if element_in_compstring > 80:
57-
if compver_in_element > 50:
58-
# element has both compname and version
59-
logging.debug(f"search_component() - MATCHED component name & version ({compstring}) in '{element}'")
60-
return True, True, element_in_compname + compver_in_element
61-
elif element_in_compname > 50 and len(element) > 2:
62-
found_compname_only = True
63-
logging.debug(f"search_component() - FOUND component name ONLY ({compname}) in '{element}'")
64-
elif found_compname_only:
65-
if compver_in_element > 50:
66-
logging.debug(f"search_component() - MATCHED component version ({compver}) in '{element}'")
67-
return True, True, element_in_compname + compver_in_element
68-
else:
69-
test = 1
51+
name_bool = False
52+
ver_bool = False
53+
if best_match_name > 45:
54+
name_bool = True
55+
if best_match_ver > 45:
56+
ver_bool = True
7057

71-
if found_compname_only:
72-
logging.debug("search_component() - MATCHED Compname only")
73-
return True, False, element_in_compname + compver_in_element
74-
75-
logging.debug(f"search_component() - NOT MATCHED")
76-
return False, False, 0
58+
return name_bool, ver_bool, best_match_name + best_match_ver
59+
# compstring = f"{compname} {compver}"
60+
# element_in_compname = 0
61+
# compver_in_element = 0
62+
# found_compname_only = False
63+
# for element in self.elements:
64+
# pos = re.search(r"\.dll|\.obj|\.o|\.a|\.lib|\.iso|\.qcow2|\.vmdk|\.vdi|\.ova|\.nbi|\.vib|\.exe|\.img|"
65+
# "\.bin|\.apk|\.aac|\.ipa|\.msi|\.zip|\.gz|\.tar|\.xz|\.lz|\.bz2|\.7z|\.rar|"
66+
# "\.cpio|\.Z|\.lz4|\.lha|\.arj|\.jar|\.ear|\.war|\.rpm|\.deb|\.dmg|\.pki", element)
67+
# if pos is not None:
68+
# element = element[:pos.start()]
69+
# # How much of the element string is from the compname and version?
70+
# # - for example acl-1.3.0.jar
71+
# # - Value of 100 indicates either compname or version exists in element
72+
# element_in_compstring = fuzz.token_set_ratio(element, compstring)
73+
# element_in_compname = fuzz.token_set_ratio(element, compname)
74+
# compver_in_element = fuzz.token_set_ratio(compver, element)
75+
#
76+
# if element_in_compstring > 80:
77+
# if compver_in_element > 50:
78+
# # element has both compname and version
79+
# logging.debug(f"search_component() - MATCHED component name & version ({compstring}) in '{element}'")
80+
# return True, True, element_in_compname + compver_in_element
81+
# elif element_in_compname > 50 and len(element) > 2:
82+
# found_compname_only = True
83+
# logging.debug(f"search_component() - FOUND component name ONLY ({compname}) in '{element}'")
84+
# elif found_compname_only:
85+
# if compver_in_element > 50:
86+
# logging.debug(f"search_component() - MATCHED component version ({compver}) in '{element}'")
87+
# return True, True, element_in_compname + compver_in_element
88+
# else:
89+
# test = 1
90+
#
91+
# if found_compname_only:
92+
# logging.debug("search_component() - MATCHED Compname only")
93+
# return True, False, element_in_compname + compver_in_element
94+
#
95+
# logging.debug(f"search_component() - NOT MATCHED")
7796

7897

7998
def filter_folders(self):
    """Decide whether this signature path lies in a folder that should be ignored.

    Three folder groups are checked in order, each of which can be switched
    off through a ``global_values`` flag:

    * Synopsys tool folders/files (``global_values.no_ignore_synopsys``)
    * default cache/config folders (``global_values.no_ignore_defaults``)
    * test folders, matched case-insensitively (``global_values.no_ignore_test``)

    Returns:
        tuple[bool, str]: ``(True, reason)`` for the first group that matches
        an element of ``self.path``, otherwise ``(False, '')``.
    """
    # Escape the separator so a backslash separator cannot corrupt the patterns.
    sep = re.escape(os.sep)
    # Pad the path so its first and last elements are also delimited by a
    # separator on both sides and can therefore match.
    padded_path = os.sep + self.path + os.sep

    if not global_values.no_ignore_synopsys:
        # [^sep]* keeps the wildcard parts within a single path element.
        syn_folders_re = (rf"{sep}(\.synopsys|synopsys-detect|\.coverity|synopsys-detect[^{sep}]*\.jar|"
                          rf"scan\.cli\.impl-standalone\.jar|seeker-agent[^{sep}]*|"
                          rf"Black_Duck_Scan_Installation){sep}")
        res = re.search(syn_folders_re, padded_path)
        if res:
            return True, f"Found {res.group()} folder in Signature match path '{self.path}'"

    if not global_values.no_ignore_defaults:
        def_folders_re = (rf"{sep}(\.cache|\.m2|\.local|\.config|\.docker|\.npm|\.npmrc|"
                          rf"\.pyenv|\.Trash|\.git|node_modules){sep}")
        res = re.search(def_folders_re, padded_path)
        if res:
            return True, f"Found {res.group()} folder in Signature match path '{self.path}'"

    if not global_values.no_ignore_test:
        test_folders = rf"{sep}(test|tests|testsuite){sep}"
        res = re.search(test_folders, padded_path, flags=re.IGNORECASE)
        if res:
            return True, f"Found {res.group()} in Signature match path '{self.path}'"

    return False, ''
129+
130+
def get_sigpath(self):
    """Return this entry's path formatted as a report bullet (``- <path>``)."""
    return f"- {self.path}"

bd_sig_filter/config.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
parser.add_argument("--no_ignore_synopsys", help="Do not ignore components in synopsys tool folders", action='store_true')
2727
parser.add_argument("--no_ignore_defaults", help="Do not ignore components in default folders", action='store_true')
2828
parser.add_argument("--ignore_no_path_matches", help="Also ignore components with no component/version match in signature path", action='store_true')
29+
parser.add_argument("--report_unmatched", help="Report unmatched (not reviewed or ignored) components", action='store_true')
2930

3031
args = parser.parse_args()
3132

@@ -114,6 +115,9 @@ def check_args():
114115
terminate = True
115116
global_values.report_file = args.report_file
116117

118+
if args.report_unmatched:
119+
global_values.report_unmatched = True
120+
117121

118122
if terminate:
119123
sys.exit(2)

0 commit comments

Comments
 (0)