Skip to content

Commit a725552

Browse files
committed
csv de-duplication added
1 parent 2ba8cd5 commit a725552

File tree

1 file changed

+34
-6
lines changed

1 file changed

+34
-6
lines changed

examples/client/file_hierarchy_report.py

Lines changed: 34 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ def trim_version_report(version_report, reduced_path_set):
172172
def get_csv_fieldnames():
173173
return ['component name', 'version name', 'license', 'match type', 'review status']
174174

175-
def get_csv_data(version_report):
175+
def get_csv_data(version_report, keep_dupes):
176176
csv_data = list()
177177
components = list()
178178
for bom_view_entry in version_report['aggregateBomViewEntries']:
@@ -192,16 +192,43 @@ def get_csv_data(version_report):
192192
if composite_key not in components:
193193
csv_data.append(entry)
194194
components.append(composite_key)
195-
return csv_data
196-
197-
def write_output_file(version_report, output_file):
195+
if keep_dupes:
196+
return csv_data
197+
else:
198+
return remove_duplicates(csv_data)
199+
200+
def remove_duplicates(data):
    """Collapse CSV rows whose component names are prefix-duplicates.

    Rows are grouped by their 'version name'. Within each group the rows
    are ordered by lower-cased 'component name', and a row is dropped when
    its name starts with the name of the most recently kept row (comparison
    ignores case). Example: 'OpenSSL' and 'openssl-libs' with the same
    version collapse to 'OpenSSL'.

    Args:
        data: iterable of dict rows; each row must contain the keys
            'component name' and 'version name'.

    Returns:
        A new list of the surviving rows (the same dict objects), grouped
        by version in first-seen order and sorted by lower-cased component
        name inside each group. The input sequence is not modified.

    Raises:
        KeyError: if a row lacks 'component name' or 'version name'.
    """
    # Put data into buckets by version (dict keeps first-seen version order)
    buckets = {}
    for row in data:
        buckets.setdefault(row['version name'], []).append(row)

    reduced_data = []
    for rows in buckets.values():
        # After sorting, every name that extends a given prefix sits directly
        # behind that prefix, so comparing against the last kept name suffices.
        kept_name = None
        for row in sorted(rows, key=lambda r: r['component name'].lower()):
            name = row['component name'].lower()
            # A blank kept name is never treated as a prefix: ''.startswith
            # semantics would otherwise discard every other row in the bucket.
            if kept_name and name.startswith(kept_name):
                continue
            reduced_data.append(row)
            kept_name = name
    return reduced_data
223+
224+
def write_output_file(version_report, output_file, keep_dupes):
198225
if output_file.lower().endswith(".csv"):
199226
logging.info(f"Writing CSV output into {output_file}")
200227
field_names = get_csv_fieldnames()
201228
with open(output_file, "w") as f:
202229
writer = csv.DictWriter(f, fieldnames = field_names, extrasaction = 'ignore',quoting=csv.QUOTE_ALL) # TODO
203230
writer.writeheader()
204-
writer.writerows(get_csv_data(version_report))
231+
writer.writerows(get_csv_data(version_report, keep_dupes))
205232
return
206233
# If it's neither, then .json
207234
if not output_file.lower().endswith(".json"):
@@ -219,6 +246,7 @@ def parse_command_args():
219246
parser.add_argument("-pn", "--project-name", required=True, help="Project Name")
220247
parser.add_argument("-pv", "--project-version-name", required=True, help="Project Version Name")
221248
parser.add_argument("-o", "--output-file", required=False, help="File name to write output. File extension determines format .json and .csv, json is the default.")
249+
parser.add_argument("-kd", "--keep-dupes", action='store_true', help="Do not reduce CVS data by fuzzy matching component names")
222250
parser.add_argument("-kh", "--keep_hierarchy", action='store_true', help="Set to keep all entries in the sources report. Will not remove components found under others.")
223251
parser.add_argument("--report-retries", metavar="", type=int, default=RETRY_LIMIT, help="Retries for receiving the generated BlackDuck report. Generating copyright report tends to take longer minutes.")
224252
parser.add_argument("--report-timeout", metavar="", type=int, default=RETRY_TIMER, help="Wait time between subsequent download attempts.")
@@ -266,7 +294,7 @@ def main():
266294
trim_version_report(version_report, reduced_path_set)
267295
logging.info(f"Truncated dataset contains {len(version_report['aggregateBomViewEntries'])} bom entries and {len(version_report['detailedFileBomViewEntries'])} file view entries")
268296

269-
write_output_file(version_report, output_file)
297+
write_output_file(version_report, output_file, args.keep_dupes)
270298

271299
# Combine component data with selected file data
272300
# Output result with CSV and JSON as options.

0 commit comments

Comments
 (0)