Commit 2472547

Merge pull request #272 from blackducksoftware/hierarchy_report
Hierarchy report
2 parents bd36209 + 9b63b78 commit 2472547

Lines changed: 264 additions & 0 deletions
@@ -0,0 +1,264 @@
'''
Created on June 25, 2024

@author: dnichol and kumykov

Generate version detail reports (source and components) and consolidate information on source matches, with license
and component matched. Removes matches found underneath other matched components in the source tree (configurable).

Copyright (C) 2023 Synopsys, Inc.
http://www.synopsys.com/

Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
'''

import argparse
import csv
import logging
import sys
import io
import time
import json
import traceback
from blackduck import Client
from zipfile import ZipFile
from pprint import pprint

program_description = \
'''Generate version detail reports (source and components) and consolidate information on source matches, with license
and component matched. Removes matches found underneath other matched components in the source tree (configurable).

This script assumes a project version exists and has scans associated with it (i.e. the project is not scanned as part of this process).

'''
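
# A minimal example invocation (a sketch: the script file name, server URL, token file and
# project/version names below are hypothetical placeholders; only the flags come from
# parse_command_args() further down):
#
#   python hierarchy_report.py \
#       --base-url https://your.blackduck.url \
#       --token-file .bd_token \
#       --project-name "My Project" \
#       --project-version-name "1.0.0" \
#       --output-file my_project_1.0.0.csv
#
# A .csv output file writes the aggregate BOM entries as CSV; any other (or no) extension
# produces the JSON version report.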

# BD report general
BLACKDUCK_VERSION_MEDIATYPE = "application/vnd.blackducksoftware.status-4+json"
BLACKDUCK_VERSION_API = "/api/current-version"
# Retries to wait for BD report creation. RETRY_LIMIT can be overwritten by the script parameters.
RETRY_LIMIT = 30
RETRY_TIMER = 30

def log_config(debug):
    if debug:
        logging.basicConfig(format='%(asctime)s:%(levelname)s:%(module)s: %(message)s', stream=sys.stderr, level=logging.DEBUG)
    else:
        logging.basicConfig(format='%(asctime)s:%(levelname)s:%(module)s: %(message)s', stream=sys.stderr, level=logging.INFO)
    logging.getLogger("requests").setLevel(logging.WARNING)
    logging.getLogger("urllib3").setLevel(logging.WARNING)
    logging.getLogger("blackduck").setLevel(logging.WARNING)

def find_project_by_name(bd, project_name):
    params = {
        'q': [f"name:{project_name}"]
    }
    # The 'q' search can return partial matches, so keep only the exact project name.
    projects = [p for p in bd.get_resource('projects', params=params) if p['name'] == project_name]
    assert len(projects) == 1, f"Project {project_name} not found."
    return projects[0]

def find_project_version_by_name(bd, project, version_name):
    params = {
        'q': [f"versionName:{version_name}"]
    }
    versions = [v for v in bd.get_resource('versions', project, params=params) if v['versionName'] == version_name]
    assert len(versions) == 1, f"Project version {version_name} for project {project['name']} not found"
    return versions[0]

def get_bd_project_data(hub_client, project_name, version_name):
    """ Get and return project ID, version ID. """
    project_id = ""
    for project in hub_client.get_resource("projects"):
        if project['name'] == project_name:
            project_id = (project['_meta']['href']).split("projects/", 1)[1]
            break
    if project_id == "":
        sys.exit(f"No project for {project_name} was found!")
    version_id = ""
    for version in hub_client.get_resource("versions", project):
        if version['versionName'] == version_name:
            version_id = (version['_meta']['href']).split("versions/", 1)[1]
            break
    if version_id == "":
        sys.exit(f"No project version for {version_name} was found!")

    return project_id, version_id

def create_version_details_report(bd, version):
    version_reports_url = bd.list_resources(version).get('versionReport')
    post_data = {
        'reportFormat': 'JSON',
        'locale': 'en_US',
        'versionId': version['_meta']['href'].split("/")[-1],
        'categories': ['COMPONENTS', 'FILES']  # Generating "project version" report including components and files
    }

    bd.session.headers["Content-Type"] = "application/vnd.blackducksoftware.report-4+json"
    r = bd.session.post(version_reports_url, json=post_data)
    if r.status_code == 403:
        logging.debug("Authorization Error - Please ensure the token you are using has write permissions!")
    r.raise_for_status()
    location = r.headers.get('Location')
    assert location, "Hmm, this does not make sense. If we successfully created a report then there needs to be a location where we can get it from"
    return location

def download_report(bd, location, retries, timeout):
    report_id = location.split("/")[-1]
    logging.debug(f"Report location {location}")
    url_data = location.split('/')
    url_data.pop(4)
    url_data.pop(4)
    download_link = '/'.join(url_data)
    logging.debug(f"Report Download link {download_link}")
    if retries:
        logging.debug(f"Retrieving generated report for {location} via {download_link}")
        response = bd.session.get(location)
        report_status = response.json().get('status', 'Not Ready')
        if response.status_code == 200 and report_status == 'COMPLETED':
            response = bd.session.get(download_link, headers={'Content-Type': 'application/zip', 'Accept': 'application/zip'})
            if response.status_code == 200:
                return response.content
            else:
                logging.error(f"Report download failed with status {response.status_code}")
                return None
        else:
            logging.debug(f"Report status request {response.status_code} {report_status}, waiting {timeout} seconds then retrying...")
            time.sleep(timeout)
            retries -= 1
            return download_report(bd, location, retries, timeout)
    else:
        logging.debug(f"Failed to retrieve report {report_id} after multiple retries")
        return None
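
# Illustrative note (assuming the report 'Location' header has the usual form
# .../api/versions/<versionId>/reports/<reportId>): the two pop(4) calls above remove the
# 'versions/<versionId>' path segments, turning the status location into a download link of
# the form .../api/reports/<reportId>.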

def get_blackduck_version(hub_client):
    url = hub_client.base_url + BLACKDUCK_VERSION_API
    res = hub_client.session.get(url)
    if res.status_code == 200 and res.content:
        return json.loads(res.content)['version']
    else:
        sys.exit(f"Get BlackDuck version failed with status {res.status_code}")

def reduce(path_set):
    # Drop any later path that contains an earlier path as a substring,
    # keeping only the top-most match for each subtree.
    path_set.sort()
    for path in path_set:
        if len(path) < 3:
            continue
        index = path_set.index(path)
        while index + 1 < len(path_set) and path in path_set[index+1]:
            logging.debug(f"{path} is in {path_set[index+1]} deleting the sub-path from the list")
            path_set.pop(index+1)
    return path_set
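
# Hypothetical example of the reduction above: given the sorted path set
#   ["parent.zip!/parent.zip/lib/child.jar",
#    "parent.zip!/parent.zip/lib/child.jar!/child.jar/Foo.class"]
# the second entry contains the first as a substring, so it is dropped and only the
# top-most match in the source tree is kept.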

def trim_version_report(version_report, reduced_path_set):
    file_bom_entries = version_report['detailedFileBomViewEntries']
    aggregate_bom_view_entries = version_report['aggregateBomViewEntries']

    reduced_file_bom_entries = [e for e in file_bom_entries if f"{e.get('archiveContext', '')}!{e['path']}" in reduced_path_set]
    version_report['detailedFileBomViewEntries'] = reduced_file_bom_entries

    component_identifiers = [f"{e['projectId']}:{e['versionId']}" for e in reduced_file_bom_entries]
    deduplicated = list(dict.fromkeys(component_identifiers))

    reduced_aggregate_bom_view_entries = [e for e in aggregate_bom_view_entries if f"{e['producerProject']['id']}:{e['producerReleases'][0]['id']}" in deduplicated]
    version_report['aggregateBomViewEntries'] = reduced_aggregate_bom_view_entries
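
# Sketch of the trimming above with hypothetical IDs: if the reduced path set only retains
# file entries whose projectId:versionId is "p1:v1", then aggregateBomViewEntries is filtered
# down to entries whose producerProject id / first producerReleases id pair is "p1:v1", so
# components matched only beneath removed paths drop out of the BOM section as well.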

def write_output_file(version_report, output_file):
    if output_file.lower().endswith(".csv"):
        logging.info(f"Writing CSV output into {output_file}")
        field_names = list(version_report['aggregateBomViewEntries'][0].keys())
        with open(output_file, "w") as f:
            writer = csv.DictWriter(f, fieldnames=field_names)
            writer.writeheader()
            writer.writerows(version_report['aggregateBomViewEntries'])

        return
    # Otherwise write JSON, appending the extension if it is missing.
    if not output_file.lower().endswith(".json"):
        output_file += ".json"
    logging.info(f"Writing JSON output into {output_file}")
    with open(output_file, "w") as f:
        json.dump(version_report, f)

def parse_command_args():
    parser = argparse.ArgumentParser(description=program_description, formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument("-u", "--base-url", required=True, help="Hub server URL e.g. https://your.blackduck.url")
    parser.add_argument("-t", "--token-file", required=True, help="File containing access token")
    parser.add_argument("-nv", "--no-verify", action='store_false', help="Disable TLS certificate verification")
    parser.add_argument("-d", "--debug", action='store_true', help="Set debug output on")
    parser.add_argument("-pn", "--project-name", required=True, help="Project Name")
    parser.add_argument("-pv", "--project-version-name", required=True, help="Project Version Name")
    parser.add_argument("-o", "--output-file", required=False, help="File name to write output to. The file extension determines the format (.json or .csv); JSON is the default.")
    parser.add_argument("-kh", "--keep_hierarchy", action='store_true', help="Set to keep all entries in the sources report. Will not remove components found under others.")
    parser.add_argument("--report-retries", metavar="", type=int, default=RETRY_LIMIT, help="Retries for receiving the generated Black Duck report. Report generation can take several minutes.")
    parser.add_argument("--report-timeout", metavar="", type=int, default=RETRY_TIMER, help="Wait time between subsequent report download attempts.")
    parser.add_argument("--timeout", metavar="", type=int, default=60, help="Timeout for REST API calls. Some calls may take longer than the default 60 seconds.")
    parser.add_argument("--retries", metavar="", type=int, default=4, help="Retries for REST API calls. Some calls may need more than the default 4 retries.")
    return parser.parse_args()

def main():
    args = parse_command_args()
    with open(args.token_file, 'r') as tf:
        token = tf.readline().strip()
    output_file = args.output_file
    if not args.output_file:
        output_file = f"{args.project_name}-{args.project_version_name}.json".replace(" ", "_")
    try:
        log_config(args.debug)
        hub_client = Client(token=token,
                            base_url=args.base_url,
                            verify=args.no_verify,
                            timeout=args.timeout,
                            retries=args.retries)

        project = find_project_by_name(hub_client, args.project_name)
        version = find_project_version_by_name(hub_client, project, args.project_version_name)
        location = create_version_details_report(hub_client, version)
        report_zip = download_report(hub_client, location, args.report_retries, args.report_timeout)
        # Delete the report from Black Duck once it has been downloaded.
        logging.debug(f"Deleting report from Black Duck {hub_client.session.delete(location)}")
        zip_file = ZipFile(io.BytesIO(report_zip), "r")
        pprint(zip_file.namelist())
        report_data = {name: zip_file.read(name) for name in zip_file.namelist()}
        filename = [i for i in report_data.keys() if i.endswith(".json")][0]
        version_report = json.loads(report_data[filename])
        # Dump the raw version report to out.json before any trimming.
        with open("out.json", "w") as f:
            json.dump(version_report, f)
        # TODO items
        # Process file section of report data to identify primary paths
        path_set = [f"{entry.get('archiveContext', '')}!{entry['path']}" for entry in version_report['detailedFileBomViewEntries']]
        reduced_path_set = reduce(path_set.copy())
        logging.info(f"{len(path_set)-len(reduced_path_set)} path entries were scrubbed from the dataset.")

        # Remove component entries that correspond to removed path entries.

        logging.info(f"Original dataset contains {len(version_report['aggregateBomViewEntries'])} bom entries and {len(version_report['detailedFileBomViewEntries'])} file view entries")
        if not args.keep_hierarchy:
            trim_version_report(version_report, reduced_path_set)
            logging.info(f"Truncated dataset contains {len(version_report['aggregateBomViewEntries'])} bom entries and {len(version_report['detailedFileBomViewEntries'])} file view entries")

        write_output_file(version_report, output_file)

        # Combine component data with selected file data
        # Output result with CSV and JSON as options.

    except (Exception, BaseException) as err:
        logging.error(f"Exception: {str(err)}. See the stack trace below")
        traceback.print_exc()

if __name__ == '__main__':
    sys.exit(main())
