'''
Created on June 25, 2024

@author: dnichol and kumykov

Generate version detail reports (source and components) and consolidate information on source matches, with license
and component matched. Removes matches found underneath other matched components in the source tree (configurable).

Copyright (C) 2023 Synopsys, Inc.
http://www.synopsys.com/

Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
'''

import argparse
import csv
import logging
import sys
import io
import time
import json
import traceback
from blackduck import Client
from zipfile import ZipFile
from pprint import pprint

program_description = \
'''Generate version detail reports (source and components) and consolidate information on source matches, with license
and component matched. Removes matches found underneath other matched components in the source tree (configurable).

This script assumes a project version exists and has scans associated with it (i.e. the project is not scanned as part of this process).

'''

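# Example invocation (illustrative sketch only -- the script filename and the argument values
# below are assumptions, not taken from this repository; the flags themselves are defined in
# parse_command_args()):
#
#   python generate_source_report.py -u https://your.blackduck.url -t token.txt \
#       -pn "My Project" -pv "1.0" -o my_project-1.0.csv
#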
# BD report general
BLACKDUCK_VERSION_MEDIATYPE = "application/vnd.blackducksoftware.status-4+json"
BLACKDUCK_VERSION_API = "/api/current-version"
# Defaults for waiting on BD report generation; can be overridden with --report-retries and --report-timeout.
RETRY_LIMIT = 30
RETRY_TIMER = 30

def log_config(debug):
    if debug:
        logging.basicConfig(format='%(asctime)s:%(levelname)s:%(module)s: %(message)s', stream=sys.stderr, level=logging.DEBUG)
    else:
        logging.basicConfig(format='%(asctime)s:%(levelname)s:%(module)s: %(message)s', stream=sys.stderr, level=logging.INFO)
    logging.getLogger("requests").setLevel(logging.WARNING)
    logging.getLogger("urllib3").setLevel(logging.WARNING)
    logging.getLogger("blackduck").setLevel(logging.WARNING)

def find_project_by_name(bd, project_name):
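    """Return the project whose name matches project_name exactly; fails via assert if it is not found."""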
    params = {
        'q': [f"name:{project_name}"]
    }
    projects = [p for p in bd.get_resource('projects', params=params) if p['name'] == project_name]
    assert len(projects) == 1, f"Project {project_name} not found."
    return projects[0]

def find_project_version_by_name(bd, project, version_name):
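    """Return the project version whose versionName matches version_name exactly; fails via assert if it is not found."""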
    params = {
        'q': [f"versionName:{version_name}"]
    }
    versions = [v for v in bd.get_resource('versions', project, params=params) if v['versionName'] == version_name]
    assert len(versions) == 1, f"Project version {version_name} for project {project['name']} not found"
    return versions[0]

def get_bd_project_data(hub_client, project_name, version_name):
    """ Get and return project ID, version ID. """
    project_id = ""
    for project in hub_client.get_resource("projects"):
        if project['name'] == project_name:
            project_id = (project['_meta']['href']).split("projects/", 1)[1]
            break
    if project_id == "":
        sys.exit(f"No project for {project_name} was found!")
    version_id = ""
    for version in hub_client.get_resource("versions", project):
        if version['versionName'] == version_name:
            version_id = (version['_meta']['href']).split("versions/", 1)[1]
            break
    if version_id == "":
        sys.exit(f"No project version for {version_name} was found!")

    return project_id, version_id

def create_version_details_report(bd, version):
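    """Request generation of a version details report (COMPONENTS and FILES categories) and return the Location URL used to poll and download it."""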
    version_reports_url = bd.list_resources(version).get('versionReport')
    post_data = {
        'reportFormat': 'JSON',
        'locale': 'en_US',
        'versionId': version['_meta']['href'].split("/")[-1],
        'categories': ['COMPONENTS', 'FILES']  # Generating a "project version" report including components and files
    }

    bd.session.headers["Content-Type"] = "application/vnd.blackducksoftware.report-4+json"
    r = bd.session.post(version_reports_url, json=post_data)
    if r.status_code == 403:
        logging.error("Authorization Error - Please ensure the token you are using has write permissions!")
    r.raise_for_status()
    location = r.headers.get('Location')
    assert location, "Report creation succeeded but the response did not include a Location header to retrieve it from"
    return location

def download_report(bd, location, retries, timeout):
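    """Poll the report location until generation completes, then download and return the report ZIP content.

    Returns None if the report is not ready after the given number of retries or the download fails.
    """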
    report_id = location.split("/")[-1]
    logging.debug(f"Report location {location}")
    url_data = location.split('/')
    url_data.pop(4)
    url_data.pop(4)
    download_link = '/'.join(url_data)
    logging.debug(f"Report download link {download_link}")
    if retries:
        logging.debug(f"Retrieving generated report for {location} via {download_link}")
        response = bd.session.get(location)
        report_status = response.json().get('status', 'Not Ready')
        if response.status_code == 200 and report_status == 'COMPLETED':
            response = bd.session.get(download_link, headers={'Content-Type': 'application/zip', 'Accept': 'application/zip'})
            if response.status_code == 200:
                return response.content
            else:
                logging.error(f"Failed to download report content: status code {response.status_code}")
                return None
        else:
            logging.debug(f"Report status request {response.status_code} {report_status}, waiting {timeout} seconds then retrying...")
            time.sleep(timeout)
            retries -= 1
            return download_report(bd, location, retries, timeout)
    else:
        logging.error(f"Failed to retrieve report {report_id} after multiple retries")
        return None

def get_blackduck_version(hub_client):
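    """Return the Black Duck server version string, exiting the script if the request fails."""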
    url = hub_client.base_url + BLACKDUCK_VERSION_API
    res = hub_client.session.get(url)
    if res.status_code == 200 and res.content:
        return json.loads(res.content)['version']
    else:
        sys.exit(f"Retrieving the Black Duck version failed with status {res.status_code}")

def reduce(path_set):
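    """Collapse the sorted path list so that only top-level matches remain.

    After sorting, any path that contains an earlier path as a substring is treated as a
    sub-path of that match and removed. Illustrative example (the path values are
    assumptions, not taken from a real report):

        reduce(["lib.tar!/inner.jar!/a.class", "lib.tar!/"])  ->  ["lib.tar!/"]
    """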
    path_set.sort()
    for path in path_set:
        if len(path) < 3:
            continue
        index = path_set.index(path)
        while index + 1 < len(path_set) and path in path_set[index+1]:
            logging.debug(f"{path} is in {path_set[index+1]}, deleting the sub-path from the list")
            path_set.pop(index+1)
    return path_set

def trim_version_report(version_report, reduced_path_set):
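    """Drop file BOM entries whose paths were removed by reduce(), then drop aggregate BOM entries
    for components that no longer have any remaining file entries."""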
    file_bom_entries = version_report['detailedFileBomViewEntries']
    aggregate_bom_view_entries = version_report['aggregateBomViewEntries']

    reduced_file_bom_entries = [e for e in file_bom_entries if f"{e.get('archiveContext', '')}!{e['path']}" in reduced_path_set]
    version_report['detailedFileBomViewEntries'] = reduced_file_bom_entries

    component_identifiers = [f"{e['projectId']}:{e['versionId']}" for e in reduced_file_bom_entries]
    deduplicated = list(dict.fromkeys(component_identifiers))

    reduced_aggregate_bom_view_entries = [e for e in aggregate_bom_view_entries if f"{e['producerProject']['id']}:{e['producerReleases'][0]['id']}" in deduplicated]
    version_report['aggregateBomViewEntries'] = reduced_aggregate_bom_view_entries

def write_output_file(version_report, output_file):
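    """Write the version report to output_file: CSV (aggregate BOM entries only) or JSON (full report), based on the file extension."""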
    if output_file.lower().endswith(".csv"):
        logging.info(f"Writing CSV output into {output_file}")
        field_names = list(version_report['aggregateBomViewEntries'][0].keys())
        with open(output_file, "w", newline='') as f:
            writer = csv.DictWriter(f, fieldnames=field_names)
            writer.writeheader()
            writer.writerows(version_report['aggregateBomViewEntries'])

        return
    # Default to JSON output if the extension is anything other than .csv
    if not output_file.lower().endswith(".json"):
        output_file += ".json"
    logging.info(f"Writing JSON output into {output_file}")
    with open(output_file, "w") as f:
        json.dump(version_report, f)

def parse_command_args():
    parser = argparse.ArgumentParser(description=program_description, formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument("-u", "--base-url", required=True, help="Hub server URL e.g. https://your.blackduck.url")
    parser.add_argument("-t", "--token-file", required=True, help="File containing access token")
    parser.add_argument("-nv", "--no-verify", action='store_false', help="Disable TLS certificate verification")
    parser.add_argument("-d", "--debug", action='store_true', help="Set debug output on")
    parser.add_argument("-pn", "--project-name", required=True, help="Project Name")
    parser.add_argument("-pv", "--project-version-name", required=True, help="Project Version Name")
    parser.add_argument("-o", "--output-file", required=False, help="File name to write output to. The extension determines the format (.json or .csv); JSON is the default.")
    parser.add_argument("-kh", "--keep_hierarchy", action='store_true', help="Set to keep all entries in the sources report. Will not remove components found under others.")
    parser.add_argument("--report-retries", metavar="", type=int, default=RETRY_LIMIT, help="Retries for receiving the generated Black Duck report. Report generation can take several minutes.")
    parser.add_argument("--report-timeout", metavar="", type=int, default=RETRY_TIMER, help="Wait time in seconds between report download attempts.")
    parser.add_argument("--timeout", metavar="", type=int, default=60, help="Timeout in seconds for REST API calls. Some APIs may take longer than the default 60 seconds.")
    parser.add_argument("--retries", metavar="", type=int, default=4, help="Retries for REST API calls. Some APIs may need more than the default 4 retries.")
    return parser.parse_args()

def main():
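    """Drive the end-to-end flow: look up the project version, generate and download the version
    details report, optionally trim matches nested under other matched components, and write the
    result to CSV or JSON."""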
    args = parse_command_args()
    with open(args.token_file, 'r') as tf:
        token = tf.readline().strip()
    output_file = args.output_file
    if not args.output_file:
        output_file = f"{args.project_name}-{args.project_version_name}.json".replace(" ", "_")
    try:
        log_config(args.debug)
        hub_client = Client(token=token,
                            base_url=args.base_url,
                            verify=args.no_verify,
                            timeout=args.timeout,
                            retries=args.retries)

        project = find_project_by_name(hub_client, args.project_name)
        version = find_project_version_by_name(hub_client, project, args.project_version_name)
        location = create_version_details_report(hub_client, version)
        report_zip = download_report(hub_client, location, args.report_retries, args.report_timeout)
        delete_response = hub_client.session.delete(location)
        logging.debug(f"Deleting report from Black Duck: {delete_response}")
        if report_zip is None:
            sys.exit("Failed to retrieve the generated report; giving up.")
        zip_file = ZipFile(io.BytesIO(report_zip), "r")
        pprint(zip_file.namelist())
        report_data = {name: zip_file.read(name) for name in zip_file.namelist()}
        filename = [i for i in report_data.keys() if i.endswith(".json")][0]
        version_report = json.loads(report_data[filename])
        # Keep a raw copy of the report for reference / debugging.
        with open("out.json", "w") as f:
            json.dump(version_report, f)
        # TODO items
        # Process file section of report data to identify primary paths
        path_set = [f"{entry.get('archiveContext', '')}!{entry['path']}" for entry in version_report['detailedFileBomViewEntries']]
        reduced_path_set = reduce(path_set.copy())
        logging.info(f"{len(path_set)-len(reduced_path_set)} path entries were scrubbed from the dataset.")

        # Remove component entries that correspond to removed path entries.

        logging.info(f"Original dataset contains {len(version_report['aggregateBomViewEntries'])} bom entries and {len(version_report['detailedFileBomViewEntries'])} file view entries")
        if not args.keep_hierarchy:
            trim_version_report(version_report, reduced_path_set)
            logging.info(f"Truncated dataset contains {len(version_report['aggregateBomViewEntries'])} bom entries and {len(version_report['detailedFileBomViewEntries'])} file view entries")

        write_output_file(version_report, output_file)

        # Combine component data with selected file data
        # Output result with CSV and JSON as options.

    except Exception as err:
        logging.error(f"Exception: {str(err)}. See the stack trace below.")
        traceback.print_exc()

if __name__ == '__main__':
    sys.exit(main())