# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

| 15 | +"""A command-line tool to compare benchmark results in json format. |
| 16 | +
|
This tool lets one see the difference between two independent runs
of the same benchmarks. This is convenient whenever one develops a
performance fix and wants to find out whether a particular change
brings a measurable performance improvement.

For example:

    $ swift run -c release BenchmarkMinimalExample --format json > a.json

    $ swift run -c release BenchmarkMinimalExample --format json > b.json

    $ python Scripts/compare.py a.json b.json
    benchmark                    column            a        b       %
    -----------------------------------------------------------------
    add string no capacity       time       37099.00 37160.00   -0.16
    add string no capacity       std            1.13     1.30  -15.27
    add string no capacity       iterations 37700.00 37618.00    0.22
    add string reserved capacity time       36730.00 36743.00   -0.04
    add string reserved capacity std            1.12     2.42 -116.30
    add string reserved capacity iterations 38078.00 38084.00   -0.02
    -----------------------------------------------------------------
                                 time                             -0.10
                                 std                              -57.90
                                 iterations                         0.10

Here one can see an output that compares two independent runs `a` and
`b` and concludes that they differ by only 0.1%, and thus most likely
represent identical results.

One can filter the results included in the comparison, both by benchmark
name, using the `--filter` and `--filter-not` flags, and by the column of
the json output, using `--columns`.
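
For instance, to compare only the `time` and `std` columns of benchmarks
whose names contain "add string" (an illustrative invocation, not part of
the example output above):

    $ python Scripts/compare.py a.json b.json --filter "add string" --columns time,std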
| 49 | +""" |
| 50 | + |
import argparse
from collections import defaultdict
import json
import re


def require(cond, msg):
    """Fails with the given message if the condition does not hold."""

    if not cond:
        raise Exception(msg)


def validate(file_name, parsed):
    """Validates that the given json object is a valid benchmarks result."""

    require("benchmarks" in parsed,
            "{}: missing key 'benchmarks'.".format(file_name))
    require(len(parsed["benchmarks"]) > 0,
            "{}: must have at least one benchmark.".format(file_name))

    for i, benchmark in enumerate(parsed["benchmarks"]):
        require("name" in benchmark,
                "{}: benchmark #{}: missing key 'name'.".format(file_name, i))

        for k, v in benchmark.items():
            if k == "name":
                continue
            is_num = isinstance(v, (int, float))
            template = "{}: benchmark #{}: values must be numbers."
            require(is_num, template.format(file_name, i))


def parse_and_validate(args):
    """Parses command-line args, loads the given json files, and validates their contents."""

    runs = []

    for file_name in args.file_names:
        with open(file_name) as f:
            try:
                parsed = json.load(f)
            except Exception as err:
                raise Exception(
                    "{}: failed to parse json: {}".format(file_name, err))
            validate(file_name, parsed)
            runs.append((file_name, parsed))

    return runs


def benchmark_predicate(args):
    """Returns a predicate used to filter benchmarks by name, based on cli args."""

    include = lambda x: True

    if args.filter:
        regex = re.compile(args.filter)
        # Bind regex and the previous predicate as default arguments, so the
        # lambda does not pick up later rebindings of these names.
        include = (lambda x, regex=regex, prev=include:
                   regex.search(x) is not None and prev(x))

    if args.filter_not:
        regex = re.compile(args.filter_not)
        include = (lambda x, regex=regex, prev=include:
                   regex.search(x) is None and prev(x))

    return include

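# For example, with --filter "capacity" and --filter-not "reserved", the
# returned predicate accepts "add string no capacity" but rejects
# "add string reserved capacity".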

def collect_values(args, runs):
    """Collects benchmark values for the comparison, excluding filtered out columns."""

    baseline_name, baseline = runs[0]

    include_benchmark = benchmark_predicate(args)
    include_column = lambda x: args.columns is None or x in args.columns

    confs = []
    values = {}
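    # confs lists (benchmark_name, column) pairs in display order; values maps
    # each pair to a {file_name: value} dict, filled in below.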

    for benchmark in baseline["benchmarks"]:
        benchmark_name = benchmark["name"]
        if not include_benchmark(benchmark_name):
            continue
        for column in benchmark.keys():
            if column == "name":
                continue
            if not include_column(column):
                continue
            conf = (benchmark_name, column)
            confs.append(conf)
            values[conf] = {}

    for conf in confs:
        bench_name, column = conf
        for file_name, run in runs:
            for bench in run["benchmarks"]:
                if bench["name"] == bench_name:
                    values[conf][file_name] = bench[column]

    return (confs, values)


def geomean(values):
    """Computes the geometric mean of the given sequence of values."""

    product = 1.0
    for value in values:
        product *= value
    return product**(1.0 / len(values))

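# For example, geomean([2.0, 8.0]) == 4.0. The geometric mean is used to
# aggregate the per-benchmark ratios computed in to_table, since averaging
# relative (multiplicative) quantities arithmetically would skew the totals.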
def to_table(confs, args, values):
    """Computes a table of relative results across all input files."""

    baseline_file_name = args.baseline
    rows = []

    # Header row.
    header = []
    header.append("benchmark")
    header.append("column")
    for n, file_name in enumerate(args.file_names):
        name = file_name.replace(".json", "")
        header.append(name)
        if n != 0:
            header.append("%")
    rows.append(header)

    # Body rows.
    relative_values = defaultdict(lambda: defaultdict(list))
    for conf in confs:
        bench_name, column = conf
        row = []
        row.append(bench_name)
        row.append(column)
        for n, file_name in enumerate(args.file_names):
            base_value = values[conf][baseline_file_name]
            value = values[conf][file_name]
            row.append("{:.2f}".format(value))
            if n != 0:
                relative = value / base_value
                relative_values[column][file_name].append(relative)
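                # A relative value below 1 (the candidate's value is smaller
                # than the baseline's) yields a positive percentage below;
                # a value above 1 yields a negative one.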
                relative_percentage = (1 - relative) * 100
                row.append("{:.2f}".format(relative_percentage))
        rows.append(row)

    # Compute totals for each column as the geomean of all relative results.
    cols = []
    geomean_values = defaultdict(dict)
    for (_, col) in confs:
        if col not in cols:
            cols.append(col)
            for n, file_name in enumerate(args.file_names):
                if n != 0:
                    vs = relative_values[col][file_name]
                    geomean_values[col][file_name] = geomean(vs)

    for col in cols:
        row = []
        row.append("")
        row.append(col)
        for n, file_name in enumerate(args.file_names):
            row.append("")
            if n != 0:
                value = geomean_values[col][file_name]
                percentage = (1 - value) * 100
                row.append("{:.2f}".format(percentage))
        rows.append(row)

    return rows


def pad(base, fill, count, right=False):
    """Pads base with fill until it is count characters long, on the left or right."""

    while len(base) < count:
        if right:
            base += fill
        else:
            base = fill + base
    return base

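# For example, pad("42", "0", 4) == "0042", while
# pad("ab", "-", 4, right=True) == "ab--".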
def print_table(table):
    """Pretty-prints the results table as aligned human-readable text."""

    # Collect the maximum width of each column.
    widths = defaultdict(int)
    for row in table:
        for ncol, col in enumerate(row):
            widths[ncol] = max(widths[ncol], len(str(col)))

    # Print the results as aligned text to stdout.
    totals = False
    for nrow, row in enumerate(table):
        if row[0] == '' and not totals:
            print("-" * (sum(widths.values()) + len(widths) - 1))
            totals = True
        line = []
        for ncol, col in enumerate(row):
            right = ncol == 0 or ncol == 1
            line.append(pad(str(col), " ", widths[ncol], right=right))
        print(" ".join(line))
        if nrow == 0:
            print("-" * (sum(widths.values()) + len(widths) - 1))


def parse_args():
    """Parses command-line flags into a configuration object and returns it."""

    parser = argparse.ArgumentParser(
        description="Compare multiple swift-benchmark json files.")
    parser.add_argument("baseline", help="Baseline json file to compare against.")
    parser.add_argument("candidate", nargs="+",
                        help="Candidate json files to compare against baseline.")
    parser.add_argument("--filter",
                        help="Only show benchmarks that match the regular expression.")
    parser.add_argument("--filter-not",
                        help="Exclude benchmarks whose names match the regular expression.")
    parser.add_argument("--columns", help="A comma-separated list of columns to show.")

    args = parser.parse_args()
    args.file_names = [args.baseline]
    args.file_names.extend(args.candidate)
    if args.columns is not None:
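        # e.g. "time,std" becomes {"time", "std"}.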
        args.columns = set(args.columns.split(","))

    return args


def main():
    """Command-line entry point."""

    args = parse_args()
    runs = parse_and_validate(args)
    confs, values = collect_values(args, runs)
    table = to_table(confs, args, values)
    print_table(table)


if __name__ == "__main__":
    main()