
Commit fd04739

Add summary metrics to saved json file (#46)
At the end of a run, GuideLLM prints summary metrics computed from the raw results, but these are not currently saved anywhere. This PR adds those metrics as serializable properties of the `TextGenerationBenchmark` class. Most of the metrics are already declared as properties but are not serializable; adding the `@computed_field` decorator is enough in those cases. Other properties were added to complete the list.
1 parent edba84d commit fd04739
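
For reference, here is a minimal pydantic sketch of the mechanism this commit leans on (the model and field names are illustrative, not GuideLLM code): a plain `@property` is computed on access but skipped by serialization, while one wrapped in `@computed_field` shows up in `model_dump()` / `model_dump_json()` output.

from typing import List

from pydantic import BaseModel, computed_field


class BenchmarkSketch(BaseModel):
    """Illustrative stand-in for a benchmark model; not the GuideLLM class."""

    latencies: List[float] = []

    @property
    def request_count(self) -> int:
        # A plain property: available in Python, but absent from the saved JSON.
        return len(self.latencies)

    @computed_field  # type: ignore[misc]
    @property
    def request_latency(self) -> float:
        # Decorated with @computed_field: included in model_dump()/model_dump_json().
        return sum(self.latencies) / len(self.latencies) if self.latencies else 0.0


print(BenchmarkSketch(latencies=[0.5, 1.5]).model_dump_json())
# -> {"latencies":[0.5,1.5],"request_latency":1.0}  (request_count is not serialized)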

File tree

2 files changed (+101 −19 lines):
  src/guidellm/core/report.py
  src/guidellm/core/result.py

src/guidellm/core/report.py

Lines changed: 8 additions & 18 deletions

@@ -147,19 +147,15 @@ def _create_benchmark_report_data_tokens_summary(
     for benchmark in report.benchmarks_sorted:
         table.add_row(
             _benchmark_rate_id(benchmark),
-            f"{benchmark.prompt_token_distribution.mean:.2f}",
+            f"{benchmark.prompt_token:.2f}",
             ", ".join(
                 f"{percentile:.1f}"
-                for percentile in benchmark.prompt_token_distribution.percentiles(
-                    [1, 5, 50, 95, 99]
-                )
+                for percentile in benchmark.prompt_token_percentiles
             ),
-            f"{benchmark.output_token_distribution.mean:.2f}",
+            f"{benchmark.output_token:.2f}",
             ", ".join(
                 f"{percentile:.1f}"
-                for percentile in benchmark.output_token_distribution.percentiles(
-                    [1, 5, 50, 95, 99]
-                )
+                for percentile in benchmark.output_token_percentiles
             ),
         )
     logger.debug("Created data tokens summary table for the report.")

@@ -181,7 +177,7 @@ def _create_benchmark_report_dist_perf_summary(
         "Benchmark",
         "Request Latency [1%, 5%, 10%, 50%, 90%, 95%, 99%] (sec)",
         "Time to First Token [1%, 5%, 10%, 50%, 90%, 95%, 99%] (ms)",
-        "Inter Token Latency [1%, 5%, 10%, 50%, 90% 95%, 99%] (ms)",
+        "Inter Token Latency [1%, 5%, 10%, 50%, 90%, 95%, 99%] (ms)",
         title="[magenta]Performance Stats by Benchmark[/magenta]",
         title_style="bold",
         title_justify="left",

@@ -193,21 +189,15 @@ def _create_benchmark_report_dist_perf_summary(
             _benchmark_rate_id(benchmark),
             ", ".join(
                 f"{percentile:.2f}"
-                for percentile in benchmark.request_latency_distribution.percentiles(
-                    [1, 5, 10, 50, 90, 95, 99]
-                )
+                for percentile in benchmark.request_latency_percentiles
             ),
             ", ".join(
                 f"{percentile * 1000:.1f}"
-                for percentile in benchmark.ttft_distribution.percentiles(
-                    [1, 5, 10, 50, 90, 95, 99]
-                )
+                for percentile in benchmark.time_to_first_token_percentiles
             ),
             ", ".join(
                 f"{percentile * 1000:.1f}"
-                for percentile in benchmark.itl_distribution.percentiles(
-                    [1, 5, 10, 50, 90, 95, 99]
-                )
+                for percentile in benchmark.inter_token_latency_percentiles
             ),
         )
     logger.debug("Created distribution performance summary table for the report.")

src/guidellm/core/result.py

Lines changed: 93 additions & 1 deletion

@@ -2,7 +2,7 @@
 from typing import Any, Dict, List, Literal, Optional, Union
 
 from loguru import logger
-from pydantic import Field
+from pydantic import Field, computed_field
 
 from guidellm.core.distribution import Distribution
 from guidellm.core.request import TextGenerationRequest

@@ -221,6 +221,7 @@ def __iter__(self):
         """
         return iter(self.results)
 
+    @computed_field # type: ignore[misc]
     @property
     def request_count(self) -> int:
         """

@@ -231,6 +232,7 @@ def request_count(self) -> int:
         """
         return len(self.results)
 
+    @computed_field # type: ignore[misc]
     @property
     def error_count(self) -> int:
         """

@@ -241,6 +243,7 @@ def error_count(self) -> int:
         """
         return len(self.errors)
 
+    @computed_field # type: ignore[misc]
     @property
     def total_count(self) -> int:
         """

@@ -251,6 +254,7 @@ def total_count(self) -> int:
         """
         return self.request_count + self.error_count
 
+    @computed_field # type: ignore[misc]
     @property
     def start_time(self) -> Optional[float]:
         """

@@ -264,6 +268,7 @@ def start_time(self) -> Optional[float]:
 
         return self.results[0].start_time
 
+    @computed_field # type: ignore[misc]
     @property
     def end_time(self) -> Optional[float]:
         """

@@ -277,6 +282,7 @@ def end_time(self) -> Optional[float]:
 
         return self.results[-1].end_time
 
+    @computed_field # type: ignore[misc]
     @property
     def duration(self) -> float:
         """

@@ -290,6 +296,7 @@ def duration(self) -> float:
 
         return self.end_time - self.start_time
 
+    @computed_field # type: ignore[misc]
     @property
     def completed_request_rate(self) -> float:
         """

@@ -303,6 +310,7 @@ def completed_request_rate(self) -> float:
 
         return len(self.results) / self.duration
 
+    @computed_field # type: ignore[misc]
     @property
     def request_latency(self) -> float:
         """

@@ -332,6 +340,19 @@ def request_latency_distribution(self) -> Distribution:
             ]
         )
 
+    @computed_field # type: ignore[misc]
+    @property
+    def request_latency_percentiles(self) -> List[float]:
+        """
+        Get standard percentiles of request latency in seconds.
+
+        :return: List of percentile request latency in seconds
+        :rtype: List[float]
+        """
+        return self.request_latency_distribution.percentiles([1, 5, 10, 50, 90, 95, 99])
+
+
+    @computed_field # type: ignore[misc]
     @property
     def time_to_first_token(self) -> float:
         """

@@ -361,6 +382,20 @@ def ttft_distribution(self) -> Distribution:
             ]
         )
 
+    @computed_field # type: ignore[misc]
+    @property
+    def time_to_first_token_percentiles(self) -> List[float]:
+        """
+        Get standard percentiles for time taken to decode the first token
+        in milliseconds.
+
+        :return: List of percentile time taken to decode the first token
+            in milliseconds.
+        :rtype: List[float]
+        """
+        return self.ttft_distribution.percentiles([1, 5, 10, 50, 90, 95, 99])
+
+    @computed_field # type: ignore[misc]
     @property
     def inter_token_latency(self) -> float:
         """

@@ -388,6 +423,18 @@ def itl_distribution(self) -> Distribution:
             ]
         )
 
+    @computed_field # type: ignore[misc]
+    @property
+    def inter_token_latency_percentiles(self) -> List[float]:
+        """
+        Get standard percentiles for the time between tokens in milliseconds.
+
+        :return: List of percentiles for the average time between tokens.
+        :rtype: List[float]
+        """
+        return self.itl_distribution.percentiles([1, 5, 10, 50, 90, 95, 99])
+
+    @computed_field # type: ignore[misc]
     @property
     def output_token_throughput(self) -> float:
         """

@@ -403,6 +450,17 @@ def output_token_throughput(self) -> float:
 
         return total_tokens / self.duration
 
+    @computed_field # type: ignore[misc]
+    @property
+    def prompt_token(self) -> float:
+        """
+        Get the average number of prompt tokens.
+
+        :return: The average number of prompt tokens.
+        :rtype: float
+        """
+        return self.prompt_token_distribution.mean
+
     @property
     def prompt_token_distribution(self) -> Distribution:
         """

@@ -413,6 +471,28 @@ def prompt_token_distribution(self) -> Distribution:
         """
         return Distribution(data=[result.prompt_token_count for result in self.results])
 
+    @computed_field # type: ignore[misc]
+    @property
+    def prompt_token_percentiles(self) -> List[float]:
+        """
+        Get standard percentiles for number of prompt tokens.
+
+        :return: List of percentiles of number of prompt tokens.
+        :rtype: List[float]
+        """
+        return self.prompt_token_distribution.percentiles([1, 5, 50, 95, 99])
+
+    @computed_field # type: ignore[misc]
+    @property
+    def output_token(self) -> float:
+        """
+        Get the average number of output tokens.
+
+        :return: The average number of output tokens.
+        :rtype: float
+        """
+        return self.output_token_distribution.mean
+
     @property
     def output_token_distribution(self) -> Distribution:
         """

@@ -423,6 +503,18 @@ def output_token_distribution(self) -> Distribution:
         """
         return Distribution(data=[result.output_token_count for result in self.results])
 
+    @computed_field # type: ignore[misc]
+    @property
+    def output_token_percentiles(self) -> List[float]:
+        """
+        Get standard percentiles for number of output tokens.
+
+        :return: List of percentiles of number of output tokens.
+        :rtype: List[float]
+        """
+        return self.output_token_distribution.percentiles([1, 5, 50, 95, 99])
+
+    @computed_field # type: ignore[misc]
     @property
     def overloaded(self) -> bool:
         if (
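
With these computed fields in place, the summary metrics travel with the benchmark whenever it is serialized, so they land in the saved JSON alongside the raw results. A hedged sketch of that end-to-end step (assuming `TextGenerationBenchmark` is a pydantic v2 model, as the `computed_field` import implies; the helper function and file path below are illustrative, not GuideLLM's actual saving code):

from guidellm.core.result import TextGenerationBenchmark


def save_benchmark_json(benchmark: TextGenerationBenchmark, path: str) -> None:
    # model_dump_json() now includes request_count, request_latency_percentiles,
    # prompt_token, output_token_percentiles, etc. in addition to the raw results.
    with open(path, "w") as fp:
        fp.write(benchmark.model_dump_json(indent=2))


# Usage (given an existing benchmark instance):
# save_benchmark_json(benchmark, "benchmark.json")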
