Skip to content

Commit ba4187d

Browse files
anmarques and markurtz
authored and committed
Add mean and percentile info as computed_field properties such that they become serializable
1 parent 355f368 commit ba4187d

File tree

2 files changed

+99
-19
lines changed

2 files changed

+99
-19
lines changed

src/guidellm/core/report.py

Lines changed: 8 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -147,19 +147,15 @@ def _create_benchmark_report_data_tokens_summary(
147147
for benchmark in report.benchmarks_sorted:
148148
table.add_row(
149149
_benchmark_rate_id(benchmark),
150-
f"{benchmark.prompt_token_distribution.mean:.2f}",
150+
f"{benchmark.prompt_token:.2f}",
151151
", ".join(
152152
f"{percentile:.1f}"
153-
for percentile in benchmark.prompt_token_distribution.percentiles(
154-
[1, 5, 50, 95, 99]
155-
)
153+
for percentile in benchmark.prompt_token_percentiles
156154
),
157-
f"{benchmark.output_token_distribution.mean:.2f}",
155+
f"{benchmark.output_token:.2f}",
158156
", ".join(
159157
f"{percentile:.1f}"
160-
for percentile in benchmark.output_token_distribution.percentiles(
161-
[1, 5, 50, 95, 99]
162-
)
158+
for percentile in benchmark.output_token_percentiles
163159
),
164160
)
165161
logger.debug("Created data tokens summary table for the report.")
@@ -181,7 +177,7 @@ def _create_benchmark_report_dist_perf_summary(
181177
"Benchmark",
182178
"Request Latency [1%, 5%, 10%, 50%, 90%, 95%, 99%] (sec)",
183179
"Time to First Token [1%, 5%, 10%, 50%, 90%, 95%, 99%] (ms)",
184-
"Inter Token Latency [1%, 5%, 10%, 50%, 90% 95%, 99%] (ms)",
180+
"Inter Token Latency [1%, 5%, 10%, 50%, 90%, 95%, 99%] (ms)",
185181
title="[magenta]Performance Stats by Benchmark[/magenta]",
186182
title_style="bold",
187183
title_justify="left",
@@ -193,21 +189,15 @@ def _create_benchmark_report_dist_perf_summary(
193189
_benchmark_rate_id(benchmark),
194190
", ".join(
195191
f"{percentile:.2f}"
196-
for percentile in benchmark.request_latency_distribution.percentiles(
197-
[1, 5, 10, 50, 90, 95, 99]
198-
)
192+
for percentile in benchmark.request_latency_percentiles
199193
),
200194
", ".join(
201195
f"{percentile * 1000:.1f}"
202-
for percentile in benchmark.ttft_distribution.percentiles(
203-
[1, 5, 10, 50, 90, 95, 99]
204-
)
196+
for percentile in benchmark.time_to_first_token_percentiles
205197
),
206198
", ".join(
207199
f"{percentile * 1000:.1f}"
208-
for percentile in benchmark.itl_distribution.percentiles(
209-
[1, 5, 10, 50, 90, 95, 99]
210-
)
200+
for percentile in benchmark.inter_token_latency_percentiles
211201
),
212202
)
213203
logger.debug("Created distribution performance summary table for the report.")

src/guidellm/core/result.py

Lines changed: 91 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
from typing import Any, Dict, List, Literal, Optional, Union
33

44
from loguru import logger
5-
from pydantic import Field
5+
from pydantic import Field, computed_field
66

77
from guidellm.core.distribution import Distribution
88
from guidellm.core.request import TextGenerationRequest
@@ -221,6 +221,7 @@ def __iter__(self):
221221
"""
222222
return iter(self.results)
223223

224+
@computed_field
224225
@property
225226
def request_count(self) -> int:
226227
"""
@@ -231,6 +232,7 @@ def request_count(self) -> int:
231232
"""
232233
return len(self.results)
233234

235+
@computed_field
234236
@property
235237
def error_count(self) -> int:
236238
"""
@@ -241,6 +243,7 @@ def error_count(self) -> int:
241243
"""
242244
return len(self.errors)
243245

246+
@computed_field
244247
@property
245248
def total_count(self) -> int:
246249
"""
@@ -251,6 +254,7 @@ def total_count(self) -> int:
251254
"""
252255
return self.request_count + self.error_count
253256

257+
@computed_field
254258
@property
255259
def start_time(self) -> Optional[float]:
256260
"""
@@ -264,6 +268,7 @@ def start_time(self) -> Optional[float]:
264268

265269
return self.results[0].start_time
266270

271+
@computed_field
267272
@property
268273
def end_time(self) -> Optional[float]:
269274
"""
@@ -277,6 +282,7 @@ def end_time(self) -> Optional[float]:
277282

278283
return self.results[-1].end_time
279284

285+
@computed_field
280286
@property
281287
def duration(self) -> float:
282288
"""
@@ -290,6 +296,7 @@ def duration(self) -> float:
290296

291297
return self.end_time - self.start_time
292298

299+
@computed_field
293300
@property
294301
def completed_request_rate(self) -> float:
295302
"""
@@ -303,6 +310,7 @@ def completed_request_rate(self) -> float:
303310

304311
return len(self.results) / self.duration
305312

313+
@computed_field
306314
@property
307315
def request_latency(self) -> float:
308316
"""
@@ -332,6 +340,19 @@ def request_latency_distribution(self) -> Distribution:
332340
]
333341
)
334342

343+
@computed_field
344+
@property
345+
def request_latency_percentiles(self) -> List[float]:
346+
"""
347+
Get standard percentiles of request latency in seconds.
348+
349+
:return: List of percentile request latency in seconds
350+
:rtype: List[float]
351+
"""
352+
return self.request_latency_distribution.percentiles([1, 5, 10, 50, 90, 95, 99])
353+
354+
355+
@computed_field
335356
@property
336357
def time_to_first_token(self) -> float:
337358
"""
@@ -360,7 +381,19 @@ def ttft_distribution(self) -> Distribution:
360381
if result.first_token_time is not None
361382
]
362383
)
384+
385+
@computed_field
386+
@property
387+
def time_to_first_token_percentiles(self) -> List[float]:
388+
"""
389+
Get standard percentiles for time taken to decode the first token in seconds.
390+
391+
:return: List of percentiles of the time taken to decode the first token in seconds.
392+
:rtype: List[float]
393+
"""
394+
return self.ttft_distribution.percentiles([1, 5, 10, 50, 90, 95, 99])
363395

396+
@computed_field
364397
@property
365398
def inter_token_latency(self) -> float:
366399
"""
@@ -387,7 +420,19 @@ def itl_distribution(self) -> Distribution:
387420
decode for result in self.results for decode in result.decode_times.data
388421
]
389422
)
423+
424+
@computed_field
425+
@property
426+
def inter_token_latency_percentiles(self) -> List[float]:
427+
"""
428+
Get standard percentiles for the time between tokens in seconds.
390429
430+
:return: List of percentiles of the time between tokens in seconds.
431+
:rtype: List[float]
432+
"""
433+
return self.itl_distribution.percentiles([1, 5, 10, 50, 90, 95, 99])
434+
435+
@computed_field
391436
@property
392437
def output_token_throughput(self) -> float:
393438
"""
@@ -403,6 +448,17 @@ def output_token_throughput(self) -> float:
403448

404449
return total_tokens / self.duration
405450

451+
@computed_field
452+
@property
453+
def prompt_token(self) -> float:
454+
"""
455+
Get the average number of prompt tokens.
456+
457+
:return: The average number of prompt tokens.
458+
:rtype: float
459+
"""
460+
return self.prompt_token_distribution.mean
461+
406462
@property
407463
def prompt_token_distribution(self) -> Distribution:
408464
"""
@@ -413,6 +469,28 @@ def prompt_token_distribution(self) -> Distribution:
413469
"""
414470
return Distribution(data=[result.prompt_token_count for result in self.results])
415471

472+
@computed_field
473+
@property
474+
def prompt_token_percentiles(self) -> List[float]:
475+
"""
476+
Get standard percentiles for number of prompt tokens.
477+
478+
:return: List of percentiles of number of prompt tokens.
479+
:rtype: List[float]
480+
"""
481+
return self.prompt_token_distribution.percentiles([1, 5, 50, 95, 99])
482+
483+
@computed_field
484+
@property
485+
def output_token(self) -> float:
486+
"""
487+
Get the average number of output tokens.
488+
489+
:return: The average number of output tokens.
490+
:rtype: float
491+
"""
492+
return self.output_token_distribution.mean
493+
416494
@property
417495
def output_token_distribution(self) -> Distribution:
418496
"""
@@ -423,6 +501,18 @@ def output_token_distribution(self) -> Distribution:
423501
"""
424502
return Distribution(data=[result.output_token_count for result in self.results])
425503

504+
@computed_field
505+
@property
506+
def output_token_percentiles(self) -> List[float]:
507+
"""
508+
Get standard percentiles for number of output tokens.
509+
510+
:return: List of percentiles of number of output tokens.
511+
:rtype: List[float]
512+
"""
513+
return self.output_token_distribution.percentiles([1, 5, 50, 95, 99])
514+
515+
@computed_field
426516
@property
427517
def overloaded(self) -> bool:
428518
if (

0 commit comments

Comments
 (0)