Commit 1f7a638

Merge branch 'output_summary' into http_backend
2 parents: bca2614 + d9819e9

File tree: 3 files changed (+104, -19 lines)

src/guidellm/backend/openai.py
Lines changed: 3 additions & 0 deletions

@@ -92,6 +92,9 @@ async def make_request(
                 {
                     "max_tokens": request.output_token_count,
                     "stop": None,
+                    "extra_body": {
+                        "ignore_eos": True,
+                    }
                 }
             )
         elif settings.openai.max_gen_tokens and settings.openai.max_gen_tokens > 0:
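
For context on the added "ignore_eos" flag: the openai Python client's extra_body parameter merges extra keys into the JSON request body, which is how server-specific options are passed through an OpenAI-compatible endpoint. ignore_eos is a vLLM-style extension that tells the server not to stop at the end-of-sequence token, so the completion contains exactly max_tokens tokens. A minimal sketch of the equivalent standalone call (the base URL, model name, and prompt are placeholders, and this assumes a vLLM-compatible server):

from openai import OpenAI

# Placeholder endpoint for a local OpenAI-compatible server (e.g. vLLM).
client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

completion = client.completions.create(
    model="my-model",          # placeholder model name
    prompt="Hello, world!",
    max_tokens=128,
    stop=None,
    # extra_body keys are merged into the request payload; ignore_eos asks the
    # server to keep generating past EOS so exactly max_tokens tokens come back.
    extra_body={"ignore_eos": True},
)
print(completion.choices[0].text)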

src/guidellm/core/report.py
Lines changed: 8 additions & 18 deletions

@@ -147,19 +147,15 @@ def _create_benchmark_report_data_tokens_summary(
     for benchmark in report.benchmarks_sorted:
         table.add_row(
             _benchmark_rate_id(benchmark),
-            f"{benchmark.prompt_token_distribution.mean:.2f}",
+            f"{benchmark.prompt_token:.2f}",
             ", ".join(
                 f"{percentile:.1f}"
-                for percentile in benchmark.prompt_token_distribution.percentiles(
-                    [1, 5, 50, 95, 99]
-                )
+                for percentile in benchmark.prompt_token_percentiles
             ),
-            f"{benchmark.output_token_distribution.mean:.2f}",
+            f"{benchmark.output_token:.2f}",
             ", ".join(
                 f"{percentile:.1f}"
-                for percentile in benchmark.output_token_distribution.percentiles(
-                    [1, 5, 50, 95, 99]
-                )
+                for percentile in benchmark.output_token_percentiles
             ),
         )
     logger.debug("Created data tokens summary table for the report.")

@@ -181,7 +177,7 @@ def _create_benchmark_report_dist_perf_summary(
         "Benchmark",
         "Request Latency [1%, 5%, 10%, 50%, 90%, 95%, 99%] (sec)",
         "Time to First Token [1%, 5%, 10%, 50%, 90%, 95%, 99%] (ms)",
-        "Inter Token Latency [1%, 5%, 10%, 50%, 90% 95%, 99%] (ms)",
+        "Inter Token Latency [1%, 5%, 10%, 50%, 90%, 95%, 99%] (ms)",
         title="[magenta]Performance Stats by Benchmark[/magenta]",
         title_style="bold",
         title_justify="left",

@@ -193,21 +189,15 @@ def _create_benchmark_report_dist_perf_summary(
             _benchmark_rate_id(benchmark),
             ", ".join(
                 f"{percentile:.2f}"
-                for percentile in benchmark.request_latency_distribution.percentiles(
-                    [1, 5, 10, 50, 90, 95, 99]
-                )
+                for percentile in benchmark.request_latency_percentiles
             ),
             ", ".join(
                 f"{percentile * 1000:.1f}"
-                for percentile in benchmark.ttft_distribution.percentiles(
-                    [1, 5, 10, 50, 90, 95, 99]
-                )
+                for percentile in benchmark.time_to_first_token_percentiles
             ),
             ", ".join(
                 f"{percentile * 1000:.1f}"
-                for percentile in benchmark.itl_distribution.percentiles(
-                    [1, 5, 10, 50, 90, 95, 99]
-                )
+                for percentile in benchmark.inter_token_latency_percentiles
             ),
         )
     logger.debug("Created distribution performance summary table for the report.")

src/guidellm/core/result.py
Lines changed: 93 additions & 1 deletion

@@ -2,7 +2,7 @@
 from typing import Any, Dict, List, Literal, Optional, Union

 from loguru import logger
-from pydantic import Field
+from pydantic import Field, computed_field

 from guidellm.core.distribution import Distribution
 from guidellm.core.request import TextGenerationRequest

@@ -221,6 +221,7 @@ def __iter__(self):
         """
         return iter(self.results)

+    @computed_field  # type: ignore[misc]
     @property
     def request_count(self) -> int:
         """

@@ -231,6 +232,7 @@ def request_count(self) -> int:
         """
         return len(self.results)

+    @computed_field  # type: ignore[misc]
     @property
     def error_count(self) -> int:
         """

@@ -241,6 +243,7 @@ def error_count(self) -> int:
         """
         return len(self.errors)

+    @computed_field  # type: ignore[misc]
     @property
     def total_count(self) -> int:
         """

@@ -251,6 +254,7 @@ def total_count(self) -> int:
         """
         return self.request_count + self.error_count

+    @computed_field  # type: ignore[misc]
     @property
     def start_time(self) -> Optional[float]:
         """

@@ -264,6 +268,7 @@ def start_time(self) -> Optional[float]:

         return self.results[0].start_time

+    @computed_field  # type: ignore[misc]
     @property
     def end_time(self) -> Optional[float]:
         """

@@ -277,6 +282,7 @@ def end_time(self) -> Optional[float]:

         return self.results[-1].end_time

+    @computed_field  # type: ignore[misc]
     @property
     def duration(self) -> float:
         """

@@ -290,6 +296,7 @@ def duration(self) -> float:

         return self.end_time - self.start_time

+    @computed_field  # type: ignore[misc]
     @property
     def completed_request_rate(self) -> float:
         """

@@ -303,6 +310,7 @@ def completed_request_rate(self) -> float:

         return len(self.results) / self.duration

+    @computed_field  # type: ignore[misc]
     @property
     def request_latency(self) -> float:
         """

@@ -332,6 +340,19 @@ def request_latency_distribution(self) -> Distribution:
             ]
         )

+    @computed_field  # type: ignore[misc]
+    @property
+    def request_latency_percentiles(self) -> List[float]:
+        """
+        Get standard percentiles of request latency in seconds.
+
+        :return: List of percentile request latency in seconds
+        :rtype: List[float]
+        """
+        return self.request_latency_distribution.percentiles([1, 5, 10, 50, 90, 95, 99])
+
+
+    @computed_field  # type: ignore[misc]
     @property
     def time_to_first_token(self) -> float:
         """

@@ -361,6 +382,20 @@ def ttft_distribution(self) -> Distribution:
             ]
         )

+    @computed_field  # type: ignore[misc]
+    @property
+    def time_to_first_token_percentiles(self) -> List[float]:
+        """
+        Get standard percentiles for time taken to decode the first token
+        in milliseconds.
+
+        :return: List of percentile time taken to decode the first token
+            in milliseconds.
+        :rtype: List[float]
+        """
+        return self.ttft_distribution.percentiles([1, 5, 10, 50, 90, 95, 99])
+
+    @computed_field  # type: ignore[misc]
     @property
     def inter_token_latency(self) -> float:
         """

@@ -388,6 +423,18 @@ def itl_distribution(self) -> Distribution:
             ]
         )

+    @computed_field  # type: ignore[misc]
+    @property
+    def inter_token_latency_percentiles(self) -> List[float]:
+        """
+        Get standard percentiles for the time between tokens in milliseconds.
+
+        :return: List of percentiles for the average time between tokens.
+        :rtype: List[float]
+        """
+        return self.itl_distribution.percentiles([1, 5, 10, 50, 90, 95, 99])
+
+    @computed_field  # type: ignore[misc]
     @property
     def output_token_throughput(self) -> float:
         """

@@ -403,6 +450,17 @@ def output_token_throughput(self) -> float:

         return total_tokens / self.duration

+    @computed_field  # type: ignore[misc]
+    @property
+    def prompt_token(self) -> float:
+        """
+        Get the average number of prompt tokens.
+
+        :return: The average number of prompt tokens.
+        :rtype: float
+        """
+        return self.prompt_token_distribution.mean
+
     @property
     def prompt_token_distribution(self) -> Distribution:
         """

@@ -413,6 +471,28 @@ def prompt_token_distribution(self) -> Distribution:
         """
         return Distribution(data=[result.prompt_token_count for result in self.results])

+    @computed_field  # type: ignore[misc]
+    @property
+    def prompt_token_percentiles(self) -> List[float]:
+        """
+        Get standard percentiles for number of prompt tokens.
+
+        :return: List of percentiles of number of prompt tokens.
+        :rtype: List[float]
+        """
+        return self.prompt_token_distribution.percentiles([1, 5, 50, 95, 99])
+
+    @computed_field  # type: ignore[misc]
+    @property
+    def output_token(self) -> float:
+        """
+        Get the average number of output tokens.
+
+        :return: The average number of output tokens.
+        :rtype: float
+        """
+        return self.output_token_distribution.mean
+
     @property
     def output_token_distribution(self) -> Distribution:
         """

@@ -423,6 +503,18 @@ def output_token_distribution(self) -> Distribution:
         """
         return Distribution(data=[result.output_token_count for result in self.results])

+    @computed_field  # type: ignore[misc]
+    @property
+    def output_token_percentiles(self) -> List[float]:
+        """
+        Get standard percentiles for number of output tokens.
+
+        :return: List of percentiles of number of output tokens.
+        :rtype: List[float]
+        """
+        return self.output_token_distribution.percentiles([1, 5, 50, 95, 99])
+
+    @computed_field  # type: ignore[misc]
     @property
     def overloaded(self) -> bool:
         if (
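
The practical effect of wrapping these properties in pydantic's @computed_field is that they are now included when the benchmark object is serialized (model_dump() / model_dump_json()), so the summary statistics travel with a saved report instead of having to be recomputed from the raw results. A minimal, self-contained sketch of that behavior (the class and field names here are illustrative, not guidellm's):

from typing import List

from pydantic import BaseModel, computed_field


class LatencySummary(BaseModel):
    latencies_sec: List[float] = []

    @computed_field  # type: ignore[misc]
    @property
    def request_latency(self) -> float:
        """Mean request latency in seconds (0.0 when there is no data)."""
        if not self.latencies_sec:
            return 0.0
        return sum(self.latencies_sec) / len(self.latencies_sec)


summary = LatencySummary(latencies_sec=[0.2, 0.4, 0.6])
# The computed field appears in the dump alongside the stored field.
print(summary.model_dump())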
