2
2
from typing import Any , Dict , List , Literal , Optional , Union
3
3
4
4
from loguru import logger
5
- from pydantic import Field
5
+ from pydantic import Field , computed_field
6
6
7
7
from guidellm .core .distribution import Distribution
8
8
from guidellm .core .request import TextGenerationRequest
@@ -221,6 +221,7 @@ def __iter__(self):
221
221
"""
222
222
return iter (self .results )
223
223
224
+ @computed_field # type: ignore[misc]
224
225
@property
225
226
def request_count (self ) -> int :
226
227
"""
@@ -231,6 +232,7 @@ def request_count(self) -> int:
231
232
"""
232
233
return len (self .results )
233
234
235
+ @computed_field # type: ignore[misc]
234
236
@property
235
237
def error_count (self ) -> int :
236
238
"""
@@ -241,6 +243,7 @@ def error_count(self) -> int:
241
243
"""
242
244
return len (self .errors )
243
245
246
+ @computed_field # type: ignore[misc]
244
247
@property
245
248
def total_count (self ) -> int :
246
249
"""
@@ -251,6 +254,7 @@ def total_count(self) -> int:
251
254
"""
252
255
return self .request_count + self .error_count
253
256
257
+ @computed_field # type: ignore[misc]
254
258
@property
255
259
def start_time (self ) -> Optional [float ]:
256
260
"""
@@ -264,6 +268,7 @@ def start_time(self) -> Optional[float]:
264
268
265
269
return self .results [0 ].start_time
266
270
271
+ @computed_field # type: ignore[misc]
267
272
@property
268
273
def end_time (self ) -> Optional [float ]:
269
274
"""
@@ -277,6 +282,7 @@ def end_time(self) -> Optional[float]:
277
282
278
283
return self .results [- 1 ].end_time
279
284
285
+ @computed_field # type: ignore[misc]
280
286
@property
281
287
def duration (self ) -> float :
282
288
"""
@@ -290,6 +296,7 @@ def duration(self) -> float:
290
296
291
297
return self .end_time - self .start_time
292
298
299
+ @computed_field # type: ignore[misc]
293
300
@property
294
301
def completed_request_rate (self ) -> float :
295
302
"""
@@ -303,6 +310,7 @@ def completed_request_rate(self) -> float:
303
310
304
311
return len (self .results ) / self .duration
305
312
313
+ @computed_field # type: ignore[misc]
306
314
@property
307
315
def request_latency (self ) -> float :
308
316
"""
@@ -332,6 +340,19 @@ def request_latency_distribution(self) -> Distribution:
332
340
]
333
341
)
334
342
343
@computed_field  # type: ignore[misc]
@property
def request_latency_percentiles(self) -> List[float]:
    """
    Standard percentiles of the request latency, in seconds.

    Delegates to ``request_latency_distribution.percentiles`` with the
    percentile points 1, 5, 10, 50, 90, 95 and 99.

    :return: Request latency, in seconds, at each requested percentile.
    :rtype: List[float]
    """
    percentile_points = [1, 5, 10, 50, 90, 95, 99]
    latency_distribution = self.request_latency_distribution
    return latency_distribution.percentiles(percentile_points)
353
+
354
+
355
+ @computed_field # type: ignore[misc]
335
356
@property
336
357
def time_to_first_token (self ) -> float :
337
358
"""
@@ -361,6 +382,20 @@ def ttft_distribution(self) -> Distribution:
361
382
]
362
383
)
363
384
385
@computed_field  # type: ignore[misc]
@property
def time_to_first_token_percentiles(self) -> List[float]:
    """
    Standard percentiles of the time taken to decode the first token,
    in milliseconds.

    Delegates to ``ttft_distribution.percentiles`` with the percentile
    points 1, 5, 10, 50, 90, 95 and 99.

    :return: Time to first token, in milliseconds, at each requested
        percentile.
    :rtype: List[float]
    """
    percentile_points = [1, 5, 10, 50, 90, 95, 99]
    first_token_distribution = self.ttft_distribution
    return first_token_distribution.percentiles(percentile_points)
397
+
398
+ @computed_field # type: ignore[misc]
364
399
@property
365
400
def inter_token_latency (self ) -> float :
366
401
"""
@@ -388,6 +423,18 @@ def itl_distribution(self) -> Distribution:
388
423
]
389
424
)
390
425
426
@computed_field  # type: ignore[misc]
@property
def inter_token_latency_percentiles(self) -> List[float]:
    """
    Standard percentiles of the time between tokens, in milliseconds.

    Delegates to ``itl_distribution.percentiles`` with the percentile
    points 1, 5, 10, 50, 90, 95 and 99.

    :return: Time between tokens at each requested percentile.
    :rtype: List[float]
    """
    percentile_points = [1, 5, 10, 50, 90, 95, 99]
    between_token_distribution = self.itl_distribution
    return between_token_distribution.percentiles(percentile_points)
436
+
437
+ @computed_field # type: ignore[misc]
391
438
@property
392
439
def output_token_throughput (self ) -> float :
393
440
"""
@@ -403,6 +450,17 @@ def output_token_throughput(self) -> float:
403
450
404
451
return total_tokens / self .duration
405
452
453
@computed_field  # type: ignore[misc]
@property
def prompt_token(self) -> float:
    """
    Average number of prompt tokens.

    Reads the ``mean`` of ``prompt_token_distribution``.

    :return: The mean of the prompt token distribution.
    :rtype: float
    """
    prompt_counts = self.prompt_token_distribution
    return prompt_counts.mean
463
+
406
464
@property
407
465
def prompt_token_distribution (self ) -> Distribution :
408
466
"""
@@ -413,6 +471,28 @@ def prompt_token_distribution(self) -> Distribution:
413
471
"""
414
472
return Distribution (data = [result .prompt_token_count for result in self .results ])
415
473
474
@computed_field  # type: ignore[misc]
@property
def prompt_token_percentiles(self) -> List[float]:
    """
    Standard percentiles of the number of prompt tokens.

    Delegates to ``prompt_token_distribution.percentiles`` with the
    percentile points 1, 5, 50, 95 and 99.

    :return: Number of prompt tokens at each requested percentile.
    :rtype: List[float]
    """
    percentile_points = [1, 5, 50, 95, 99]
    prompt_counts = self.prompt_token_distribution
    return prompt_counts.percentiles(percentile_points)
484
+
485
@computed_field  # type: ignore[misc]
@property
def output_token(self) -> float:
    """
    Average number of output tokens.

    Reads the ``mean`` of ``output_token_distribution``.

    :return: The mean of the output token distribution.
    :rtype: float
    """
    output_counts = self.output_token_distribution
    return output_counts.mean
495
+
416
496
@property
417
497
def output_token_distribution (self ) -> Distribution :
418
498
"""
@@ -423,6 +503,18 @@ def output_token_distribution(self) -> Distribution:
423
503
"""
424
504
return Distribution (data = [result .output_token_count for result in self .results ])
425
505
506
@computed_field  # type: ignore[misc]
@property
def output_token_percentiles(self) -> List[float]:
    """
    Standard percentiles of the number of output tokens.

    Delegates to ``output_token_distribution.percentiles`` with the
    percentile points 1, 5, 50, 95 and 99.

    :return: Number of output tokens at each requested percentile.
    :rtype: List[float]
    """
    percentile_points = [1, 5, 50, 95, 99]
    output_counts = self.output_token_distribution
    return output_counts.percentiles(percentile_points)
516
+
517
+ @computed_field # type: ignore[misc]
426
518
@property
427
519
def overloaded (self ) -> bool :
428
520
if (
0 commit comments