2
2
from typing import Any , Dict , List , Literal , Optional , Union
3
3
4
4
from loguru import logger
5
- from pydantic import Field
5
+ from pydantic import Field , computed_field
6
6
7
7
from guidellm .core .distribution import Distribution
8
8
from guidellm .core .request import TextGenerationRequest
@@ -221,6 +221,7 @@ def __iter__(self):
221
221
"""
222
222
return iter (self .results )
223
223
224
+ @computed_field
224
225
@property
225
226
def request_count (self ) -> int :
226
227
"""
@@ -231,6 +232,7 @@ def request_count(self) -> int:
231
232
"""
232
233
return len (self .results )
233
234
235
+ @computed_field
234
236
@property
235
237
def error_count (self ) -> int :
236
238
"""
@@ -241,6 +243,7 @@ def error_count(self) -> int:
241
243
"""
242
244
return len (self .errors )
243
245
246
+ @computed_field
244
247
@property
245
248
def total_count (self ) -> int :
246
249
"""
@@ -251,6 +254,7 @@ def total_count(self) -> int:
251
254
"""
252
255
return self .request_count + self .error_count
253
256
257
+ @computed_field
254
258
@property
255
259
def start_time (self ) -> Optional [float ]:
256
260
"""
@@ -264,6 +268,7 @@ def start_time(self) -> Optional[float]:
264
268
265
269
return self .results [0 ].start_time
266
270
271
+ @computed_field
267
272
@property
268
273
def end_time (self ) -> Optional [float ]:
269
274
"""
@@ -277,6 +282,7 @@ def end_time(self) -> Optional[float]:
277
282
278
283
return self .results [- 1 ].end_time
279
284
285
+ @computed_field
280
286
@property
281
287
def duration (self ) -> float :
282
288
"""
@@ -290,6 +296,7 @@ def duration(self) -> float:
290
296
291
297
return self .end_time - self .start_time
292
298
299
+ @computed_field
293
300
@property
294
301
def completed_request_rate (self ) -> float :
295
302
"""
@@ -303,6 +310,7 @@ def completed_request_rate(self) -> float:
303
310
304
311
return len (self .results ) / self .duration
305
312
313
+ @computed_field
306
314
@property
307
315
def request_latency (self ) -> float :
308
316
"""
@@ -332,6 +340,19 @@ def request_latency_distribution(self) -> Distribution:
332
340
]
333
341
)
334
342
343
@computed_field
@property
def request_latency_percentiles(self) -> List[float]:
    """
    Report request latency, in seconds, at a fixed set of percentile ranks.

    The ranks 1, 5, 10, 50, 90, 95 and 99 are handed, in that order, to the
    ``percentiles`` method of ``request_latency_distribution``.

    :return: Request latency values for the requested percentile ranks.
    :rtype: List[float]
    """
    ranks = [1, 5, 10, 50, 90, 95, 99]
    latency_dist = self.request_latency_distribution
    return latency_dist.percentiles(ranks)
353
+
354
+
355
+ @computed_field
335
356
@property
336
357
def time_to_first_token (self ) -> float :
337
358
"""
@@ -360,7 +381,19 @@ def ttft_distribution(self) -> Distribution:
360
381
if result .first_token_time is not None
361
382
]
362
383
)
384
+
385
@computed_field
@property
def time_to_first_token_percentiles(self) -> List[float]:
    """
    Report time to first token, in milliseconds, at fixed percentile ranks.

    The ranks 1, 5, 10, 50, 90, 95 and 99 are handed, in that order, to the
    ``percentiles`` method of ``ttft_distribution``.

    :return: Time-to-first-token values for the requested percentile ranks.
    :rtype: List[float]
    """
    ranks = [1, 5, 10, 50, 90, 95, 99]
    ttft_dist = self.ttft_distribution
    return ttft_dist.percentiles(ranks)
363
395
396
+ @computed_field
364
397
@property
365
398
def inter_token_latency (self ) -> float :
366
399
"""
@@ -387,7 +420,19 @@ def itl_distribution(self) -> Distribution:
387
420
decode for result in self .results for decode in result .decode_times .data
388
421
]
389
422
)
423
+
424
@computed_field
@property
def inter_token_latency_percentiles(self) -> List[float]:
    """
    Report the time between tokens, in milliseconds, at fixed percentile ranks.

    The ranks 1, 5, 10, 50, 90, 95 and 99 are handed, in that order, to the
    ``percentiles`` method of ``itl_distribution``.

    :return: Inter-token latency values for the requested percentile ranks.
    :rtype: List[float]
    """
    ranks = [1, 5, 10, 50, 90, 95, 99]
    itl_dist = self.itl_distribution
    return itl_dist.percentiles(ranks)
434
+
435
+ @computed_field
391
436
@property
392
437
def output_token_throughput (self ) -> float :
393
438
"""
@@ -403,6 +448,17 @@ def output_token_throughput(self) -> float:
403
448
404
449
return total_tokens / self .duration
405
450
451
@computed_field
@property
def prompt_token(self) -> float:
    """
    Report the mean prompt token count.

    Reads the ``mean`` attribute of ``prompt_token_distribution``.

    :return: The average number of prompt tokens.
    :rtype: float
    """
    prompt_dist = self.prompt_token_distribution
    return prompt_dist.mean
461
+
406
462
@property
407
463
def prompt_token_distribution (self ) -> Distribution :
408
464
"""
@@ -413,6 +469,28 @@ def prompt_token_distribution(self) -> Distribution:
413
469
"""
414
470
return Distribution (data = [result .prompt_token_count for result in self .results ])
415
471
472
@computed_field
@property
def prompt_token_percentiles(self) -> List[float]:
    """
    Report the prompt token count at a fixed set of percentile ranks.

    The ranks 1, 5, 50, 95 and 99 are handed, in that order, to the
    ``percentiles`` method of ``prompt_token_distribution``.

    :return: Prompt token counts for the requested percentile ranks.
    :rtype: List[float]
    """
    ranks = [1, 5, 50, 95, 99]
    prompt_dist = self.prompt_token_distribution
    return prompt_dist.percentiles(ranks)
482
+
483
@computed_field
@property
def output_token(self) -> float:
    """
    Report the mean output token count.

    Reads the ``mean`` attribute of ``output_token_distribution``.

    :return: The average number of output tokens.
    :rtype: float
    """
    output_dist = self.output_token_distribution
    return output_dist.mean
493
+
416
494
@property
417
495
def output_token_distribution (self ) -> Distribution :
418
496
"""
@@ -423,6 +501,18 @@ def output_token_distribution(self) -> Distribution:
423
501
"""
424
502
return Distribution (data = [result .output_token_count for result in self .results ])
425
503
504
@computed_field
@property
def output_token_percentiles(self) -> List[float]:
    """
    Report the output token count at a fixed set of percentile ranks.

    The ranks 1, 5, 50, 95 and 99 are handed, in that order, to the
    ``percentiles`` method of ``output_token_distribution``.

    :return: Output token counts for the requested percentile ranks.
    :rtype: List[float]
    """
    ranks = [1, 5, 50, 95, 99]
    output_dist = self.output_token_distribution
    return output_dist.percentiles(ranks)
514
+
515
+ @computed_field
426
516
@property
427
517
def overloaded (self ) -> bool :
428
518
if (
0 commit comments