Skip to content

Commit 59ba130

Browse files
authored
Add Detailed Benchmark Progress Tracking and Enhanced Report Generation Features (#28)
## Summary This PR introduces enhancements to the benchmarking and report generation functionalities in the GuideLLM project. Key updates include the integration of detailed progress tracking using the rich library, improvements to performance statistics, and adjustments to report output formats. ## Details - Added `BenchmarkReportProgress` class to manage real-time progress updates for benchmarks and report generation using `rich`. - Updated the report structure to include comprehensive performance statistics, such as request latency, time to first token, and inter-token latency. - Introduced new methods to generate detailed tables summarizing requests, data tokens, and performance metrics in reports. - Enhanced the executor logic to handle different benchmark modes (e.g., sweep, synchronous, throughput) more effectively. - Refined error handling and logging for better clarity and debugging. - Modified unit tests to cover new functionalities and ensure robustness across various scenarios. - Adjusted configuration and settings to optimize benchmarking capabilities and ensure consistent output. ## Test Plan - Added new unit tests to verify the functionality of `BenchmarkReportProgress` and other report generation methods. - Performed end-to-end testing of different benchmark modes to ensure accurate real-time progress tracking and report generation. - Verified that all existing tests pass without any regression failures. <img width="716" alt="Screen Shot 2024-08-22 at 4 21 29 AM" src="https://github.com/user-attachments/assets/cf336607-3be2-47aa-b570-5bf7c44da1be"> <img width="1279" alt="Screen Shot 2024-08-22 at 4 59 21 AM" src="https://github.com/user-attachments/assets/fe6a167b-1fa3-41ca-b95d-882ddd08bc1c">
1 parent eb930bf commit 59ba130

31 files changed

+1592
-250
lines changed

.pre-commit-config.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ repos:
2626
pydantic_settings,
2727
pyyaml,
2828
requests,
29+
rich,
2930
transformers,
3031

3132
# dev dependencies

pyproject.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ dependencies = [
3535
"pydantic-settings>=2.0.0",
3636
"pyyaml>=6.0.0",
3737
"requests",
38+
"rich",
3839
"transformers",
3940
]
4041

@@ -182,6 +183,7 @@ select = [
182183
"N806", # allow uppercase variable names in tests
183184
"PGH003", # allow general ignores in tests
184185
"S106", # allow hardcoded passwords in tests
186+
"PLR0915", # allow complex statements in tests
185187
]
186188

187189
[tool.ruff.lint.isort]

src/guidellm/__init__.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,15 @@
33
evaluating and benchmarking large language models (LLMs).
44
"""
55

6+
import os
7+
8+
import transformers # type: ignore # noqa: PGH003
9+
610
from .config import settings
711
from .logger import configure_logger, logger
812

913
__all__ = ["configure_logger", "logger", "settings"]
14+
15+
16+
os.environ["TOKENIZERS_PARALLELISM"] = "false" # Silence warnings for tokenizers
17+
transformers.logging.set_verbosity_error() # Silence warnings for transformers

src/guidellm/backend/base.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,12 @@ class Backend(ABC):
5151
5252
:cvar _registry: A dictionary that maps BackendEngine types to backend classes.
5353
:type _registry: Dict[BackendEngine, Type[Backend]]
54+
:param type_: The type of the backend.
55+
:type type_: BackendEngine
56+
:param target: The target URL for the backend.
57+
:type target: str
58+
:param model: The model used by the backend.
59+
:type model: str
5460
"""
5561

5662
_registry: Dict[BackendEngine, "Type[Backend]"] = {}
@@ -96,6 +102,11 @@ def create(cls, backend_type: BackendEngine, **kwargs) -> "Backend":
96102

97103
return Backend._registry[backend_type](**kwargs)
98104

105+
def __init__(self, type_: BackendEngine, target: str, model: str):
106+
self._type = type_
107+
self._target = target
108+
self._model = model
109+
99110
@property
100111
def default_model(self) -> str:
101112
"""
@@ -107,6 +118,36 @@ def default_model(self) -> str:
107118
"""
108119
return _cachable_default_model(self)
109120

121+
@property
122+
def type_(self) -> BackendEngine:
123+
"""
124+
Get the type of the backend.
125+
126+
:return: The type of the backend.
127+
:rtype: BackendEngine
128+
"""
129+
return self._type
130+
131+
@property
132+
def target(self) -> str:
133+
"""
134+
Get the target URL for the backend.
135+
136+
:return: The target URL.
137+
:rtype: str
138+
"""
139+
return self._target
140+
141+
@property
142+
def model(self) -> str:
143+
"""
144+
Get the model used by the backend.
145+
146+
:return: The model name.
147+
:rtype: str
148+
"""
149+
return self._model
150+
110151
async def submit(self, request: TextGenerationRequest) -> TextGenerationResult:
111152
"""
112153
Submit a text generation request and return the result.

src/guidellm/backend/openai.py

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -45,10 +45,19 @@ def __init__(
4545
self._request_args: Dict = request_args
4646
api_key: str = openai_api_key or settings.openai.api_key
4747

48+
if not api_key:
49+
err = ValueError(
50+
"`GUIDELLM__OPENAI__API_KEY` environment variable or "
51+
"--openai-api-key CLI parameter must be specified for the "
52+
"OpenAI backend."
53+
)
54+
logger.error("{}", err)
55+
raise err
56+
4857
if target:
4958
base_url = target
5059
elif host and port:
51-
base_url = f"{host}:{port}"
60+
base_url = f"{host}:{port}/v1"
5261
elif settings.openai.base_url:
5362
base_url = settings.openai.base_url
5463
else:
@@ -61,22 +70,11 @@ def __init__(
6170

6271
self._async_client = AsyncOpenAI(api_key=api_key, base_url=base_url)
6372
self._client = OpenAI(api_key=api_key, base_url=base_url)
64-
65-
self.validate_connection()
6673
self._model = model or self.default_model
6774

75+
super().__init__(type_="openai_server", target=base_url, model=self._model)
6876
logger.info("OpenAI {} Backend listening on {}", self._model, base_url)
6977

70-
@property
71-
def model(self) -> str:
72-
"""
73-
Get the model used by this backend.
74-
75-
:return: The model name.
76-
:rtype: str
77-
"""
78-
return self._model
79-
8078
async def make_request(
8179
self,
8280
request: TextGenerationRequest,

src/guidellm/config.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ class LoggingSettings(BaseModel):
4343

4444
disabled: bool = False
4545
clear_loggers: bool = True
46-
console_log_level: str = "INFO"
46+
console_log_level: str = "WARNING"
4747
log_file: Optional[str] = None
4848
log_file_level: Optional[str] = None
4949

@@ -98,7 +98,7 @@ class OpenAISettings(BaseModel):
9898
"""
9999

100100
# OpenAI API key.
101-
api_key: str = ""
101+
api_key: str = "invalid_token"
102102

103103
# OpenAI-compatible server URL
104104
# NOTE: The default value is default address of llama.cpp web server
@@ -141,8 +141,8 @@ class Settings(BaseSettings):
141141
# general settings
142142
env: Environment = Environment.PROD
143143
request_timeout: int = 30
144-
max_concurrency: int = 128
145-
num_sweep_profiles: int = 10
144+
max_concurrency: int = 512
145+
num_sweep_profiles: int = 9
146146
logging: LoggingSettings = LoggingSettings()
147147

148148
# Data settings

0 commit comments

Comments
 (0)