Skip to content

Commit 7f39986

Browse files
authored
Refactor Benchmark Report Generation and Update Backend Configurations (#33)
## Summary

This PR refactors the benchmark report generation process and updates backend configurations for better performance and clarity. The main changes include restructuring the entry point for benchmark generation, consolidating backend configurations, and improving error handling for better user experience.

## Details

- Introduced `generate_benchmark_report_cli` as the main CLI entry point for benchmark generation.
- Refactored `generate_benchmark_report` to handle various input configurations and streamline the execution process.
- Removed unused parameters (`host`, `port`) and consolidated backend URL configurations into a single `target` parameter.
- Updated `OpenAIBackend` initialization to eliminate ambiguity around base URL settings.
- Enhanced error handling and logging, ensuring informative messages for missing or incorrect configurations.
- Modified `EmulatedRequestGenerator` and `FileRequestGenerator` to handle optional configuration inputs.
- Removed obsolete CLI test files and restructured e2e tests to align with the new CLI entry point.
- Added comprehensive unit tests for the main benchmark generation functions and CLI commands.
1 parent 59ba130 commit 7f39986

File tree

15 files changed: +453 additions, -246 deletions

src/guidellm/__init__.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,17 @@
33
evaluating and benchmarking large language models (LLMs).
44
"""
55

6+
# flake8: noqa
7+
68
import os
9+
import transformers # type: ignore
10+
11+
os.environ["TOKENIZERS_PARALLELISM"] = "false" # Silence warnings for tokenizers
12+
transformers.logging.set_verbosity_error() # Silence warnings for transformers
713

8-
import transformers # type: ignore # noqa: PGH003
914

1015
from .config import settings
1116
from .logger import configure_logger, logger
17+
from .main import generate_benchmark_report
1218

13-
__all__ = ["configure_logger", "logger", "settings"]
14-
15-
16-
os.environ["TOKENIZERS_PARALLELISM"] = "false" # Silence warnings for tokenizers
17-
transformers.logging.set_verbosity_error() # Silence warnings for transformers
19+
__all__ = ["configure_logger", "logger", "settings", "generate_benchmark_report"]

src/guidellm/backend/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
1-
from .base import Backend, BackendEngine, GenerativeResponse
1+
from .base import Backend, BackendEngine, BackendEnginePublic, GenerativeResponse
22
from .openai import OpenAIBackend
33

44
__all__ = [
55
"Backend",
66
"BackendEngine",
7+
"BackendEnginePublic",
78
"GenerativeResponse",
89
"OpenAIBackend",
910
]

src/guidellm/backend/base.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,17 @@
11
import functools
22
from abc import ABC, abstractmethod
3-
from typing import AsyncGenerator, Dict, List, Literal, Optional, Type
3+
from typing import AsyncGenerator, Dict, List, Literal, Optional, Type, Union
44

55
from loguru import logger
66
from pydantic import BaseModel
77

88
from guidellm.core import TextGenerationRequest, TextGenerationResult
99

10-
__all__ = ["Backend", "BackendEngine", "GenerativeResponse"]
10+
__all__ = ["Backend", "BackendEngine", "BackendEnginePublic", "GenerativeResponse"]
1111

1212

13-
BackendEngine = Literal["test", "openai_server"]
13+
BackendEnginePublic = Literal["openai_server"]
14+
BackendEngine = Union[BackendEnginePublic, Literal["test"]]
1415

1516

1617
class GenerativeResponse(BaseModel):
@@ -87,7 +88,6 @@ def create(cls, backend_type: BackendEngine, **kwargs) -> "Backend":
8788
:param backend_type: The type of backend to create.
8889
:type backend_type: BackendEngine
8990
:param kwargs: Additional arguments for backend initialization.
90-
:type kwargs: dict
9191
:return: An instance of a subclass of Backend.
9292
:rtype: Backend
9393
:raises ValueError: If the backend type is not registered.

src/guidellm/backend/openai.py

Lines changed: 4 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,6 @@ class OpenAIBackend(Backend):
2323
:type openai_api_key: Optional[str]
2424
:param target: The target URL string for the OpenAI server.
2525
:type target: Optional[str]
26-
:param host: Optional host for the OpenAI server.
27-
:type host: Optional[str]
28-
:param port: Optional port for the OpenAI server.
29-
:type port: Optional[int]
3026
:param model: The OpenAI model to use, defaults to the first available model.
3127
:type model: Optional[str]
3228
:param request_args: Additional arguments for the OpenAI request.
@@ -37,8 +33,6 @@ def __init__(
3733
self,
3834
openai_api_key: Optional[str] = None,
3935
target: Optional[str] = None,
40-
host: Optional[str] = None,
41-
port: Optional[int] = None,
4236
model: Optional[str] = None,
4337
**request_args,
4438
):
@@ -54,16 +48,12 @@ def __init__(
5448
logger.error("{}", err)
5549
raise err
5650

57-
if target:
58-
base_url = target
59-
elif host and port:
60-
base_url = f"{host}:{port}/v1"
61-
elif settings.openai.base_url:
62-
base_url = settings.openai.base_url
63-
else:
51+
base_url = target or settings.openai.base_url
52+
53+
if not base_url:
6454
err = ValueError(
6555
"`GUIDELLM__OPENAI__BASE_URL` environment variable or "
66-
"--target CLI parameter must be specified for the OpenAI backend."
56+
"target parameter must be specified for the OpenAI backend."
6757
)
6858
logger.error("{}", err)
6959
raise err

src/guidellm/config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ class OpenAISettings(BaseModel):
102102

103103
# OpenAI-compatible server URL
104104
# NOTE: The default value is default address of llama.cpp web server
105-
base_url: str = "http://localhost:8080"
105+
base_url: str = "http://localhost:8000/v1"
106106

107107
max_gen_tokens: int = 4096
108108

0 commit comments

Comments (0)