
Commit eb930bf

Refactor and Optimize Backend, Scheduler, and Test Suites for Improved Performance and Maintainability (#27)
## Summary

Introduces significant refactors and optimizations across the backend, scheduler, and test suites to enhance performance, maintainability, and code clarity. Key changes include modularizing code, simplifying complex components, and expanding unit tests to ensure robust functionality.

## Details

- **Backend Enhancements:**
  - Refactored `base.py` and `openai.py` for better separation of concerns and modularity.
  - Introduced comprehensive logging and error-handling improvements.
  - Simplified configuration handling by merging `config/base.py` into `config.py`.
- **Scheduler Improvements:**
  - Added `base.py` and optimized `load_generator.py` to handle complex scheduling logic more efficiently.
  - Removed deprecated methods in `scheduler.py` and moved the necessary logic into new base classes.
- **Test Suite Overhaul:**
  - Expanded and reorganized unit tests, covering new and existing functionality.
  - Migrated and refactored integration tests to align with the updated codebase.
  - Introduced more thorough test coverage for backend and scheduler components, ensuring reliability.

## Test Plan

- **Automated Testing:**
  - All existing unit tests have been updated to reflect the changes.
  - New tests have been added to cover additional edge cases and new functionality.
- **Manual Testing:**
  - Verified that core functionalities of the backend and scheduler work as expected.
  - Ensured no regressions were introduced by the refactor.

## Unittest Coverage Report

```
Name                                         Stmts   Miss  Cover   Missing
---------------------------------------------------------------------------
src/guidellm/__init__.py                         3      0   100%
src/guidellm/backend/__init__.py                 3      0   100%
src/guidellm/backend/base.py                    76      8    89%   150-155, 183, 197, 219-221
src/guidellm/backend/openai.py                  59      7    88%   103, 162-164, 178-180
src/guidellm/config.py                          55      0   100%
src/guidellm/core/__init__.py                    6      0   100%
src/guidellm/core/distribution.py               92      0   100%
src/guidellm/core/report.py                      7      0   100%
src/guidellm/core/request.py                    10      0   100%
src/guidellm/core/result.py                    125      2    98%   104, 259
src/guidellm/core/serializable.py               64      1    98%   105
src/guidellm/executor/__init__.py                3      0   100%
src/guidellm/executor/base.py                   55      0   100%
src/guidellm/executor/profile_generator.py     113     16    86%   98, 149-151, 173-175, 207-209, 287-289, 295-297
src/guidellm/logger.py                          17      0   100%
src/guidellm/main.py                            55     55     0%   1-172
src/guidellm/request/__init__.py                 5      0   100%
src/guidellm/request/base.py                    67      0   100%
src/guidellm/request/emulated.py               130      3    98%   64, 79, 90
src/guidellm/request/file.py                    26      0   100%
src/guidellm/request/transformers.py            31      0   100%
src/guidellm/scheduler/__init__.py               3      0   100%
src/guidellm/scheduler/base.py                 138     24    83%   294-328, 334, 352-355
src/guidellm/scheduler/load_generator.py        74      5    93%   104-105, 139, 165, 175
src/guidellm/utils/__init__.py                   4      0   100%
src/guidellm/utils/injector.py                  20      0   100%
src/guidellm/utils/text.py                     201     22    89%   79-81, 86-88, 155-156, 166, 186, 190-191, 212-213, 245, 249, 304-305, 324, 344, 385, 443
src/guidellm/utils/transformers.py              53      0   100%
---------------------------------------------------------------------------
TOTAL                                         1495    143    90%
```
1 parent aee89e8 · commit eb930bf


79 files changed: +7429 additions, -2400 deletions

.pre-commit-config.yaml

Lines changed: 2 additions & 0 deletions

@@ -18,6 +18,7 @@ repos:
         # main dependencies
         click,
         datasets,
+        ftfy,
         loguru,
         numpy,
         openai,
@@ -30,6 +31,7 @@ repos:
         # dev dependencies
         pytest,
         pydantic_settings,
+        requests-mock,

         # types
         types-click,

pyproject.toml

Lines changed: 21 additions & 9 deletions

@@ -27,6 +27,7 @@ urls = { homepage = "https://github.com/neuralmagic/guidellm" }
 dependencies = [
     "click",
     "datasets",
+    "ftfy>=6.0.0",
     "loguru",
     "numpy",
     "openai",
@@ -41,13 +42,16 @@ dependencies = [
 dev = [
     # general and configurations
     "pre-commit~=3.5.0",
+    "scipy~=1.10",
     "sphinx~=7.1.2",
     "tox~=4.16.0",

     # testing
     "pytest~=8.2.2",
+    "pytest-asyncio~=0.23.8",
     "pytest-cov~=5.0.0",
     "pytest-mock~=3.14.0",
+    "pytest-rerunfailures~=14.0",
     "requests-mock~=1.12.1",

     # code quality
@@ -83,7 +87,7 @@ profile = "black"
 files = ["src/guidellm", "tests"]
 python_version = '3.8'
 warn_redundant_casts = true
-warn_unused_ignores = true
+warn_unused_ignores = false
 show_error_codes = true
 namespace_packages = true
 exclude = ["venv", ".tox"]
@@ -92,22 +96,27 @@ exclude = ["venv", ".tox"]
 # Check: https://mypy.readthedocs.io/en/latest/config_file.html#import-discovery
 follow_imports = 'silent'

-[[tool.mypy.overrides]]
-module = ["transformers.*", "datasets.*"]
-ignore_missing_imports=true
-

 [tool.ruff]
 line-length = 88
+indent-width = 4
 exclude = ["build", "dist", "env", ".venv"]
-lint.ignore = [
+
+[tool.ruff.format]
+quote-style = "double"
+indent-style = "space"
+
+[tool.ruff.lint]
+ignore = [
     "PLR0913",
     "TCH001",
     "COM812",
     "ISC001",
     "TCH002",
+    "PLW1514",  # allow Path.open without encoding
+
 ]
-lint.select = [
+select = [
     # Rules reference: https://docs.astral.sh/ruff/rules/

     # Code Style / Formatting
@@ -127,11 +136,11 @@ lint.select = [
     "Q",  # flake8-quotes: enforces consistent use of single or double quotes
     "TCH",  # flake8-type-checking: enforces type checking practices and standards
     "TID",  # flake8-tidy-imports: enforces tidy and well-organized imports
+    "RUF022",  # flake8-ruff: enforce sorting of __all__ in modules

     # Code Structure / Complexity
     "C4",  # flake8-comprehensions: improves readability and performance of list, set, and dict comprehensions
     "C90",  # mccabe: checks for overly complex code using cyclomatic complexity
-    "FBT",  # flake8-boolean-trap: prevents the use of boolean traps in function arguments and calls
     "ISC",  # flake8-implicit-str-concat: prevents implicit string concatenation
     "PIE",  # flake8-pie: identifies and corrects common code inefficiencies and mistakes
     "R",  # Refactor: suggests improvements to code structure and readability
@@ -164,7 +173,6 @@ lint.select = [
 "tests/**/*.py" = [
     "S101",  # asserts allowed in tests
     "ARG",  # Unused function args allowed in tests
-    "FBT",  # Booleans as positional arguments in tests, e.g. via @pytest.mark.parametrize()
     "PLR2004",  # Magic value used in comparison
     "TCH002",  # No import only type checking in tests
     "SLF001",  # enable private member access in tests
@@ -173,8 +181,12 @@ lint.select = [
     "PT011",  # allow generic exceptions in tests
     "N806",  # allow uppercase variable names in tests
     "PGH003",  # allow general ignores in tests
+    "S106",  # allow hardcoded passwords in tests
 ]

+[tool.ruff.lint.isort]
+known-first-party = ["guidellm", "tests"]
+

 [tool.pytest.ini_options]
 addopts = '-s -vvv --cache-clear'
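
The new `pytest-asyncio` dev dependency lines up with the backend's move to an async `submit`/`make_request` API (see the `base.py` diff below). A minimal sketch of what an async test looks like under this setup; the test body and names here are hypothetical illustrations, not tests from this commit:

```python
import asyncio

import pytest


@pytest.mark.asyncio  # pytest-asyncio marker: run this coroutine as a test
async def test_async_roundtrip():
    # Stand-in coroutine for an async backend call such as Backend.submit().
    async def fake_submit() -> str:
        await asyncio.sleep(0)  # yield control, as a streaming backend would
        return "done"

    assert await fake_submit() == "done"
```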

src/guidellm/__init__.py

Lines changed: 4 additions & 1 deletion

@@ -3,4 +3,7 @@
 evaluating and benchmarking large language models (LLMs).
 """

-from .logger import configure_logger, logger  # noqa: F401
+from .config import settings
+from .logger import configure_logger, logger
+
+__all__ = ["configure_logger", "logger", "settings"]
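
The new `from .config import settings` import implies a single settings object exposed at the package root. A minimal sketch of the kind of pydantic-settings module this pattern suggests; the field name and env prefix below are hypothetical assumptions, not read from guidellm's `config.py`:

```python
from pydantic_settings import BaseSettings, SettingsConfigDict


class Settings(BaseSettings):
    """App-wide settings loaded from environment variables (prefix assumed)."""

    model_config = SettingsConfigDict(env_prefix="GUIDELLM_")

    request_timeout: float = 30.0  # hypothetical field for illustration


# Single shared instance, importable as `from guidellm import settings`.
settings = Settings()
```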

src/guidellm/backend/base.py

Lines changed: 111 additions & 58 deletions

@@ -1,35 +1,39 @@
 import functools
 from abc import ABC, abstractmethod
-from dataclasses import dataclass
-from enum import Enum
-from typing import Dict, Iterator, List, Optional, Type
+from typing import AsyncGenerator, Dict, List, Literal, Optional, Type

 from loguru import logger
+from pydantic import BaseModel

 from guidellm.core import TextGenerationRequest, TextGenerationResult

 __all__ = ["Backend", "BackendEngine", "GenerativeResponse"]


-class BackendEngine(str, Enum):
-    """
-    Determines the Engine of the LLM Backend.
-    All the implemented backends in the project have the engine.
-
-    NOTE: the `TEST` engine has to be used only for testing purposes.
-    """
+BackendEngine = Literal["test", "openai_server"]

-    TEST = "test"
-    OPENAI_SERVER = "openai_server"

-
-@dataclass
-class GenerativeResponse:
+class GenerativeResponse(BaseModel):
     """
-    A dataclass to represent a response from a generative AI backend.
+    A model representing a response from a generative AI backend.
+
+    :param type_: The type of response, either 'token_iter' for intermediate
+        token output or 'final' for the final result.
+    :type type_: Literal["token_iter", "final"]
+    :param add_token: The token to add to the output
+        (only applicable if type_ is 'token_iter').
+    :type add_token: Optional[str]
+    :param prompt: The original prompt sent to the backend.
+    :type prompt: Optional[str]
+    :param output: The final generated output (only applicable if type_ is 'final').
+    :type output: Optional[str]
+    :param prompt_token_count: The number of tokens in the prompt.
+    :type prompt_token_count: Optional[int]
+    :param output_token_count: The number of tokens in the output.
+    :type output_token_count: Optional[int]
     """

-    type_: str  # One of 'token_iter', 'final'
+    type_: Literal["token_iter", "final"]
     add_token: Optional[str] = None
     prompt: Optional[str] = None
     output: Optional[str] = None
@@ -39,7 +43,14 @@ class GenerativeResponse:

 class Backend(ABC):
     """
-    An abstract base class with template methods for generative AI backends.
+    Abstract base class for generative AI backends.
+
+    This class provides a common interface for creating and interacting with different
+    generative AI backends. Subclasses should implement the abstract methods to
+    define specific backend behavior.
+
+    :cvar _registry: A dictionary that maps BackendEngine types to backend classes.
+    :type _registry: Dict[BackendEngine, Type[Backend]]
     """

     _registry: Dict[BackendEngine, "Type[Backend]"] = {}
@@ -50,33 +61,38 @@ def register(cls, backend_type: BackendEngine):
         A decorator to register a backend class in the backend registry.

         :param backend_type: The type of backend to register.
-        :type backend_type: BackendType
+        :type backend_type: BackendEngine
+        :return: The decorated backend class.
+        :rtype: Type[Backend]
         """

         def inner_wrapper(wrapped_class: Type["Backend"]):
             cls._registry[backend_type] = wrapped_class
+            logger.info("Registered backend type: {}", backend_type)
             return wrapped_class

         return inner_wrapper

     @classmethod
     def create(cls, backend_type: BackendEngine, **kwargs) -> "Backend":
         """
-        Factory method to create a backend based on the backend type.
+        Factory method to create a backend instance based on the backend type.

         :param backend_type: The type of backend to create.
-        :type backend_type: BackendType
+        :type backend_type: BackendEngine
         :param kwargs: Additional arguments for backend initialization.
         :type kwargs: dict
         :return: An instance of a subclass of Backend.
         :rtype: Backend
+        :raises ValueError: If the backend type is not registered.
         """

-        logger.info(f"Creating backend of type {backend_type}")
+        logger.info("Creating backend of type {}", backend_type)

         if backend_type not in cls._registry:
-            logger.error(f"Unsupported backend type: {backend_type}")
-            raise ValueError(f"Unsupported backend type: {backend_type}")
+            err = ValueError(f"Unsupported backend type: {backend_type}")
+            logger.error("{}", err)
+            raise err

         return Backend._registry[backend_type](**kwargs)

@@ -87,82 +103,119 @@ def default_model(self) -> str:

         :return: The default model.
         :rtype: str
+        :raises ValueError: If no models are available.
         """
         return _cachable_default_model(self)

-    def submit(self, request: TextGenerationRequest) -> TextGenerationResult:
+    async def submit(self, request: TextGenerationRequest) -> TextGenerationResult:
         """
-        Submit a result request and populate the BenchmarkResult.
+        Submit a text generation request and return the result.

-        :param request: The result request to submit.
+        This method handles the request submission to the backend and processes
+        the response in a streaming fashion if applicable.
+
+        :param request: The request object containing the prompt
+            and other configurations.
         :type request: TextGenerationRequest
-        :return: The populated result result.
+        :return: The result of the text generation request.
         :rtype: TextGenerationResult
+        :raises ValueError: If no response is received from the backend.
         """

-        logger.info(f"Submitting request with prompt: {request.prompt}")
+        logger.debug("Submitting request with prompt: {}", request.prompt)

-        result = TextGenerationResult(
-            request=TextGenerationRequest(prompt=request.prompt),
-        )
+        result = TextGenerationResult(request=request)
         result.start(request.prompt)
+        received_final = False

-        for response in self.make_request(request):  # GenerativeResponse
-            if response.type_ == "token_iter" and response.add_token:
-                result.output_token(response.add_token)
+        async for response in self.make_request(request):
+            logger.debug("Received response: {}", response)
+            if response.type_ == "token_iter":
+                result.output_token(response.add_token if response.add_token else "")
             elif response.type_ == "final":
+                if received_final:
+                    err = ValueError(
+                        "Received multiple final responses from the backend."
+                    )
+                    logger.error(err)
+                    raise err
+
                 result.end(
+                    output=response.output,
                     prompt_token_count=response.prompt_token_count,
                     output_token_count=response.output_token_count,
                 )
+                received_final = True
+            else:
+                err = ValueError(
+                    f"Invalid response received from the backend of type: "
+                    f"{response.type_} for {response}"
+                )
+                logger.error(err)
+                raise err

-        logger.info(f"Request completed with output: {result.output}")
+        if not received_final:
+            err = ValueError("No final response received from the backend.")
+            logger.error(err)
+            raise err
+
+        logger.info("Request completed with output: {}", result.output)

         return result

     @abstractmethod
-    def make_request(
+    async def make_request(
         self,
         request: TextGenerationRequest,
-    ) -> Iterator[GenerativeResponse]:
+    ) -> AsyncGenerator[GenerativeResponse, None]:
         """
         Abstract method to make a request to the backend.

-        :param request: The result request to submit.
+        Subclasses must implement this method to define how requests are handled
+        by the backend.
+
+        :param request: The request object containing the prompt and
+            other configurations.
         :type request: TextGenerationRequest
-        :return: An iterator over the generative responses.
-        :rtype: Iterator[GenerativeResponse]
+        :yield: A generator yielding responses from the backend.
+        :rtype: AsyncGenerator[GenerativeResponse, None]
         """
-        raise NotImplementedError
+        yield None  # type: ignore  # noqa: PGH003

     @abstractmethod
     def available_models(self) -> List[str]:
         """
         Abstract method to get the available models for the backend.

+        Subclasses must implement this method to provide the list of models
+        supported by the backend.
+
         :return: A list of available models.
         :rtype: List[str]
-        """
-        raise NotImplementedError
-
-    @abstractmethod
-    def model_tokenizer(self, model: str) -> Optional[str]:
-        """
-        Abstract method to get the tokenizer for a model.
-
-        :param model: The model to get the tokenizer for.
-        :type model: str
-        :return: The tokenizer for the model, or None if it cannot be created.
-        :rtype: Optional[str]
+        :raises NotImplementedError: If the method is not implemented by a subclass.
         """
         raise NotImplementedError


 @functools.lru_cache(maxsize=1)
 def _cachable_default_model(backend: Backend) -> str:
-    if models := backend.available_models():
-        logger.debug(f"Default model: {models[0]}")
+    """
+    Get the default model for a backend using LRU caching.
+
+    This function caches the default model to optimize repeated lookups.
+
+    :param backend: The backend instance for which to get the default model.
+    :type backend: Backend
+    :return: The default model.
+    :rtype: str
+    :raises ValueError: If no models are available.
+    """
+    logger.debug("Getting default model for backend: {}", backend)
+    models = backend.available_models()
+    if models:
+        logger.debug("Default model: {}", models[0])
         return models[0]

-    logger.error("No models available.")
-    raise ValueError("No models available.")
+    err = ValueError("No models available.")
+    logger.error(err)
+    raise err
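
Taken together, the registry decorator, the `Literal`-based `BackendEngine`, and the async `submit` loop compose as shown below. This is a minimal sketch of a custom backend built against the refactored interface; the `EchoBackend` class, its reuse of the `"test"` engine name, and the exact field usage are illustrative assumptions, not code from this commit:

```python
import asyncio
from typing import AsyncGenerator, List

from guidellm.backend.base import Backend, GenerativeResponse
from guidellm.core import TextGenerationRequest


@Backend.register("test")  # overrides any backend already registered as "test"
class EchoBackend(Backend):
    """Hypothetical backend that streams the prompt back token by token."""

    async def make_request(
        self, request: TextGenerationRequest
    ) -> AsyncGenerator[GenerativeResponse, None]:
        for token in request.prompt.split():
            # Intermediate tokens are accumulated by Backend.submit().
            yield GenerativeResponse(type_="token_iter", add_token=token + " ")
        # Exactly one final response; submit() raises ValueError otherwise.
        yield GenerativeResponse(
            type_="final", prompt=request.prompt, output=request.prompt
        )

    def available_models(self) -> List[str]:
        return ["echo-model"]


async def main() -> None:
    backend = Backend.create("test")  # resolved through the class registry
    result = await backend.submit(TextGenerationRequest(prompt="hello world"))
    print(result.output)


asyncio.run(main())
```

Note how the refactored `submit` enforces the streaming contract visible in the diff: every `token_iter` response is appended via `result.output_token`, a duplicate `final` response raises, and a missing `final` response raises as well.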
