
Commit 8494b7a

Authored by parfeniukink (Dmytro Parfeniuk) and markurtz (Mark Kurtz)
💄CLI interface improvements (#18)
# Summary

- The `click` CLI interface is covered by unit tests (a sketch of such a test follows below)
- `main` function validation is added
- `OpenAIBackend` initializer parameters are optimized
  - target, host, and port parameter usage is simplified
- The `openai.NotFoundError` raised while listing _available models_ is handled
- `SerializableFileType` is renamed to `SerializableFileExtension`
- `SerializableFileExtension` now inherits from `str` to simplify usage, since this Enum class mostly works with strings
- `rate_type_to_load_gen_mode` is renamed to `RATE_TYPE_TO_LOAD_GEN_MODE_MAPPER`
- `rate_type_to_profile_mode` is renamed to `RATE_TYPE_TO_PROFILE_MODE_MAPPER`
- CLI parameters are renamed:
  - `--num-seconds` -> `--max-seconds`
  - `--num-requests` -> `--max-requests`
- `path` is removed from the CLI arguments since it is not used
- The `.env` `GUIDELLM` prefix is fixed
- Unused comments, settings, and code are removed
- The logger default unit test uses the injected logging settings object
- In module `backend.openai`, `_base_url` is renamed to `base_url`
- In `OpenAIBackend.make_request`, the `GenerativeResponse` now always counts `output_tokens` with `self._token_count`
- `SerializableFileExtensions` is replaced with plain Python strings

---------

Co-authored-by: Dmytro Parfeniuk <parfeniukinik@gmail.com>
Co-authored-by: Mark Kurtz <mark.kurtz@neuralmagic.com>
1 parent 996de81 commit 8494b7a
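As a quick illustration of the CLI test coverage and the renamed flags mentioned in the summary, here is a minimal hedged sketch of such a unit test. The module path `guidellm.main` and the command name `main` are assumptions for illustration; they are not shown in this diff.

```py
from click.testing import CliRunner

from guidellm.main import main  # assumed entrypoint; not part of this diff


def test_renamed_flags_are_accepted():
    runner = CliRunner()
    result = runner.invoke(
        main,
        ["--target", "http://localhost:8080", "--max-seconds", "10", "--max-requests", "5"],
    )
    # click reports unknown options with a UsageError (exit code 2),
    # so exit code 2 here would mean the renamed options regressed.
    assert result.exit_code != 2
```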

25 files changed (+335, −141 lines)

.gitignore

Lines changed: 5 additions & 0 deletions

```diff
@@ -164,3 +164,8 @@ cython_debug/
 
 # MacOS files
 .DS_Store
+
+
+# Project specific files
+*.json
+*.yaml
```
DEVELOPING.md

Lines changed: 31 additions & 0 deletions

````diff
@@ -228,6 +228,37 @@ The project is fully configurable with environment variables. With that configur
 | `GUIDELLM__OPENAI__BASE_URL` | `http://localhost:8080` | The address to the **OpenAI-compatible** server.<br><br>OpenAI live base url is `https://api.openai.com/v1` |
 | `GUIDELLM__OPENAI__API_KEY` | `invalid` | Corresponds to the **OpenAI-compatible** server API key.<br><br>If you look for the live key - check [this link](https://platform.openai.com/api-keys). |
 
+<br>
+
+## Project configuration
+
+The project configuration is powered by _[`🔗 pydantic-settings`](https://docs.pydantic.dev/latest/concepts/pydantic_settings/)_
+
+The project configuration entrypoint is the lazy-loaded `settings` singleton object (`src/config/__init__`)
+
+The project is fully configurable with environment variables. All the default values are defined in the source code:
+
+```py
+class NestedIntoLogging(BaseModel):
+    nested: str = "default value"
+
+class LoggingSettings(BaseModel):
+    # ...
+    disabled: bool = False
+
+
+class Settings(BaseSettings):
+    """The entrypoint to settings."""
+
+    # ...
+    logging: LoggingSettings = LoggingSettings()
+
+
+settings = Settings()
+```
+
+With that configuration set, you can load parameters into `LoggingSettings()` by using environment variables. Just run `export GUIDELLM__LOGGING__DISABLED=true` or `export GUIDELLM__LOGGING__NESTED=another_value` respectively. The nesting delimiter is `__`.
+
 ## Contact and Support
 
 If you need help or have any questions, please open an issue on GitHub or contact us at support@neuralmagic.com.
````
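To make the delimiter behavior above concrete, a minimal sketch, assuming the `Settings` class from the snippet is importable and that the environment variable is set before the object is constructed:

```py
import os

# pydantic-settings splits the name on the "__" delimiter:
# GUIDELLM__LOGGING__DISABLED -> settings.logging.disabled
os.environ["GUIDELLM__LOGGING__DISABLED"] = "true"

settings = Settings()  # the class defined in the snippet above
assert settings.logging.disabled is True
```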

src/config/__init__.py

Lines changed: 5 additions & 2 deletions

```diff
@@ -20,9 +20,12 @@ class OpenAISettings(BaseModel):
     api_key: str = "invalid"
 
     # OpenAI-compatible server URL
-    # NOTE: The default value is default address of llama.cpp web server
+    # NOTE: The default value is default address of llama.cpp http server
     base_url: str = "http://localhost:8080"
 
+    # The max value of generated tokens
+    max_gen_tokens: int = 4096
+
 
 class Settings(BaseSettings):
     """
@@ -39,7 +42,7 @@ class Settings(BaseSettings):
     """
 
     model_config = SettingsConfigDict(
-        env_prefix="GUIDELLM",
+        env_prefix="GUIDELLM__",
         env_nested_delimiter="__",
         env_file=".env",
         extra="ignore",
```

src/guidellm/__init__.py

Lines changed: 1 addition & 3 deletions

```diff
@@ -3,6 +3,4 @@
 evaluating and benchmarking large language models (LLMs).
 """
 
-from .logger import configure_logger, logger
-
-__all__ = ["logger", "configure_logger"]
+from .logger import configure_logger, logger  # noqa: F401
```

src/guidellm/backend/openai.py

Lines changed: 32 additions & 25 deletions

```diff
@@ -1,6 +1,6 @@
-import functools
 from typing import Any, Dict, Generator, List, Optional
 
+import openai
 from loguru import logger
 from openai import OpenAI, Stream
 from openai.types import Completion
@@ -37,9 +37,11 @@ class OpenAIBackend(Backend):
     def __init__(
         self,
         openai_api_key: Optional[str] = None,
-        internal_callback_url: Optional[str] = None,
+        target: Optional[str] = None,
+        host: Optional[str] = None,
+        port: Optional[int] = None,
         model: Optional[str] = None,
-        **request_args: Any,
+        **request_args,
     ):
         """
         Initialize an OpenAI Client
@@ -54,19 +56,22 @@ def __init__(
                 "must be specify for the OpenAI backend"
             )
 
-        if not (_base_url := (internal_callback_url or settings.openai.base_url)):
+        if target is not None:
+            base_url = target
+        elif host and port:
+            base_url = f"{host}:{port}"
+        elif settings.openai.base_url is not None:
+            base_url = settings.openai.base_url
+        else:
             raise ValueError(
                 "`GUIDELLM__OPENAI__BASE_URL` environment variable "
-                "or --openai-base-url CLI parameter "
-                "must be specify for the OpenAI backend"
+                "or --target CLI parameter must be specified for the OpenAI backend."
             )
-        self.openai_client = OpenAI(api_key=_api_key, base_url=_base_url)
+
+        self.openai_client = OpenAI(api_key=_api_key, base_url=base_url)
         self.model = model or self.default_model
 
-        logger.info(
-            f"Initialized OpenAIBackend with callback url: {internal_callback_url} "
-            f"and model: {self.model}"
-        )
+        logger.info(f"OpenAI {self.model} Backend listening on {target}")
 
     def make_request(
         self, request: TextGenerationRequest
@@ -85,8 +90,11 @@ def make_request(
         # How many completions to generate for each prompt
         request_args: Dict = {"n": 1}
 
-        if (num_gen_tokens := request.params.get("generated_tokens", None)) is not None:
-            request_args.update(max_tokens=num_gen_tokens, stop=None)
+        num_gen_tokens: int = (
+            request.params.get("generated_tokens", None)
+            or settings.openai.max_gen_tokens
+        )
+        request_args.update({"max_tokens": num_gen_tokens, "stop": None})
 
         if self.request_args:
             request_args.update(self.request_args)
@@ -110,11 +118,7 @@ def make_request(
                         prompt_token_count=(
                             request.prompt_token_count or self._token_count(request.prompt)
                         ),
-                        output_token_count=(
-                            num_gen_tokens
-                            if num_gen_tokens
-                            else self._token_count(chunk_content)
-                        ),
+                        output_token_count=(self._token_count(chunk_content)),
                     )
                 else:
                     logger.debug("Received token from OpenAI backend")
@@ -128,15 +132,18 @@ def available_models(self) -> List[str]:
         :rtype: List[str]
         """
 
-        models: List[str] = [
-            model.id for model in self.openai_client.models.list().data
-        ]
-        logger.info(f"Available models: {models}")
-
-        return models
+        try:
+            models: List[str] = [
+                model.id for model in self.openai_client.models.list().data
+            ]
+        except openai.NotFoundError as error:
+            logger.error("No available models for OpenAI Backend")
+            raise error
+        else:
+            logger.info(f"Available models: {models}")
+            return models
 
     @property
-    @functools.lru_cache(maxsize=1)
     def default_model(self) -> str:
         """
         Get the default model for the backend.
```
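Reading the constructor above, the base URL is resolved in order: an explicit `target` wins, then `host` + `port` are combined, then `settings.openai.base_url` (i.e. `GUIDELLM__OPENAI__BASE_URL`), and otherwise a `ValueError` is raised. A usage sketch; the URLs, key, and model name are placeholders:

```py
from guidellm.backend.openai import OpenAIBackend

# 1) An explicit target takes precedence:
backend = OpenAIBackend(
    openai_api_key="sk-test",
    target="http://localhost:8000/v1",
    model="my-model",  # passing a model skips the default-model lookup against the server
)

# 2) Otherwise host and port are combined into f"{host}:{port}":
backend = OpenAIBackend(
    openai_api_key="sk-test", host="http://localhost", port=8000, model="my-model"
)

# 3) Otherwise GUIDELLM__OPENAI__BASE_URL / settings.openai.base_url is used;
#    if that is also unset, the constructor raises a ValueError.
```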

src/guidellm/core/__init__.py

Lines changed: 1 addition & 2 deletions

```diff
@@ -8,7 +8,7 @@
     TextGenerationError,
     TextGenerationResult,
 )
-from .serializable import Serializable, SerializableFileType
+from .serializable import Serializable
 
 __all__ = [
     "Distribution",
@@ -19,6 +19,5 @@
     "TextGenerationBenchmarkReport",
     "RequestConcurrencyMeasurement",
     "Serializable",
-    "SerializableFileType",
     "GuidanceReport",
 ]
```

src/guidellm/core/report.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -2,8 +2,8 @@
 
 from pydantic import Field
 
-from guidellm.core.serializable import Serializable
 from guidellm.core.result import TextGenerationBenchmarkReport
+from guidellm.core.serializable import Serializable
 
 __all__ = [
     "GuidanceReport",
```

src/guidellm/core/serializable.py

Lines changed: 26 additions & 50 deletions

```diff
@@ -1,24 +1,18 @@
-from typing import Any, Optional
-
 import os
+from typing import Any, Literal, Tuple, Union
+
 import yaml
 from loguru import logger
 from pydantic import BaseModel, ConfigDict
-from enum import Enum
 
-from guidellm.utils import is_file_name
+from guidellm.utils import is_directory_name, is_file_name
 
+__all__ = ["Serializable", "_Extension"]
 
-__all__ = ["Serializable", "SerializableFileType"]
 
+_Extension = Union[Literal["yaml"], Literal["json"]]
 
-class SerializableFileType(Enum):
-    """
-    Enum class for file types supported by Serializable.
-    """
-
-    YAML = "yaml"
-    JSON = "json"
+AVAILABLE_FILE_EXTENSIONS: Tuple[_Extension, ...] = ("yaml", "json")
 
 
 class Serializable(BaseModel):
@@ -90,7 +84,7 @@ def from_json(cls, data: str):
 
         return obj
 
-    def save_file(self, path: str, type_: Optional[SerializableFileType] = None) -> str:
+    def save_file(self, path: str, extension: _Extension = "yaml") -> str:
         """
         Save the model to a file in either YAML or JSON format.
 
@@ -103,44 +97,28 @@ def save_file(self, path: str, type_: Optional[SerializableFileType] = None) ->
         it will save in YAML format.
         :return: The path to the saved file.
         """
-        logger.debug("Saving to file... {} with format: {}", path, type_)
-
-        if not is_file_name(path):
-            file_name = f"{self.__class__.__name__.lower()}"
-            if type_:
-                file_name += f".{type_.value.lower()}"
-            else:
-                file_name += ".yaml"
-                type_ = SerializableFileType.YAML
-            path = os.path.join(path, file_name)
 
-        if not type_:
-            extension = path.split(".")[-1].upper()
-
-            if extension not in SerializableFileType.__members__:
+        if is_file_name(path):
+            requested_extension = path.split(".")[-1].lower()
+            if requested_extension not in AVAILABLE_FILE_EXTENSIONS:
                 raise ValueError(
-                    f"Unsupported file extension: {extension}. "
-                    f"Expected one of {', '.join(SerializableFileType.__members__)}) "
-                    f"for {path}"
+                    f"Unsupported file extension: .{requested_extension}. "
+                    f"Expected one of {', '.join(AVAILABLE_FILE_EXTENSIONS)}."
                 )
 
-            type_ = SerializableFileType[extension]
-
-        if type_.name not in SerializableFileType.__members__:
-            raise ValueError(
-                f"Unsupported file format: {type_} "
-                f"(expected 'yaml' or 'json') for {path}"
-            )
-
-        os.makedirs(os.path.dirname(path), exist_ok=True)
+        elif is_directory_name(path):
+            file_name = f"{self.__class__.__name__.lower()}.{extension}"
+            path = os.path.join(path, file_name)
+        else:
+            raise ValueError("Output path must be either a directory or a file path")
 
         with open(path, "w") as file:
-            if type_ == SerializableFileType.YAML:
+            if extension == "yaml":
                 file.write(self.to_yaml())
-            elif type_ == SerializableFileType.JSON:
+            elif extension == "json":
                 file.write(self.to_json())
             else:
-                raise ValueError(f"Unsupported file format: {type_}")
+                raise ValueError(f"Unsupported file format: {extension}")
 
         logger.info("Successfully saved {} to {}", self.__class__.__name__, path)
 
@@ -161,25 +139,23 @@ def load_file(cls, path: str):
         elif not os.path.isfile(path):
             raise ValueError(f"Path is not a file: {path}")
 
-        extension = path.split(".")[-1].upper()
+        extension = path.split(".")[-1].lower()
 
-        if extension not in SerializableFileType.__members__:
+        if extension not in AVAILABLE_FILE_EXTENSIONS:
             raise ValueError(
                 f"Unsupported file extension: {extension}. "
-                f"Expected one of {', '.join(SerializableFileType.__members__)}) "
+                f"Expected one of {AVAILABLE_FILE_EXTENSIONS} "
                 f"for {path}"
             )
 
-        type_ = SerializableFileType[extension]
-
         with open(path, "r") as file:
             data = file.read()
 
-        if type_ == SerializableFileType.YAML:
+        if extension == "yaml":
             obj = cls.from_yaml(data)
-        elif type_ == SerializableFileType.JSON:
+        elif extension == "json":
             obj = cls.from_json(data)
         else:
-            raise ValueError(f"Unsupported file format: {type_}")
+            raise ValueError(f"Unsupported file format: {extension}")
 
         return obj
```
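A short usage sketch of the new extension handling. The `Example` model and paths are hypothetical, and whether `"out"` passes `is_directory_name` is an assumption; note also that the rewritten `save_file` no longer calls `os.makedirs`, so the target directory must already exist:

```py
from guidellm.core import Serializable


class Example(Serializable):  # hypothetical model for illustration
    value: int = 0


# With a file path, the suffix is validated against ("yaml", "json"),
# but the written format is governed by the `extension` argument:
saved = Example(value=1).save_file("out/example.json", extension="json")

# With a directory path, "<classname>.<extension>" is appended
# (extension defaults to "yaml"):
saved = Example(value=1).save_file("out", extension="json")  # -> out/example.json

loaded = Example.load_file(saved)  # format is inferred from the file suffix
```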

src/guidellm/executor/__init__.py

Lines changed: 4 additions & 4 deletions

```diff
@@ -1,17 +1,17 @@
 from .executor import Executor
 from .profile_generator import (
+    RATE_TYPE_TO_LOAD_GEN_MODE_MAPPER,
+    RATE_TYPE_TO_PROFILE_MODE_MAPPER,
     FixedRateProfileGenerator,
     Profile,
     ProfileGenerationMode,
     ProfileGenerator,
     SweepProfileGenerator,
-    rate_type_to_load_gen_mode,
-    rate_type_to_profile_mode,
 )
 
 __all__ = [
-    "rate_type_to_load_gen_mode",
-    "rate_type_to_profile_mode",
+    "RATE_TYPE_TO_LOAD_GEN_MODE_MAPPER",
+    "RATE_TYPE_TO_PROFILE_MODE_MAPPER",
     "Executor",
     "ProfileGenerationMode",
     "Profile",
```

src/guidellm/executor/profile_generator.py

Lines changed: 3 additions & 2 deletions

```diff
@@ -16,7 +16,7 @@
     "SweepProfileGenerator",
 ]
 
-rate_type_to_load_gen_mode = {
+RATE_TYPE_TO_LOAD_GEN_MODE_MAPPER = {
     "synchronous": LoadGenerationMode.SYNCHRONOUS,
     "constant": LoadGenerationMode.CONSTANT,
     "poisson": LoadGenerationMode.POISSON,
@@ -28,7 +28,7 @@ class ProfileGenerationMode(Enum):
     SWEEP = "sweep"
 
 
-rate_type_to_profile_mode = {
+RATE_TYPE_TO_PROFILE_MODE_MAPPER = {
     "synchronous": ProfileGenerationMode.FIXED_RATE,
     "constant": ProfileGenerationMode.FIXED_RATE,
     "poisson": ProfileGenerationMode.FIXED_RATE,
@@ -80,6 +80,7 @@
         super().__init__(ProfileGenerationMode.FIXED_RATE)
         if load_gen_mode == LoadGenerationMode.SYNCHRONOUS and rates and len(rates) > 0:
             raise ValueError("custom rates are not supported in synchronous mode")
+
         self._rates: Optional[List[float]] = rates
         self._load_gen_mode = load_gen_mode
         self._generated: bool = False
```
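For reference, a sketch of how the renamed mapper constants are presumably consumed; that they translate the CLI's `--rate-type` value is an assumption based on the CLI changes in this PR, not shown in this diff:

```py
from guidellm.executor import (
    RATE_TYPE_TO_LOAD_GEN_MODE_MAPPER,
    RATE_TYPE_TO_PROFILE_MODE_MAPPER,
)

rate_type = "constant"  # e.g. the CLI's --rate-type value

load_gen_mode = RATE_TYPE_TO_LOAD_GEN_MODE_MAPPER[rate_type]  # LoadGenerationMode.CONSTANT
profile_mode = RATE_TYPE_TO_PROFILE_MODE_MAPPER[rate_type]    # ProfileGenerationMode.FIXED_RATE
```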
