
Commit 8494b7a

Authored by parfeniukink (Dmytro Parfeniuk) and markurtz (Mark Kurtz)
💄CLI interface improvements (#18)
# Summary

- The `click` CLI interface is covered by unit tests (a sketch of such a test follows below)
- `main` function validation is added
- `OpenAIBackend` initializer parameters are optimized
  - target, host, and port parameter usage is simplified
- The `openai.NotFoundError` raised while listing _available models_ is handled
- `SerializableFileType` is renamed to `SerializableFileExtension`
- `SerializableFileExtension` now inherits from `str` to simplify usage, since this Enum class mostly works with strings
- `rate_type_to_load_gen_mode` is renamed to `RATE_TYPE_TO_LOAD_GEN_MODE_MAPPER`
- `rate_type_to_profile_mode` is renamed to `RATE_TYPE_TO_PROFILE_MODE_MAPPER`
- CLI parameters are renamed:
  - `--num-seconds` -> `--max-seconds`
  - `--num-requests` -> `--max-requests`
- `path` is removed from the CLI arguments since it is not used
- The `.env` `GUIDELLM` prefix is fixed
- Unused comments, settings, and code are removed
- The logger default unit test uses the injected logging settings object
- In module `backend.openai`, `_base_url` is renamed to `base_url`
- In `OpenAIBackend.make_request`, the `GenerativeResponse` now always counts `output_tokens` with `self._token_count`
- `SerializableFileExtensions` is replaced with plain Python strings

---------

Co-authored-by: Dmytro Parfeniuk <parfeniukinik@gmail.com>
Co-authored-by: Mark Kurtz <mark.kurtz@neuralmagic.com>
1 parent 996de81 commit 8494b7a
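As a quick illustration of the CLI test coverage and the renamed flags mentioned in the summary, here is a minimal hedged sketch of such a unit test. The module path `guidellm.main` and the command name `main` are assumptions for illustration; they are not shown in this diff.

```py
from click.testing import CliRunner

from guidellm.main import main  # assumed entrypoint; not part of this diff


def test_renamed_flags_are_accepted():
    runner = CliRunner()
    result = runner.invoke(
        main,
        ["--target", "http://localhost:8080", "--max-seconds", "10", "--max-requests", "5"],
    )
    # click reports unknown options with a UsageError (exit code 2),
    # so exit code 2 here would mean the renamed options regressed.
    assert result.exit_code != 2
```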

25 files changed (+335, −141 lines)

.gitignore

Lines changed: 5 additions & 0 deletions

```diff
@@ -164,3 +164,8 @@ cython_debug/
 
 # MacOS files
 .DS_Store
+
+
+# Project specific files
+*.json
+*.yaml
```
DEVELOPING.md

Lines changed: 31 additions & 0 deletions

````diff
@@ -228,6 +228,37 @@ The project is fully configurable with environment variables. With that configur
 | `GUIDELLM__OPENAI__BASE_URL` | `http://localhost:8080` | The address to the **OpenAI-compatible** server.<br><br>OpenAI live base url is `https://api.openai.com/v1` |
 | `GUIDELLM__OPENAI__API_KEY` | `invalid` | Corresponds to the **OpenAI-compatible** server API key.<br><br>If you look for the live key - check [this link](https://platform.openai.com/api-keys). |
 
+<br>
+
+## Project configuration
+
+The project configuration is powered by _[`🔗 pydantic-settings`](https://docs.pydantic.dev/latest/concepts/pydantic_settings/)_
+
+The project configuration entrypoint is the lazy-loaded `settings` singleton object (`src/config/__init__`)
+
+The project is fully configurable with environment variables. All the default values are defined in the source code:
+
+```py
+class NestedIntoLogging(BaseModel):
+    nested: str = "default value"
+
+class LoggingSettings(BaseModel):
+    # ...
+    disabled: bool = False
+
+
+class Settings(BaseSettings):
+    """The entrypoint to settings."""
+
+    # ...
+    logging: LoggingSettings = LoggingSettings()
+
+
+settings = Settings()
+```
+
+With that configuration set, you can load parameters into `LoggingSettings()` by using environment variables. Just run `export GUIDELLM__LOGGING__DISABLED=true` or `export GUIDELLM__LOGGING__NESTED=another_value` respectively. The nesting delimiter is `__`.
+
 ## Contact and Support
 
 If you need help or have any questions, please open an issue on GitHub or contact us at support@neuralmagic.com.
````
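To make the delimiter behavior above concrete, a minimal sketch, assuming the `Settings` class from the snippet is importable and that the environment variable is set before the object is constructed:

```py
import os

# pydantic-settings splits the name on the "__" delimiter:
# GUIDELLM__LOGGING__DISABLED -> settings.logging.disabled
os.environ["GUIDELLM__LOGGING__DISABLED"] = "true"

settings = Settings()  # the class defined in the snippet above
assert settings.logging.disabled is True
```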

src/config/__init__.py

Lines changed: 5 additions & 2 deletions

```diff
@@ -20,9 +20,12 @@ class OpenAISettings(BaseModel):
     api_key: str = "invalid"
 
     # OpenAI-compatible server URL
-    # NOTE: The default value is default address of llama.cpp web server
+    # NOTE: The default value is default address of llama.cpp http server
     base_url: str = "http://localhost:8080"
 
+    # The max value of generated tokens
+    max_gen_tokens: int = 4096
+
 
 class Settings(BaseSettings):
     """
@@ -39,7 +42,7 @@ class Settings(BaseSettings):
     """
 
     model_config = SettingsConfigDict(
-        env_prefix="GUIDELLM",
+        env_prefix="GUIDELLM__",
         env_nested_delimiter="__",
         env_file=".env",
         extra="ignore",
```

src/guidellm/__init__.py

Lines changed: 1 addition & 3 deletions

```diff
@@ -3,6 +3,4 @@
 evaluating and benchmarking large language models (LLMs).
 """
 
-from .logger import configure_logger, logger
-
-__all__ = ["logger", "configure_logger"]
+from .logger import configure_logger, logger  # noqa: F401
```

src/guidellm/backend/openai.py

Lines changed: 32 additions & 25 deletions

```diff
@@ -1,6 +1,6 @@
-import functools
 from typing import Any, Dict, Generator, List, Optional
 
+import openai
 from loguru import logger
 from openai import OpenAI, Stream
 from openai.types import Completion
@@ -37,9 +37,11 @@ class OpenAIBackend(Backend):
     def __init__(
         self,
         openai_api_key: Optional[str] = None,
-        internal_callback_url: Optional[str] = None,
+        target: Optional[str] = None,
+        host: Optional[str] = None,
+        port: Optional[int] = None,
         model: Optional[str] = None,
-        **request_args: Any,
+        **request_args,
     ):
         """
         Initialize an OpenAI Client
@@ -54,19 +56,22 @@ def __init__(
                 "must be specify for the OpenAI backend"
             )
 
-        if not (_base_url := (internal_callback_url or settings.openai.base_url)):
+        if target is not None:
+            base_url = target
+        elif host and port:
+            base_url = f"{host}:{port}"
+        elif settings.openai.base_url is not None:
+            base_url = settings.openai.base_url
+        else:
             raise ValueError(
                 "`GUIDELLM__OPENAI__BASE_URL` environment variable "
-                "or --openai-base-url CLI parameter "
-                "must be specify for the OpenAI backend"
+                "or --target CLI parameter must be specified for the OpenAI backend."
             )
-        self.openai_client = OpenAI(api_key=_api_key, base_url=_base_url)
+
+        self.openai_client = OpenAI(api_key=_api_key, base_url=base_url)
         self.model = model or self.default_model
 
-        logger.info(
-            f"Initialized OpenAIBackend with callback url: {internal_callback_url} "
-            f"and model: {self.model}"
-        )
+        logger.info(f"OpenAI {self.model} Backend listening on {target}")
 
     def make_request(
         self, request: TextGenerationRequest
@@ -85,8 +90,11 @@ def make_request(
         # How many completions to generate for each prompt
         request_args: Dict = {"n": 1}
 
-        if (num_gen_tokens := request.params.get("generated_tokens", None)) is not None:
-            request_args.update(max_tokens=num_gen_tokens, stop=None)
+        num_gen_tokens: int = (
+            request.params.get("generated_tokens", None)
+            or settings.openai.max_gen_tokens
+        )
+        request_args.update({"max_tokens": num_gen_tokens, "stop": None})
 
         if self.request_args:
             request_args.update(self.request_args)
@@ -110,11 +118,7 @@ def make_request(
                         prompt_token_count=(
                             request.prompt_token_count or self._token_count(request.prompt)
                         ),
-                        output_token_count=(
-                            num_gen_tokens
-                            if num_gen_tokens
-                            else self._token_count(chunk_content)
-                        ),
+                        output_token_count=(self._token_count(chunk_content)),
                     )
                 else:
                     logger.debug("Received token from OpenAI backend")
@@ -128,15 +132,18 @@ def available_models(self) -> List[str]:
         :rtype: List[str]
         """
 
-        models: List[str] = [
-            model.id for model in self.openai_client.models.list().data
-        ]
-        logger.info(f"Available models: {models}")
-
-        return models
+        try:
+            models: List[str] = [
+                model.id for model in self.openai_client.models.list().data
+            ]
+        except openai.NotFoundError as error:
+            logger.error("No available models for OpenAI Backend")
+            raise error
+        else:
+            logger.info(f"Available models: {models}")
+            return models
 
     @property
-    @functools.lru_cache(maxsize=1)
     def default_model(self) -> str:
         """
         Get the default model for the backend.
```
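Reading the constructor above, the base URL is resolved in order: an explicit `target` wins, then `host` + `port` are combined, then `settings.openai.base_url` (i.e. `GUIDELLM__OPENAI__BASE_URL`), and otherwise a `ValueError` is raised. A usage sketch; the URLs, key, and model name are placeholders:

```py
from guidellm.backend.openai import OpenAIBackend

# 1) An explicit target takes precedence:
backend = OpenAIBackend(
    openai_api_key="sk-test",
    target="http://localhost:8000/v1",
    model="my-model",  # passing a model skips the default-model lookup against the server
)

# 2) Otherwise host and port are combined into f"{host}:{port}":
backend = OpenAIBackend(
    openai_api_key="sk-test", host="http://localhost", port=8000, model="my-model"
)

# 3) Otherwise GUIDELLM__OPENAI__BASE_URL / settings.openai.base_url is used;
#    if that is also unset, the constructor raises a ValueError.
```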

src/guidellm/core/__init__.py

Lines changed: 1 addition & 2 deletions

```diff
@@ -8,7 +8,7 @@
     TextGenerationError,
     TextGenerationResult,
 )
-from .serializable import Serializable, SerializableFileType
+from .serializable import Serializable
 
 __all__ = [
     "Distribution",
@@ -19,6 +19,5 @@
     "TextGenerationBenchmarkReport",
     "RequestConcurrencyMeasurement",
     "Serializable",
-    "SerializableFileType",
     "GuidanceReport",
 ]
```

src/guidellm/core/report.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -2,8 +2,8 @@
 
 from pydantic import Field
 
-from guidellm.core.serializable import Serializable
 from guidellm.core.result import TextGenerationBenchmarkReport
+from guidellm.core.serializable import Serializable
 
 __all__ = [
     "GuidanceReport",
```

src/guidellm/core/serializable.py

Lines changed: 26 additions & 50 deletions

```diff
@@ -1,24 +1,18 @@
-from typing import Any, Optional
-
 import os
+from typing import Any, Literal, Tuple, Union
+
 import yaml
 from loguru import logger
 from pydantic import BaseModel, ConfigDict
-from enum import Enum
 
-from guidellm.utils import is_file_name
+from guidellm.utils import is_directory_name, is_file_name
 
+__all__ = ["Serializable", "_Extension"]
 
-__all__ = ["Serializable", "SerializableFileType"]
 
+_Extension = Union[Literal["yaml"], Literal["json"]]
 
-class SerializableFileType(Enum):
-    """
-    Enum class for file types supported by Serializable.
-    """
-
-    YAML = "yaml"
-    JSON = "json"
+AVAILABLE_FILE_EXTENSIONS: Tuple[_Extension, ...] = ("yaml", "json")
 
 
 class Serializable(BaseModel):
@@ -90,7 +84,7 @@ def from_json(cls, data: str):
 
         return obj
 
-    def save_file(self, path: str, type_: Optional[SerializableFileType] = None) -> str:
+    def save_file(self, path: str, extension: _Extension = "yaml") -> str:
         """
         Save the model to a file in either YAML or JSON format.
 
@@ -103,44 +97,28 @@ def save_file(self, path: str, type_: Optional[SerializableFileType] = None) ->
         it will save in YAML format.
         :return: The path to the saved file.
         """
-        logger.debug("Saving to file... {} with format: {}", path, type_)
-
-        if not is_file_name(path):
-            file_name = f"{self.__class__.__name__.lower()}"
-            if type_:
-                file_name += f".{type_.value.lower()}"
-            else:
-                file_name += ".yaml"
-                type_ = SerializableFileType.YAML
-            path = os.path.join(path, file_name)
 
-        if not type_:
-            extension = path.split(".")[-1].upper()
-
-            if extension not in SerializableFileType.__members__:
+        if is_file_name(path):
+            requested_extension = path.split(".")[-1].lower()
+            if requested_extension not in AVAILABLE_FILE_EXTENSIONS:
                 raise ValueError(
-                    f"Unsupported file extension: {extension}. "
-                    f"Expected one of {', '.join(SerializableFileType.__members__)}) "
-                    f"for {path}"
+                    f"Unsupported file extension: .{requested_extension}. "
+                    f"Expected one of {', '.join(AVAILABLE_FILE_EXTENSIONS)}."
                 )
 
-            type_ = SerializableFileType[extension]
-
-        if type_.name not in SerializableFileType.__members__:
-            raise ValueError(
-                f"Unsupported file format: {type_} "
-                f"(expected 'yaml' or 'json') for {path}"
-            )
-
-        os.makedirs(os.path.dirname(path), exist_ok=True)
+        elif is_directory_name(path):
+            file_name = f"{self.__class__.__name__.lower()}.{extension}"
+            path = os.path.join(path, file_name)
+        else:
+            raise ValueError("Output path must be either a directory or a file path")
 
         with open(path, "w") as file:
-            if type_ == SerializableFileType.YAML:
+            if extension == "yaml":
                 file.write(self.to_yaml())
-            elif type_ == SerializableFileType.JSON:
+            elif extension == "json":
                 file.write(self.to_json())
             else:
-                raise ValueError(f"Unsupported file format: {type_}")
+                raise ValueError(f"Unsupported file format: {extension}")
 
         logger.info("Successfully saved {} to {}", self.__class__.__name__, path)
 
@@ -161,25 +139,23 @@ def load_file(cls, path: str):
         elif not os.path.isfile(path):
             raise ValueError(f"Path is not a file: {path}")
 
-        extension = path.split(".")[-1].upper()
+        extension = path.split(".")[-1].lower()
 
-        if extension not in SerializableFileType.__members__:
+        if extension not in AVAILABLE_FILE_EXTENSIONS:
             raise ValueError(
                 f"Unsupported file extension: {extension}. "
-                f"Expected one of {', '.join(SerializableFileType.__members__)}) "
+                f"Expected one of {AVAILABLE_FILE_EXTENSIONS} "
                 f"for {path}"
             )
 
-        type_ = SerializableFileType[extension]
-
         with open(path, "r") as file:
             data = file.read()
 
-        if type_ == SerializableFileType.YAML:
+        if extension == "yaml":
             obj = cls.from_yaml(data)
-        elif type_ == SerializableFileType.JSON:
+        elif extension == "json":
             obj = cls.from_json(data)
         else:
-            raise ValueError(f"Unsupported file format: {type_}")
+            raise ValueError(f"Unsupported file format: {extension}")
 
         return obj
```
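A short usage sketch of the new extension handling. The `Example` model and paths are hypothetical, and whether `"out"` passes `is_directory_name` is an assumption; note also that the rewritten `save_file` no longer calls `os.makedirs`, so the target directory must already exist:

```py
from guidellm.core import Serializable


class Example(Serializable):  # hypothetical model for illustration
    value: int = 0


# With a file path, the suffix is validated against ("yaml", "json"),
# but the written format is governed by the `extension` argument:
saved = Example(value=1).save_file("out/example.json", extension="json")

# With a directory path, "<classname>.<extension>" is appended
# (extension defaults to "yaml"):
saved = Example(value=1).save_file("out", extension="json")  # -> out/example.json

loaded = Example.load_file(saved)  # format is inferred from the file suffix
```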

src/guidellm/executor/__init__.py

Lines changed: 4 additions & 4 deletions

```diff
@@ -1,17 +1,17 @@
 from .executor import Executor
 from .profile_generator import (
+    RATE_TYPE_TO_LOAD_GEN_MODE_MAPPER,
+    RATE_TYPE_TO_PROFILE_MODE_MAPPER,
     FixedRateProfileGenerator,
     Profile,
     ProfileGenerationMode,
     ProfileGenerator,
     SweepProfileGenerator,
-    rate_type_to_load_gen_mode,
-    rate_type_to_profile_mode,
 )
 
 __all__ = [
-    "rate_type_to_load_gen_mode",
-    "rate_type_to_profile_mode",
+    "RATE_TYPE_TO_LOAD_GEN_MODE_MAPPER",
+    "RATE_TYPE_TO_PROFILE_MODE_MAPPER",
     "Executor",
     "ProfileGenerationMode",
     "Profile",
```

src/guidellm/executor/profile_generator.py

Lines changed: 3 additions & 2 deletions

```diff
@@ -16,7 +16,7 @@
     "SweepProfileGenerator",
 ]
 
-rate_type_to_load_gen_mode = {
+RATE_TYPE_TO_LOAD_GEN_MODE_MAPPER = {
     "synchronous": LoadGenerationMode.SYNCHRONOUS,
     "constant": LoadGenerationMode.CONSTANT,
     "poisson": LoadGenerationMode.POISSON,
@@ -28,7 +28,7 @@ class ProfileGenerationMode(Enum):
     SWEEP = "sweep"
 
 
-rate_type_to_profile_mode = {
+RATE_TYPE_TO_PROFILE_MODE_MAPPER = {
     "synchronous": ProfileGenerationMode.FIXED_RATE,
     "constant": ProfileGenerationMode.FIXED_RATE,
     "poisson": ProfileGenerationMode.FIXED_RATE,
@@ -80,6 +80,7 @@
         super().__init__(ProfileGenerationMode.FIXED_RATE)
         if load_gen_mode == LoadGenerationMode.SYNCHRONOUS and rates and len(rates) > 0:
             raise ValueError("custom rates are not supported in synchronous mode")
+
         self._rates: Optional[List[float]] = rates
         self._load_gen_mode = load_gen_mode
         self._generated: bool = False
```
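For reference, a sketch of how the renamed mapper constants are presumably consumed; that they translate the CLI's `--rate-type` value is an assumption based on the CLI changes in this PR, not shown in this diff:

```py
from guidellm.executor import (
    RATE_TYPE_TO_LOAD_GEN_MODE_MAPPER,
    RATE_TYPE_TO_PROFILE_MODE_MAPPER,
)

rate_type = "constant"  # e.g. the CLI's --rate-type value

load_gen_mode = RATE_TYPE_TO_LOAD_GEN_MODE_MAPPER[rate_type]  # LoadGenerationMode.CONSTANT
profile_mode = RATE_TYPE_TO_PROFILE_MODE_MAPPER[rate_type]    # ProfileGenerationMode.FIXED_RATE
```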
