import asyncio
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Any, AsyncGenerator, Dict, List, Literal, Optional, Type, Union

from loguru import logger
from PIL import Image

from guidellm.backend.response import ResponseSummary, StreamingTextResponse

__all__ = [
    "Backend",
    "BackendType",
]


BackendType = Literal["openai_http"]


class Backend(ABC):
    """
    Abstract base class for generative AI backends.

    This class provides a common interface for creating and interacting with
    different generative AI backends. Subclasses should implement the abstract
    methods to define specific backend behavior.

    :cvar _registry: A registry dictionary that maps BackendType to backend classes.
    :param type_: The type of the backend.
    """

    _registry: Dict[BackendType, "Type[Backend]"] = {}

    @classmethod
    def register(cls, backend_type: BackendType):
        """
        A decorator to register a backend class in the backend registry.

        :param backend_type: The type of backend to register.
        :type backend_type: BackendType
        :return: The decorated backend class.
        :rtype: Type[Backend]
        """
        if backend_type in cls._registry:
            raise ValueError(f"Backend type already registered: {backend_type}")

        def inner_wrapper(wrapped_class: Type["Backend"]):
            if not issubclass(wrapped_class, Backend):
                raise TypeError("Only subclasses of Backend can be registered")

            cls._registry[backend_type] = wrapped_class
            logger.info("Registered backend type: {}", backend_type)
            return wrapped_class

        return inner_wrapper

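    # Illustrative sketch, not part of the original module: a concrete backend
    # would typically register itself with this decorator. The class name below
    # is an assumption about an implementation living elsewhere in the package.
    #
    #   @Backend.register("openai_http")
    #   class OpenAIHTTPBackend(Backend):
    #       ...
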
    @classmethod
    def create(cls, type_: BackendType, **kwargs) -> "Backend":
        """
        Factory method to create a backend instance based on the backend type.

        :param type_: The type of backend to create.
        :type type_: BackendType
        :param kwargs: Additional arguments for backend initialization.
        :return: An instance of a subclass of Backend.
        :rtype: Backend
        :raises ValueError: If the backend type is not registered.
        """
        logger.info("Creating backend of type {}", type_)

        if type_ not in cls._registry:
            err = ValueError(f"Unsupported backend type: {type_}")
            logger.error("{}", err)
            raise err

        return cls._registry[type_](**kwargs)

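    # Illustrative sketch, not part of the original module: creating and
    # validating a backend instance. The keyword arguments shown are assumptions
    # that depend on the concrete backend implementation.
    #
    #   backend = Backend.create("openai_http", target="http://localhost:8000")
    #   backend.validate()
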
    def __init__(self, type_: BackendType):
        self._type = type_

    @property
    def type_(self) -> BackendType:
        """
        :return: The type of the backend.
        """
        return self._type

    @property
    @abstractmethod
    def target(self) -> str:
        """
        :return: The target location for the backend.
        """
        ...

    @property
    @abstractmethod
    def model(self) -> Optional[str]:
        """
        :return: The model used for the backend requests.
        """
        ...

    def validate(self):
        """
        Handle final setup and validate the backend is ready for use.
        If not successful, raises the appropriate exception.
        """
        logger.info("{} validating backend {}", self.__class__.__name__, self.type_)
        self.check_setup()
        models = self.available_models()
        if not models:
            raise ValueError("No models available for the backend")

        async def _test_request():
            async for _ in self.text_completions(
                prompt="Test connection", output_token_count=1
            ):  # type: ignore[attr-defined]
                pass

        asyncio.run(_test_request())

    @abstractmethod
    def check_setup(self):
        """
        Check the setup for the backend.
        If unsuccessful, raises the appropriate exception.

        :raises ValueError: If the setup check fails.
        """
        ...

    @abstractmethod
    def available_models(self) -> List[str]:
        """
        Get the list of available models for the backend.

        :return: The list of available models.
        :rtype: List[str]
        """
        ...

    @abstractmethod
    async def text_completions(
        self,
        prompt: Union[str, List[str]],
        request_id: Optional[str] = None,
        prompt_token_count: Optional[int] = None,
        output_token_count: Optional[int] = None,
        **kwargs,
    ) -> AsyncGenerator[Union[StreamingTextResponse, ResponseSummary], None]:
        """
        Generate text-only completions for the given prompt.
        Does not support multiple modalities, complicated chat interfaces,
        or chat templates. Specifically, it requests with only the prompt.

        :param prompt: The prompt (or list of prompts) to generate a completion for.
            If a list is supplied, these are concatenated and run through the model
            for a single prompt.
        :param request_id: The unique identifier for the request, if any.
            Added to logging statements and the response for tracking purposes.
        :param prompt_token_count: The number of tokens measured in the prompt, if any.
            Returned in the response stats for later analysis, if applicable.
        :param output_token_count: If supplied, the number of tokens to enforce
            generation of for the output for this request.
        :param kwargs: Additional keyword arguments to pass with the request.
        :return: An async generator that yields a StreamingTextResponse for the start,
            a StreamingTextResponse for each received iteration,
            and a ResponseSummary for the final response.
        """
        ...

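    # Illustrative sketch, not part of the original module: how a caller might
    # consume the stream from a concrete implementation, keeping only the final
    # summary. The coroutine below is hypothetical usage, not part of the API.
    #
    #   async def run_text(backend: Backend) -> None:
    #       async for response in backend.text_completions(
    #           prompt="Hello, world", output_token_count=16
    #       ):
    #           if isinstance(response, ResponseSummary):
    #               print(response)
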
    @abstractmethod
    async def chat_completions(
        self,
        content: Union[
            str,
            List[Union[str, Dict[str, Union[str, Dict[str, str]]], Path, Image.Image]],
            Any,
        ],
        request_id: Optional[str] = None,
        prompt_token_count: Optional[int] = None,
        output_token_count: Optional[int] = None,
        raw_content: bool = False,
        **kwargs,
    ) -> AsyncGenerator[Union[StreamingTextResponse, ResponseSummary], None]:
        """
        Generate chat completions for the given content.
        Supports multiple modalities, complicated chat interfaces, and chat templates.
        Specifically, it requests with the content, which can be any combination of
        text, images, and audio provided the target model supports it,
        and returns the output text. Additionally, any chat templates
        for the model are applied within the backend.

        :param content: The content (or list of content) to generate a completion for.
            This supports any combination of text, images, and audio (model dependent).
            Supported text-only request examples:
            content="Sample prompt", content=["Sample prompt", "Second prompt"],
            content=[{"type": "text", "value": "Sample prompt"}].
            Supported text and image request examples:
            content=["Describe the image", PIL.Image.open("image.jpg")],
            content=["Describe the image", Path("image.jpg")],
            content=["Describe the image", {"type": "image_url",
            "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}}].
            Supported text and audio request examples:
            content=["Transcribe the audio", Path("audio.wav")],
            content=["Transcribe the audio", {"type": "input_audio",
            "input_audio": {"data": f"{base64_bytes}", "format": "wav"}}].
            Additionally, if raw_content=True then the content is passed directly to
            the backend without any processing.
        :param request_id: The unique identifier for the request, if any.
            Added to logging statements and the response for tracking purposes.
        :param prompt_token_count: The number of tokens measured in the prompt, if any.
            Returned in the response stats for later analysis, if applicable.
        :param output_token_count: If supplied, the number of tokens to enforce
            generation of for the output for this request.
        :param kwargs: Additional keyword arguments to pass with the request.
        :return: An async generator that yields a StreamingTextResponse for the start,
            a StreamingTextResponse for each received iteration,
            and a ResponseSummary for the final response.
        """
        ...
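
    # Illustrative sketch, not part of the original module: a multimodal chat
    # request as a caller might issue it; the image path is hypothetical.
    #
    #   async for response in backend.chat_completions(
    #       content=["Describe the image", Path("image.jpg")],
    #       output_token_count=64,
    #   ):
    #       ...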