Parfeniuk/tests backfill #7

Closed
wants to merge 4 commits into from
8 changes: 8 additions & 0 deletions .env.example
@@ -0,0 +1,8 @@
## Docker configurations

# You can hardcode the platform for building vLLM locally, since it is supported
# only on the x86 CPU architecture. Builds on ARM CPUs may fail without this setting.
# BUILDPLATFORM=linux/x86_64

# This environment variable defines which port is exposed locally
DOCKER_VLLM_PORT_EXPOSE=8000
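
A minimal sketch of how an application might read these variables, assuming only the standard library; the helper name and base URL are illustrative and not part of this PR:

import os

def vllm_base_url() -> str:
    # DOCKER_VLLM_PORT_EXPOSE comes from .env.example above; "8000" mirrors its default.
    port = os.getenv("DOCKER_VLLM_PORT_EXPOSE", "8000")
    return f"http://localhost:{port}"
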
3 changes: 3 additions & 0 deletions .gitignore
@@ -160,3 +160,6 @@ cython_debug/
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

# macOS files
.DS_Store
7 changes: 4 additions & 3 deletions Makefile
@@ -15,8 +15,9 @@ style:
isort src tests
flake8 src tests --max-line-length 88

# test:
# pytest tests

test:
python -m pytest --cache-clear -vvv -x -s ./tests

build:
python setup.py sdist bdist_wheel
@@ -31,4 +32,4 @@ clean:
rm -rf .mypy_cache
rm -rf .pytest_cache

.PHONY: install install-dev quality style test test-unit test-integration test-e2e test-smoke test-sanity test-regression build clean
.PHONY: install install-dev quality style test build clean
3 changes: 3 additions & 0 deletions pyproject.toml
@@ -20,6 +20,9 @@ lint.select = ["E", "F", "W"]
max-line-length = 88

[tool.pytest.ini_options]
addopts = '-s -vvv --cache-clear'
asyncio_mode = 'auto'
python_classes = "DisableTestClasses"
markers = [
"smoke: quick tests to check basic functionality",
"sanity: detailed tests to ensure major functions work correctly",
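
With this configuration, addopts applies -s -vvv --cache-clear to every run, asyncio_mode = 'auto' (a pytest-asyncio setting) lets plain async def tests be collected and run without an explicit marker, and python_classes = "DisableTestClasses" effectively turns off class-based collection, so tests are written as module-level functions. A minimal sketch of tests this setup would pick up; the names and assertions are illustrative:

import pytest

@pytest.mark.smoke
def test_addition_smoke():
    # Collected as a plain module-level function; Test* classes are ignored
    # because python_classes is set to "DisableTestClasses".
    assert 1 + 1 == 2

async def test_async_backend_call():
    # No @pytest.mark.asyncio needed: asyncio_mode = "auto" runs this
    # coroutine on an event loop automatically.
    assert True
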
50 changes: 26 additions & 24 deletions setup.py
@@ -1,46 +1,48 @@
from setuptools import setup, find_packages
from typing import Tuple

from setuptools import find_packages, setup


def _setup_long_description() -> Tuple[str, str]:
return open("README.md", "r", encoding="utf-8").read(), "text/markdown"


setup(
name='guidellm',
version='0.1.0',
author='Neuralmagic, Inc.',
description='Guidance platform for deploying and managing large language models.',
name="guidellm",
version="0.1.0",
author="Neuralmagic, Inc.",
description="Guidance platform for deploying and managing large language models.",
long_description=_setup_long_description()[0],
long_description_content_type=_setup_long_description()[1],
license="Apache",
url="https://github.com/neuralmagic/guidellm",
packages=find_packages(where='src'),
package_dir={'': 'src'},
packages=find_packages(where="src"),
package_dir={"": "src"},
include_package_data=True,
install_requires=[
'datasets',
'loguru',
'numpy',
'openai',
'requests',
'transformers',
"datasets",
"loguru",
"numpy",
"openai",
"requests",
"transformers",
],
extras_require={
'dev': [
'pytest',
'sphinx',
'ruff',
'mypy',
'black',
'isort',
'flake8',
'pre-commit',
"dev": [
"flake8",
"isort",
"mypy",
"polyfactory",
"pre-commit",
"pytest",
"pytest-mock",
"ruff",
"sphinx",
],
},
entry_points={
'console_scripts': [
'guidellm=guidellm.main:main',
"console_scripts": [
"guidellm=guidellm.main:main",
],
},
python_requires=">=3.8.0",
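
The console_scripts entry point above maps an installed guidellm command to guidellm.main:main, and the reworked dev extras are installed with pip install -e ".[dev]". A minimal sketch of the equivalent direct invocation, assuming main() takes no arguments (the signature is not shown in this diff):

from guidellm.main import main

if __name__ == "__main__":
    main()  # the same callable the `guidellm` console script resolves to
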
File renamed without changes.
Empty file added src/domain/__init__.py
Empty file.
@@ -1,9 +1,9 @@
from .base import Backend, BackendTypes, GenerativeResponse
from .base import Backend, BackendEngine, GenerativeResponse
from .openai import OpenAIBackend

__all__ = [
"Backend",
"BackendTypes",
"BackendEngine",
"GenerativeResponse",
"OpenAIBackend",
]
56 changes: 35 additions & 21 deletions src/guidellm/backend/base.py → src/domain/backend/base.py
@@ -1,18 +1,23 @@
import uuid
from abc import ABC, abstractmethod
from dataclasses import dataclass
from enum import Enum
from typing import Iterator, List, Optional, Type, Union

from loguru import logger

from guidellm.core.request import TextGenerationRequest
from guidellm.core.result import TextGenerationResult
from domain.core import TextGenerationRequest, TextGenerationResult

__all__ = ["Backend", "BackendTypes", "GenerativeResponse"]
__all__ = ["Backend", "BackendEngine", "GenerativeResponse"]


class BackendTypes(Enum):
class BackendEngine(str, Enum):
"""
Determines the engine of the LLM backend.
Every backend implemented in the project declares an engine.

NOTE: the `TEST` engine must be used only for testing purposes.
"""

TEST = "test"
OPENAI_SERVER = "openai_server"

@@ -33,43 +38,46 @@ class GenerativeResponse:

class Backend(ABC):
"""
An abstract base class for generative AI backends.
An abstract base class with template methods for generative AI backends.
"""

_registry = {}

@staticmethod
def register_backend(backend_type: BackendTypes):
@classmethod
def register(cls, backend_type: BackendEngine):
"""
A decorator to register a backend class in the backend registry.

:param backend_type: The type of backend to register.
:type backend_type: BackendTypes
:type backend_type: BackendEngine
"""

def inner_wrapper(wrapped_class: Type["Backend"]):
Backend._registry[backend_type] = wrapped_class
cls._registry[backend_type] = wrapped_class
return wrapped_class

return inner_wrapper

@staticmethod
def create_backend(backend_type: Union[str, BackendTypes], **kwargs) -> "Backend":
@classmethod
def create(cls, backend_type: Union[str, BackendEngine], **kwargs) -> "Backend":
"""
Factory method to create a backend based on the backend type.

:param backend_type: The type of backend to create.
:type backend_type: BackendTypes
:type backend_type: BackendEngine
:param kwargs: Additional arguments for backend initialization.
:type kwargs: dict
:return: An instance of a subclass of Backend.
:rtype: Backend
"""

logger.info(f"Creating backend of type {backend_type}")
if backend_type not in Backend._registry:

if backend_type not in cls._registry:
logger.error(f"Unsupported backend type: {backend_type}")
raise ValueError(f"Unsupported backend type: {backend_type}")
return Backend._registry[backend_type](**kwargs)

return cls._registry[backend_type](**kwargs)

def submit(self, request: TextGenerationRequest) -> TextGenerationResult:
"""
@@ -80,23 +88,25 @@ def submit(self, request: TextGenerationRequest) -> TextGenerationResult:
:return: The populated result.
:rtype: TextGenerationResult
"""

logger.info(f"Submitting request with prompt: {request.prompt}")
result_id = str(uuid.uuid4())
result = TextGenerationResult(result_id)
result = TextGenerationResult(request=request)
result.start(request.prompt)

for response in self.make_request(request):
if response.type_ == "token_iter" and response.add_token:
result.output_token(response.add_token)
elif response.type_ == "final":
result.end(
response.output,
# NOTE: clarify whether the `or ""` fallback makes any sense
response.output or "",
response.prompt_token_count,
response.output_token_count,
)
break

logger.info(f"Request completed with output: {result.output}")

return result

@abstractmethod
@@ -121,8 +131,10 @@ def available_models(self) -> List[str]:
:return: A list of available models.
:rtype: List[str]
"""
raise NotImplementedError()

pass

@property
@abstractmethod
def default_model(self) -> str:
"""
@@ -131,7 +143,8 @@ def default_model(self) -> str:
:return: The default model.
:rtype: str
"""
raise NotImplementedError()

pass

@abstractmethod
def model_tokenizer(self, model: str) -> Optional[str]:
@@ -143,4 +156,5 @@ def model_tokenizer(self, model: str) -> Optional[str]:
:return: The tokenizer for the model, or None if it cannot be created.
:rtype: Optional[str]
"""
raise NotImplementedError()

pass
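
Taken together, register and create form a small registry/factory: a subclass decorates itself with an engine value, and callers instantiate it by that value. A minimal sketch of the intended usage; EchoBackend, the GenerativeResponse field values, and the TextGenerationRequest keyword are assumptions based on the code above, not part of this PR:

from typing import Iterator, List, Optional

from domain.backend import Backend, BackendEngine, GenerativeResponse
from domain.core import TextGenerationRequest


@Backend.register(BackendEngine.TEST)
class EchoBackend(Backend):
    def make_request(self, request: TextGenerationRequest) -> Iterator[GenerativeResponse]:
        # A single "final" response that echoes the prompt back; other
        # GenerativeResponse fields are assumed to have defaults.
        yield GenerativeResponse(type_="final", output=request.prompt)

    def available_models(self) -> List[str]:
        return ["echo"]

    @property
    def default_model(self) -> str:
        return "echo"

    def model_tokenizer(self, model: str) -> Optional[str]:
        return None


# The factory looks EchoBackend up in the registry by its engine value.
backend = Backend.create(BackendEngine.TEST)
result = backend.submit(TextGenerationRequest(prompt="Hello"))  # keyword assumed
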