Commit 4d46368

Respect GUIDELLM__PREFERRED_ROUTE during backend validation (#223)
`Backend.validate()` always issued a smoke-test call to the legacy /v1/completions endpoint, even when the caller specified `GUIDELLM__PREFERRED_ROUTE=chat_completions`. That broke validation against deployments that expose only the chat-completions route. This PR makes backend validation honor the `GUIDELLM__PREFERRED_ROUTE` setting: instead of always hitting /v1/completions, it now chooses between text_completions and chat_completions based on the configured preference.
1 parent 4a422e4 commit 4d46368
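The routing decision this commit introduces can be sketched in isolation. The helper below, `choose_validation_route`, is hypothetical (not part of guidellm) and only mirrors the branch the PR adds: an environment-style setting `GUIDELLM__PREFERRED_ROUTE` selects which endpoint the validation smoke test targets. The real project reads this through a richer settings object rather than a raw dict.

```python
def choose_validation_route(env: dict) -> str:
    """Pick the smoke-test endpoint from a GUIDELLM__PREFERRED_ROUTE-style setting.

    Hypothetical sketch: guidellm's actual settings layer is more elaborate;
    this only reproduces the endpoint-selection branch added in this commit.
    """
    preferred = env.get("GUIDELLM__PREFERRED_ROUTE", "text_completions")
    if preferred == "chat_completions":
        # Deployment exposes only the chat-completions route.
        return "/v1/chat/completions"
    # Legacy default used before this change.
    return "/v1/completions"


# Usage: the chat preference steers validation away from /v1/completions.
route = choose_validation_route({"GUIDELLM__PREFERRED_ROUTE": "chat_completions"})
```

With no preference set, the function falls back to the legacy /v1/completions route, matching the pre-existing behavior of `Backend.validate()`.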

File tree

1 file changed: +15 additions, -4 deletions


src/guidellm/backend/backend.py

Lines changed: 15 additions & 4 deletions

@@ -7,6 +7,7 @@
 from PIL import Image

 from guidellm.backend.response import ResponseSummary, StreamingTextResponse
+from guidellm.config import settings

 __all__ = [
     "Backend",
@@ -129,10 +130,20 @@ async def validate(self):
         if not models:
             raise ValueError("No models available for the backend")

-        async for _ in self.text_completions(
-            prompt="Test connection", output_token_count=1
-        ):  # type: ignore[attr-defined]
-            pass
+        # Use the preferred route defined in the global settings when performing the
+        # validation request. This avoids calling an unavailable endpoint (ie
+        # /v1/completions) when the deployment only supports the chat completions
+        # endpoint.
+        if settings.preferred_route == "chat_completions":
+            async for _ in self.chat_completions(  # type: ignore[attr-defined]
+                content="Test connection", output_token_count=1
+            ):
+                pass
+        else:
+            async for _ in self.text_completions(  # type: ignore[attr-defined]
+                prompt="Test connection", output_token_count=1
+            ):
+                pass

         await self.reset()
