Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 15 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,11 @@ import asyncio
import any_llm_client


config = any_llm_client.OpenAIConfig(url="http://127.0.0.1:11434/v1/chat/completions", model_name="qwen2.5-coder:1.5b")
config = any_llm_client.OpenAIConfig(
url="http://127.0.0.1:11434/v1/chat/completions",
model_name="qwen2.5-coder:1.5b",
request_extra={"best_of": 3},
)


async def main() -> None:
Expand Down Expand Up @@ -52,7 +56,11 @@ import asyncio
import any_llm_client


config = any_llm_client.OpenAIConfig(url="http://127.0.0.1:11434/v1/chat/completions", model_name="qwen2.5-coder:1.5b")
config = any_llm_client.OpenAIConfig(
url="http://127.0.0.1:11434/v1/chat/completions",
model_name="qwen2.5-coder:1.5b",
request_extra={"best_of": 3},
)


async def main() -> None:
Expand Down Expand Up @@ -127,7 +135,8 @@ class Settings(pydantic_settings.BaseSettings):
os.environ["LLM_MODEL"] = """{
"api_type": "openai",
"url": "http://127.0.0.1:11434/v1/chat/completions",
"model_name": "qwen2.5-coder:1.5b"
"model_name": "qwen2.5-coder:1.5b",
"request_extra": {"best_of": 3}
}"""
settings = Settings()

Expand All @@ -146,6 +155,7 @@ config = any_llm_client.OpenAIConfig(
url=pydantic.HttpUrl("https://api.openai.com/v1/chat/completions"),
auth_token=os.environ["OPENAI_API_KEY"],
model_name="gpt-4o-mini",
request_extra={"best_of": 3},
)

async with any_llm_client.OpenAIClient(config, ...) as client:
Expand Down Expand Up @@ -191,3 +201,5 @@ async with any_llm_client.get_client(..., request_retry=any_llm_client.RequestRe
```python
await client.request_llm_message("Кек, чо как вообще на нарах?", extra={"best_of": 3})
```

The `extra` parameter is merged with `request_extra` from `OpenAIConfig`; when the same key appears in both, the value from `extra` takes precedence.
5 changes: 3 additions & 2 deletions any_llm_client/clients/openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ class OpenAIConfig(LLMConfig):
url: pydantic.HttpUrl
auth_token: str | None = pydantic.Field(default_factory=lambda: os.environ.get(OPENAI_AUTH_TOKEN_ENV_NAME))
model_name: str
request_extra: dict[str, typing.Any] = pydantic.Field(default_factory=dict)
force_user_assistant_message_alternation: bool = False
"Gemma 2 doesn't support {role: system, text: ...} message, and requires alternated messages"
api_type: typing.Literal["openai"] = "openai"
Expand Down Expand Up @@ -149,7 +150,7 @@ async def request_llm_message(
model=self.config.model_name,
messages=self._prepare_messages(messages),
temperature=temperature,
**extra or {},
**self.config.request_extra | (extra or {}),
).model_dump(mode="json")
try:
response: typing.Final = await make_http_request(
Expand Down Expand Up @@ -182,7 +183,7 @@ async def stream_llm_message_chunks(
model=self.config.model_name,
messages=self._prepare_messages(messages),
temperature=temperature,
**extra or {},
**self.config.request_extra | (extra or {}),
).model_dump(mode="json")
try:
async with make_streaming_http_request(
Expand Down
Loading