
Commit 3d5022b

[Feat] Support for all litellm providers on Responses API (works with Codex) - Anthropic, Bedrock API, VertexAI, Ollama (#10132)
* transform request
* basic handler for LiteLLMCompletionTransformationHandler
* complete transform litellm to responses api
* fixes to test
* fix stream=True
* fix streaming iterator
* fixes for transformation
* fixes for anthropic codex support
* fix pass response_api_optional_params
* test anthropic responses api tools
* update responses types
* working codex with litellm
* add session handler
* fixes streaming iterator
* fix handler
* add litellm codex example
* fix code quality
* test fix
* docs litellm codex
* litellm codexdoc
* docs openai codex with litellm
* docs litellm openai codex
* litellm codex
* linting fixes for transforming responses API
* fix import error
* fix responses api test
* add sync iterator support for responses api
1 parent 3e87ec4 commit 3d5022b

File tree

14 files changed: +1282 -53 lines changed
Lines changed: 139 additions & 0 deletions
@@ -0,0 +1,139 @@
import Image from '@theme/IdealImage';
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

# Using LiteLLM with OpenAI Codex

This guide walks you through connecting OpenAI Codex to LiteLLM. Using LiteLLM with Codex allows teams to:
- Access 100+ LLMs through the Codex interface
- Use powerful models like Gemini through a familiar interface
- Track spend and usage with LiteLLM's built-in analytics
- Control model access with virtual keys

<Image img={require('../../img/litellm_codex.gif')} />

## Quickstart

Make sure to set up LiteLLM with the [LiteLLM Getting Started Guide](../proxy/docker_quick_start.md).

## 1. Install OpenAI Codex

Install the OpenAI Codex CLI globally using npm or yarn:

<Tabs>
<TabItem value="npm" label="npm">

```bash showLineNumbers
npm i -g @openai/codex
```

</TabItem>
<TabItem value="yarn" label="yarn">

```bash showLineNumbers
yarn global add @openai/codex
```

</TabItem>
</Tabs>

## 2. Start LiteLLM Proxy

<Tabs>
<TabItem value="docker" label="Docker">

```bash showLineNumbers
docker run \
    -v $(pwd)/litellm_config.yaml:/app/config.yaml \
    -p 4000:4000 \
    ghcr.io/berriai/litellm:main-latest \
    --config /app/config.yaml
```

</TabItem>
<TabItem value="pip" label="LiteLLM CLI">

```bash showLineNumbers
litellm --config /path/to/config.yaml
```

</TabItem>
</Tabs>

LiteLLM should now be running on [http://localhost:4000](http://localhost:4000)

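Before moving on, it can help to confirm the proxy is reachable from a client. A minimal sketch, assuming the `openai` Python package is installed and the proxy uses the example key `sk-1234` shown later in this guide:

```python showLineNumbers
from openai import OpenAI

# Point the OpenAI client at the LiteLLM proxy instead of api.openai.com.
client = OpenAI(base_url="http://localhost:4000", api_key="sk-1234")

# Listing models should return every model_name configured on the proxy.
for model in client.models.list():
    print(model.id)
```
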
## 3. Configure LiteLLM for Model Routing

Ensure your LiteLLM Proxy is properly configured to route to your desired models. Create a `litellm_config.yaml` file with the following content:

```yaml showLineNumbers
model_list:
  - model_name: o3-mini
    litellm_params:
      model: openai/o3-mini
      api_key: os.environ/OPENAI_API_KEY
  - model_name: claude-3-7-sonnet-latest
    litellm_params:
      model: anthropic/claude-3-7-sonnet-latest
      api_key: os.environ/ANTHROPIC_API_KEY
  - model_name: gemini-2.0-flash
    litellm_params:
      model: gemini/gemini-2.0-flash
      api_key: os.environ/GEMINI_API_KEY

litellm_settings:
  drop_params: true
```

This configuration enables routing to specific OpenAI, Anthropic, and Gemini models with explicit names.

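Because this release adds Responses API support for non-OpenAI providers, the same proxy can also serve `/v1/responses` requests for these models. A minimal sketch, again assuming the `openai` Python SDK and the example key `sk-1234`:

```python showLineNumbers
from openai import OpenAI

client = OpenAI(base_url="http://localhost:4000", api_key="sk-1234")

# A Responses API request routed to Anthropic through LiteLLM's completion bridge.
response = client.responses.create(
    model="claude-3-7-sonnet-latest",
    input="Write a one-line docstring for a function that reverses a string.",
)
print(response.output_text)
```
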
## 4. Configure Codex to Use LiteLLM Proxy

Set the required environment variables to point Codex to your LiteLLM Proxy:

```bash
# Point to your LiteLLM Proxy server
export OPENAI_BASE_URL=http://0.0.0.0:4000

# Use your LiteLLM API key (if you've set up authentication)
export OPENAI_API_KEY="sk-1234"
```

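To confirm the variables are picked up the same way Codex will see them, you can run one request with any OpenAI-compatible client. The sketch below assumes the `openai` Python SDK, which also reads `OPENAI_BASE_URL` and `OPENAI_API_KEY` from the environment:

```python showLineNumbers
from openai import OpenAI

# No arguments: the client reads OPENAI_BASE_URL and OPENAI_API_KEY from the
# environment, so a successful call here means Codex will reach the same proxy.
client = OpenAI()

response = client.chat.completions.create(
    model="o3-mini",
    messages=[{"role": "user", "content": "Reply with the single word: ready"}],
)
print(response.choices[0].message.content)
```
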
## 5. Run Codex with Gemini

With everything configured, you can now run Codex with the Gemini model defined in your config:

```bash showLineNumbers
codex --model gemini-2.0-flash --full-auto
```

<Image img={require('../../img/litellm_codex.gif')} />

The `--full-auto` flag runs Codex in its full-auto approval mode, allowing it to edit files and run commands without prompting for confirmation at each step.

## 6. Advanced Options

### Using Different Models

You can use any model configured in your LiteLLM proxy:

```bash
# Use Claude models
codex --model claude-3-7-sonnet-latest

# Use Google AI Studio Gemini models
codex --model gemini/gemini-2.0-flash
```

## Troubleshooting

- If you encounter connection issues, ensure your LiteLLM Proxy is running and accessible at the specified URL
- Verify your LiteLLM API key is valid if you're using authentication
- Check that your model routing configuration is correct
- For model-specific errors, ensure the model is properly configured in your LiteLLM setup

## Additional Resources

- [LiteLLM Docker Quick Start Guide](../proxy/docker_quick_start.md)
- [OpenAI Codex GitHub Repository](https://github.com/openai/codex)
- [LiteLLM Virtual Keys and Authentication](../proxy/virtual_keys.md)

docs/my-website/img/litellm_codex.gif

12 MB

docs/my-website/sidebars.js

Lines changed: 1 addition & 0 deletions
@@ -443,6 +443,7 @@ const sidebars = {
         label: "Tutorials",
         items: [
           "tutorials/openweb_ui",
+          "tutorials/openai_codex",
           "tutorials/msft_sso",
           "tutorials/prompt_caching",
           "tutorials/tag_management",

litellm/proxy/proxy_config.yaml

Lines changed: 11 additions & 4 deletions
@@ -1,6 +1,13 @@
 model_list:
-  - model_name: fake-openai-endpoint
+  - model_name: openai/*
     litellm_params:
-      model: openai/fake
-      api_key: fake-key
-      api_base: https://exampleopenaiendpoint-production.up.railway.app/
+      model: openai/*
+  - model_name: anthropic/*
+    litellm_params:
+      model: anthropic/*
+  - model_name: gemini/*
+    litellm_params:
+      model: gemini/*
+litellm_settings:
+  drop_params: true
+
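With these wildcard entries, the proxy forwards any model name under a provider prefix to that provider, using the matching API key from the proxy's environment. A minimal sketch of what that looks like from a client, assuming the `openai` Python SDK and a placeholder proxy key:

```python
from openai import OpenAI

client = OpenAI(base_url="http://localhost:4000", api_key="sk-1234")

# "anthropic/*" in the config means any anthropic/... model name is accepted
# and routed to Anthropic, using ANTHROPIC_API_KEY from the proxy environment.
response = client.chat.completions.create(
    model="anthropic/claude-3-7-sonnet-latest",
    messages=[{"role": "user", "content": "Say hello in one word."}],
)
print(response.choices[0].message.content)
```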
Lines changed: 115 additions & 0 deletions
@@ -0,0 +1,115 @@
"""
Handler for transforming responses api requests to litellm.completion requests
"""

from typing import Any, Coroutine, Optional, Union

import litellm
from litellm.responses.litellm_completion_transformation.streaming_iterator import (
    LiteLLMCompletionStreamingIterator,
)
from litellm.responses.litellm_completion_transformation.transformation import (
    LiteLLMCompletionResponsesConfig,
)
from litellm.responses.streaming_iterator import BaseResponsesAPIStreamingIterator
from litellm.types.llms.openai import (
    ResponseInputParam,
    ResponsesAPIOptionalRequestParams,
    ResponsesAPIResponse,
)
from litellm.types.utils import ModelResponse


class LiteLLMCompletionTransformationHandler:

    def response_api_handler(
        self,
        model: str,
        input: Union[str, ResponseInputParam],
        responses_api_request: ResponsesAPIOptionalRequestParams,
        custom_llm_provider: Optional[str] = None,
        _is_async: bool = False,
        stream: Optional[bool] = None,
        **kwargs,
    ) -> Union[
        ResponsesAPIResponse,
        BaseResponsesAPIStreamingIterator,
        Coroutine[
            Any, Any, Union[ResponsesAPIResponse, BaseResponsesAPIStreamingIterator]
        ],
    ]:
        litellm_completion_request: dict = (
            LiteLLMCompletionResponsesConfig.transform_responses_api_request_to_chat_completion_request(
                model=model,
                input=input,
                responses_api_request=responses_api_request,
                custom_llm_provider=custom_llm_provider,
                stream=stream,
                **kwargs,
            )
        )

        if _is_async:
            return self.async_response_api_handler(
                litellm_completion_request=litellm_completion_request,
                request_input=input,
                responses_api_request=responses_api_request,
                **kwargs,
            )

        litellm_completion_response: Union[
            ModelResponse, litellm.CustomStreamWrapper
        ] = litellm.completion(
            **litellm_completion_request,
            **kwargs,
        )

        if isinstance(litellm_completion_response, ModelResponse):
            responses_api_response: ResponsesAPIResponse = (
                LiteLLMCompletionResponsesConfig.transform_chat_completion_response_to_responses_api_response(
                    chat_completion_response=litellm_completion_response,
                    request_input=input,
                    responses_api_request=responses_api_request,
                )
            )

            return responses_api_response

        elif isinstance(litellm_completion_response, litellm.CustomStreamWrapper):
            return LiteLLMCompletionStreamingIterator(
                litellm_custom_stream_wrapper=litellm_completion_response,
                request_input=input,
                responses_api_request=responses_api_request,
            )

    async def async_response_api_handler(
        self,
        litellm_completion_request: dict,
        request_input: Union[str, ResponseInputParam],
        responses_api_request: ResponsesAPIOptionalRequestParams,
        **kwargs,
    ) -> Union[ResponsesAPIResponse, BaseResponsesAPIStreamingIterator]:
        litellm_completion_response: Union[
            ModelResponse, litellm.CustomStreamWrapper
        ] = await litellm.acompletion(
            **litellm_completion_request,
            **kwargs,
        )

        if isinstance(litellm_completion_response, ModelResponse):
            responses_api_response: ResponsesAPIResponse = (
                LiteLLMCompletionResponsesConfig.transform_chat_completion_response_to_responses_api_response(
                    chat_completion_response=litellm_completion_response,
                    request_input=request_input,
                    responses_api_request=responses_api_request,
                )
            )

            return responses_api_response

        elif isinstance(litellm_completion_response, litellm.CustomStreamWrapper):
            return LiteLLMCompletionStreamingIterator(
                litellm_custom_stream_wrapper=litellm_completion_response,
                request_input=request_input,
                responses_api_request=responses_api_request,
            )
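For reference, a rough sketch of exercising this handler directly. The import path and the empty `responses_api_request` dict are assumptions here; in normal use the class is invoked by LiteLLM's Responses API entry point (and the proxy route) rather than called by hand.

```python
# Hypothetical import path for the handler above; adjust to where the file
# actually lives in the repo.
from litellm.responses.litellm_completion_transformation.handler import (
    LiteLLMCompletionTransformationHandler,
)

handler = LiteLLMCompletionTransformationHandler()

# Synchronous, non-streaming path (requires ANTHROPIC_API_KEY in the environment):
# the Responses API request is transformed into a litellm.completion() call and the
# resulting chat completion is mapped back into a ResponsesAPIResponse.
response = handler.response_api_handler(
    model="anthropic/claude-3-7-sonnet-latest",
    input="Summarize what this handler does in one sentence.",
    responses_api_request={},  # optional Responses API params (TypedDict)
    custom_llm_provider="anthropic",
    stream=False,
)
print(response)
```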
Lines changed: 59 additions & 0 deletions
@@ -0,0 +1,59 @@
"""
Responses API has previous_response_id, which is the id of the previous response.

LiteLLM needs to maintain a cache of the previous response input, output, previous_response_id, and model.

This class handles that cache.
"""

from typing import List, Optional, Tuple, Union

from typing_extensions import TypedDict

from litellm.caching import InMemoryCache
from litellm.types.llms.openai import ResponseInputParam, ResponsesAPIResponse

RESPONSES_API_PREVIOUS_RESPONSES_CACHE = InMemoryCache()
MAX_PREV_SESSION_INPUTS = 50


class ResponsesAPISessionElement(TypedDict, total=False):
    input: Union[str, ResponseInputParam]
    output: ResponsesAPIResponse
    response_id: str
    previous_response_id: Optional[str]


class SessionHandler:

    def add_completed_response_to_cache(
        self, response_id: str, session_element: ResponsesAPISessionElement
    ):
        RESPONSES_API_PREVIOUS_RESPONSES_CACHE.set_cache(
            key=response_id, value=session_element
        )

    def get_chain_of_previous_input_output_pairs(
        self, previous_response_id: str
    ) -> List[Tuple[ResponseInputParam, ResponsesAPIResponse]]:
        response_api_inputs: List[Tuple[ResponseInputParam, ResponsesAPIResponse]] = []
        current_previous_response_id = previous_response_id

        count_session_elements = 0
        while current_previous_response_id:
            if count_session_elements > MAX_PREV_SESSION_INPUTS:
                break
            session_element = RESPONSES_API_PREVIOUS_RESPONSES_CACHE.get_cache(
                key=current_previous_response_id
            )
            if session_element:
                response_api_inputs.append(
                    (session_element.get("input"), session_element.get("output"))
                )
                current_previous_response_id = session_element.get(
                    "previous_response_id"
                )
            else:
                break
            count_session_elements += 1
        return response_api_inputs
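A short sketch of how this session cache is meant to be used. The import path is an assumption, and the `output` field is omitted for brevity (it would normally hold the completed `ResponsesAPIResponse`).

```python
# Hypothetical import path for the session handler above; adjust to the actual module.
from litellm.responses.litellm_completion_transformation.session_handler import (
    ResponsesAPISessionElement,
    SessionHandler,
)

session_handler = SessionHandler()

# Cache two turns; "resp_2" points back to "resp_1" via previous_response_id.
session_handler.add_completed_response_to_cache(
    response_id="resp_1",
    session_element=ResponsesAPISessionElement(
        input="What is LiteLLM?",
        response_id="resp_1",
        previous_response_id=None,
    ),
)
session_handler.add_completed_response_to_cache(
    response_id="resp_2",
    session_element=ResponsesAPISessionElement(
        input="And how does Codex use it?",
        response_id="resp_2",
        previous_response_id="resp_1",
    ),
)

# Walking back from "resp_2" returns the cached (input, output) pairs,
# most recent first, up to MAX_PREV_SESSION_INPUTS entries.
pairs = session_handler.get_chain_of_previous_input_output_pairs("resp_2")
print([inp for inp, _ in pairs])  # ['And how does Codex use it?', 'What is LiteLLM?']
```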

0 commit comments
