From 87565f4e75ae93e1e401a4b173b5604d03da32e8 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Sat, 29 Mar 2025 15:49:53 -0700 Subject: [PATCH 1/9] fix(internal_user_endpoints.py): cleanup unused variables on beta endpoint no team/org split on daily user endpoint --- .../internal_user_endpoints.py | 24 ++----------------- 1 file changed, 2 insertions(+), 22 deletions(-) diff --git a/litellm/proxy/management_endpoints/internal_user_endpoints.py b/litellm/proxy/management_endpoints/internal_user_endpoints.py index 90444013a8f9..8124b7fd20cf 100644 --- a/litellm/proxy/management_endpoints/internal_user_endpoints.py +++ b/litellm/proxy/management_endpoints/internal_user_endpoints.py @@ -1370,22 +1370,6 @@ async def get_user_daily_activity( default=None, description="End date in YYYY-MM-DD format", ), - group_by: List[GroupByDimension] = fastapi.Query( - default=[GroupByDimension.DATE], - description="Dimensions to group by. Can combine multiple (e.g. date,team)", - ), - view_by: Literal["team", "organization", "user"] = fastapi.Query( - default="user", - description="View spend at team/org/user level", - ), - team_id: Optional[str] = fastapi.Query( - default=None, - description="Filter by specific team", - ), - org_id: Optional[str] = fastapi.Query( - default=None, - description="Filter by specific organization", - ), model: Optional[str] = fastapi.Query( default=None, description="Filter by specific model", @@ -1408,13 +1392,13 @@ async def get_user_daily_activity( Meant to optimize querying spend data for analytics for a user. Returns: - (by date/team/org/user/model/api_key/model_group/provider) + (by date) - spend - prompt_tokens - completion_tokens - total_tokens - api_requests - - breakdown by team, organization, user, model, api_key, model_group, provider + - breakdown by model, api_key, provider """ from litellm.proxy.proxy_server import prisma_client @@ -1439,10 +1423,6 @@ async def get_user_daily_activity( } } - if team_id: - where_conditions["team_id"] = team_id - if org_id: - where_conditions["organization_id"] = org_id if model: where_conditions["model"] = model if api_key: From 4f94987eba760e92fdff253fc5ca8b4c2d16d7ab Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Sat, 29 Mar 2025 16:11:13 -0700 Subject: [PATCH 2/9] build(model_prices_and_context_window.json): gemini-2.0-flash supports audio input --- litellm/model_prices_and_context_window_backup.json | 2 ++ model_prices_and_context_window.json | 2 ++ 2 files changed, 4 insertions(+) diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index 4ce592d7b07f..64525d660bef 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -4696,6 +4696,8 @@ "supports_vision": true, "supports_response_schema": true, "supports_audio_output": true, + "supports_audio_input": true, + "supported_modalities": ["text", "image", "audio", "video"], "supports_tool_choice": true, "source": "https://ai.google.dev/pricing#2_0flash" }, diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 4ce592d7b07f..64525d660bef 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -4696,6 +4696,8 @@ "supports_vision": true, "supports_response_schema": true, "supports_audio_output": true, + "supports_audio_input": true, + "supported_modalities": ["text", "image", "audio", "video"], "supports_tool_choice": true, "source": 
"https://ai.google.dev/pricing#2_0flash" }, From 4d40eba5888c46575804a15d3daf4ee0322c4618 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Sat, 29 Mar 2025 16:31:41 -0700 Subject: [PATCH 3/9] feat(gemini/transformation.py): support passing audio input to gemini --- .../llms/vertex_ai/gemini/transformation.py | 13 +++++ tests/llm_translation/base_llm_unit_tests.py | 47 +++++++++++++++++++ tests/llm_translation/test_gemini.py | 2 +- tests/llm_translation/test_gpt4o_audio.py | 7 ++- 4 files changed, 66 insertions(+), 3 deletions(-) diff --git a/litellm/llms/vertex_ai/gemini/transformation.py b/litellm/llms/vertex_ai/gemini/transformation.py index 96b33ee1875e..b08033baf374 100644 --- a/litellm/llms/vertex_ai/gemini/transformation.py +++ b/litellm/llms/vertex_ai/gemini/transformation.py @@ -27,6 +27,7 @@ from litellm.types.llms.openai import ( AllMessageValues, ChatCompletionAssistantMessage, + ChatCompletionAudioObject, ChatCompletionImageObject, ChatCompletionTextObject, ) @@ -174,6 +175,18 @@ def _gemini_convert_messages_with_history( # noqa: PLR0915 image_url=image_url, format=format ) _parts.append(_part) + elif element["type"] == "input_audio": + audio_element = cast(ChatCompletionAudioObject, element) + if audio_element["input_audio"].get("data") is not None: + _part = PartType( + inline_data=BlobType( + data=audio_element["input_audio"]["data"], + mime_type="audio/{}".format( + audio_element["input_audio"]["format"] + ), + ) + ) + _parts.append(_part) user_content.extend(_parts) elif ( _message_content is not None diff --git a/tests/llm_translation/base_llm_unit_tests.py b/tests/llm_translation/base_llm_unit_tests.py index eb5851219781..2d9da94caf12 100644 --- a/tests/llm_translation/base_llm_unit_tests.py +++ b/tests/llm_translation/base_llm_unit_tests.py @@ -7,6 +7,7 @@ import os import uuid import time +import base64 sys.path.insert( 0, os.path.abspath("../..") @@ -890,6 +891,50 @@ async def test_completion_cost(self): assert cost > 0 + def test_supports_audio_input(self): + from litellm.utils import return_raw_request, supports_audio_input + from litellm.types.utils import CallTypes + os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True" + litellm.model_cost = litellm.get_model_cost_map(url="") + + + litellm.drop_params = True + base_completion_call_args = self.get_base_completion_call_args() + if not supports_audio_input(base_completion_call_args["model"], None): + print("Model does not support audio input") + pytest.skip("Model does not support audio input") + + url = "https://openaiassets.blob.core.windows.net/$web/API/docs/audio/alloy.wav" + response = httpx.get(url) + response.raise_for_status() + wav_data = response.content + audio_format = "wav" + encoded_string = base64.b64encode(wav_data).decode("utf-8") + + raw_request = return_raw_request( + endpoint=CallTypes.completion, + kwargs={ + **base_completion_call_args, + "modalities": ["text", "audio"], + "audio": {"voice": "alloy", "format": audio_format}, + "messages": [ + { + "role": "user", + "content": [ + {"type": "text", "text": "What is in this recording?"}, + { + "type": "input_audio", + "input_audio": {"data": encoded_string, "format": "wav"}, + }, + ], + }, + ] + } + ) + print("raw_request: ", raw_request) + + assert encoded_string in json.dumps(raw_request), "Audio data not sent to gemini" + class BaseOSeriesModelsTest(ABC): # test across azure/openai @abstractmethod def get_base_completion_call_args(self): @@ -1089,3 +1134,5 @@ class RFormat(BaseModel): ) print(response) + + \ No newline at end of file diff 
--git a/tests/llm_translation/test_gemini.py b/tests/llm_translation/test_gemini.py index f592337593f9..7c7c10daeecc 100644 --- a/tests/llm_translation/test_gemini.py +++ b/tests/llm_translation/test_gemini.py @@ -15,7 +15,7 @@ class TestGoogleAIStudioGemini(BaseLLMChatTest): def get_base_completion_call_args(self) -> dict: - return {"model": "gemini/gemini-1.5-flash-002"} + return {"model": "gemini/gemini-2.0-flash"} def test_tool_call_no_arguments(self, tool_call_no_arguments): """Test that tool calls with no arguments is translated correctly. Relevant issue: https://github.com/BerriAI/litellm/issues/6833""" diff --git a/tests/llm_translation/test_gpt4o_audio.py b/tests/llm_translation/test_gpt4o_audio.py index 822cfb035610..2ba33529cc16 100644 --- a/tests/llm_translation/test_gpt4o_audio.py +++ b/tests/llm_translation/test_gpt4o_audio.py @@ -84,12 +84,14 @@ async def test_audio_output_from_model(stream): @pytest.mark.asyncio @pytest.mark.parametrize("stream", [True, False]) -async def test_audio_input_to_model(stream): +@pytest.mark.parametrize("model", ["gemini/gemini-2.0-flash"]) # "gpt-4o-audio-preview", +async def test_audio_input_to_model(stream, model): # Fetch the audio file and convert it to a base64 encoded string audio_format = "pcm16" if stream is False: audio_format = "wav" litellm._turn_on_debug() + litellm.drop_params = True url = "https://openaiassets.blob.core.windows.net/$web/API/docs/audio/alloy.wav" response = requests.get(url) response.raise_for_status() @@ -97,7 +99,7 @@ async def test_audio_input_to_model(stream): encoded_string = base64.b64encode(wav_data).decode("utf-8") try: completion = await litellm.acompletion( - model="gpt-4o-audio-preview", + model=model, modalities=["text", "audio"], audio={"voice": "alloy", "format": audio_format}, stream=stream, @@ -120,6 +122,7 @@ async def test_audio_input_to_model(stream): except Exception as e: if "openai-internal" in str(e): pytest.skip("Skipping test due to openai-internal error") + raise e if stream is True: await check_streaming_response(completion) else: From 2d4709d20b3c1905bfab84dec5d2c334fed60758 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Sat, 29 Mar 2025 16:35:09 -0700 Subject: [PATCH 4/9] test: fix test --- tests/llm_translation/test_gpt4o_audio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/llm_translation/test_gpt4o_audio.py b/tests/llm_translation/test_gpt4o_audio.py index 2ba33529cc16..f41dabb66650 100644 --- a/tests/llm_translation/test_gpt4o_audio.py +++ b/tests/llm_translation/test_gpt4o_audio.py @@ -84,7 +84,7 @@ async def test_audio_output_from_model(stream): @pytest.mark.asyncio @pytest.mark.parametrize("stream", [True, False]) -@pytest.mark.parametrize("model", ["gemini/gemini-2.0-flash"]) # "gpt-4o-audio-preview", +@pytest.mark.parametrize("model", ["gpt-4o-audio-preview"]) # "gpt-4o-audio-preview", async def test_audio_input_to_model(stream, model): # Fetch the audio file and convert it to a base64 encoded string audio_format = "pcm16" From ea56aa8f72b92631dd299cee759810bc582f3169 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Sat, 29 Mar 2025 16:50:48 -0700 Subject: [PATCH 5/9] fix(gemini/transformation.py): support audio input as a url enables passing google cloud bucket urls --- .../llms/vertex_ai/gemini/transformation.py | 15 +++++++ tests/llm_translation/base_llm_unit_tests.py | 44 ++++++++++++++----- 2 files changed, 49 insertions(+), 10 deletions(-) diff --git a/litellm/llms/vertex_ai/gemini/transformation.py 
b/litellm/llms/vertex_ai/gemini/transformation.py index b08033baf374..3b2375c35e99 100644 --- a/litellm/llms/vertex_ai/gemini/transformation.py +++ b/litellm/llms/vertex_ai/gemini/transformation.py @@ -28,6 +28,7 @@ AllMessageValues, ChatCompletionAssistantMessage, ChatCompletionAudioObject, + ChatCompletionFileObject, ChatCompletionImageObject, ChatCompletionTextObject, ) @@ -187,6 +188,20 @@ def _gemini_convert_messages_with_history( # noqa: PLR0915 ) ) _parts.append(_part) + elif element["type"] == "file": + file_element = cast(ChatCompletionFileObject, element) + file_id = file_element["file"].get("file_id") + if not file_id: + continue + mime_type = _get_image_mime_type_from_url(file_id) + if mime_type is not None: + _part = PartType( + file_data=FileDataType( + file_uri=file_id, + mime_type=mime_type, + ) + ) + _parts.append(_part) user_content.extend(_parts) elif ( _message_content is not None diff --git a/tests/llm_translation/base_llm_unit_tests.py b/tests/llm_translation/base_llm_unit_tests.py index 2d9da94caf12..432fd848b565 100644 --- a/tests/llm_translation/base_llm_unit_tests.py +++ b/tests/llm_translation/base_llm_unit_tests.py @@ -890,8 +890,8 @@ async def test_completion_cost(self): assert cost > 0 - - def test_supports_audio_input(self): + @pytest.mark.parametrize("input_type", ["input_audio", "audio_url"]) + def test_supports_audio_input(self, input_type): from litellm.utils import return_raw_request, supports_audio_input from litellm.types.utils import CallTypes os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True" @@ -911,6 +911,33 @@ def test_supports_audio_input(self): audio_format = "wav" encoded_string = base64.b64encode(wav_data).decode("utf-8") + audio_content = [ + { + "type": "text", + "text": "What is in this recording?" + } + ] + + test_file_id = "gs://bucket/file.wav" + + if input_type == "input_audio": + audio_content.append({ + "type": "input_audio", + "input_audio": {"data": encoded_string, "format": audio_format}, + }) + elif input_type == "audio_url": + audio_content.append( + { + "type": "file", + "file": { + "file_id": test_file_id, + "filename": "my-sample-audio-file", + } + } + ) + + + raw_request = return_raw_request( endpoint=CallTypes.completion, kwargs={ @@ -920,20 +947,17 @@ def test_supports_audio_input(self): "messages": [ { "role": "user", - "content": [ - {"type": "text", "text": "What is in this recording?"}, - { - "type": "input_audio", - "input_audio": {"data": encoded_string, "format": "wav"}, - }, - ], + "content": audio_content, }, ] } ) print("raw_request: ", raw_request) - assert encoded_string in json.dumps(raw_request), "Audio data not sent to gemini" + if input_type == "input_audio": + assert encoded_string in json.dumps(raw_request), "Audio data not sent to gemini" + elif input_type == "audio_url": + assert test_file_id in json.dumps(raw_request), "Audio URL not sent to gemini" class BaseOSeriesModelsTest(ABC): # test across azure/openai @abstractmethod From e89ef649601797228a8c16c5cab2a5c646395474 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Sat, 29 Mar 2025 17:21:48 -0700 Subject: [PATCH 6/9] fix(gemini/transformation.py): support explicitly passing format of file --- litellm/llms/vertex_ai/gemini/transformation.py | 13 +++++++++++-- litellm/proxy/_new_secret_config.yaml | 16 ++++++++++------ litellm/types/llms/openai.py | 9 +++++---- 3 files changed, 26 insertions(+), 12 deletions(-) diff --git a/litellm/llms/vertex_ai/gemini/transformation.py b/litellm/llms/vertex_ai/gemini/transformation.py index 
3b2375c35e99..b0456108eac3 100644 --- a/litellm/llms/vertex_ai/gemini/transformation.py +++ b/litellm/llms/vertex_ai/gemini/transformation.py @@ -154,7 +154,7 @@ def _gemini_convert_messages_with_history( # noqa: PLR0915 _message_content = messages[msg_i].get("content") if _message_content is not None and isinstance(_message_content, list): _parts: List[PartType] = [] - for element in _message_content: + for element_idx, element in enumerate(_message_content): if ( element["type"] == "text" and "text" in element @@ -191,9 +191,12 @@ def _gemini_convert_messages_with_history( # noqa: PLR0915 elif element["type"] == "file": file_element = cast(ChatCompletionFileObject, element) file_id = file_element["file"].get("file_id") + format = file_element["file"].get("format") + if not file_id: continue - mime_type = _get_image_mime_type_from_url(file_id) + mime_type = format or _get_image_mime_type_from_url(file_id) + if mime_type is not None: _part = PartType( file_data=FileDataType( @@ -202,6 +205,12 @@ def _gemini_convert_messages_with_history( # noqa: PLR0915 ) ) _parts.append(_part) + else: + raise Exception( + "Unable to determine mime type for file_id: {}, set this explicitly using message[{}].content[{}].file.format".format( + file_id, msg_i, element_idx + ) + ) user_content.extend(_parts) elif ( _message_content is not None diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml index 72edeb55ca0a..6c57afc76ef3 100644 --- a/litellm/proxy/_new_secret_config.yaml +++ b/litellm/proxy/_new_secret_config.yaml @@ -16,14 +16,18 @@ model_list: - model_name: "bedrock-nova" litellm_params: model: us.amazon.nova-pro-v1:0 + - model_name: "gemini-2.0-flash" + litellm_params: + model: gemini/gemini-2.0-flash + api_key: os.environ/GEMINI_API_KEY litellm_settings: num_retries: 0 callbacks: ["prometheus"] - json_logs: true + # json_logs: true -router_settings: - routing_strategy: usage-based-routing-v2 # 👈 KEY CHANGE - redis_host: os.environ/REDIS_HOST - redis_password: os.environ/REDIS_PASSWORD - redis_port: os.environ/REDIS_PORT +# router_settings: +# routing_strategy: usage-based-routing-v2 # 👈 KEY CHANGE +# redis_host: os.environ/REDIS_HOST +# redis_password: os.environ/REDIS_PASSWORD +# redis_port: os.environ/REDIS_PORT diff --git a/litellm/types/llms/openai.py b/litellm/types/llms/openai.py index 3ba5a3a4e0fb..6378d0288814 100644 --- a/litellm/types/llms/openai.py +++ b/litellm/types/llms/openai.py @@ -505,10 +505,11 @@ class ChatCompletionDocumentObject(TypedDict): citations: Optional[CitationsObject] -class ChatCompletionFileObjectFile(TypedDict): - file_data: Optional[str] - file_id: Optional[str] - filename: Optional[str] +class ChatCompletionFileObjectFile(TypedDict, total=False): + file_data: str + file_id: str + filename: str + format: str class ChatCompletionFileObject(TypedDict): From 52e45d29a45f5ff91ff16ba806cd808c2a8d5689 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Sat, 29 Mar 2025 17:32:06 -0700 Subject: [PATCH 7/9] fix(gemini/transformation.py): expand support for inferred file types from url --- .../llms/vertex_ai/gemini/transformation.py | 61 ++++++++++++++----- 1 file changed, 45 insertions(+), 16 deletions(-) diff --git a/litellm/llms/vertex_ai/gemini/transformation.py b/litellm/llms/vertex_ai/gemini/transformation.py index b0456108eac3..8067d51c87b3 100644 --- a/litellm/llms/vertex_ai/gemini/transformation.py +++ b/litellm/llms/vertex_ai/gemini/transformation.py @@ -105,24 +105,53 @@ def _get_image_mime_type_from_url(url: str) -> 
Optional[str]: See gemini mime types: https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/image-understanding#image-requirements Supported by Gemini: - - PNG (`image/png`) - - JPEG (`image/jpeg`) - - WebP (`image/webp`) - Example: - url = https://example.com/image.jpg - Returns: image/jpeg + application/pdf + audio/mpeg + audio/mp3 + audio/wav + image/png + image/jpeg + image/webp + text/plain + video/mov + video/mpeg + video/mp4 + video/mpg + video/avi + video/wmv + video/mpegps + video/flv """ url = url.lower() - if url.endswith((".jpg", ".jpeg")): - return "image/jpeg" - elif url.endswith(".png"): - return "image/png" - elif url.endswith(".webp"): - return "image/webp" - elif url.endswith(".mp4"): - return "video/mp4" - elif url.endswith(".pdf"): - return "application/pdf" + + # Map file extensions to mime types + mime_types = { + # Images + (".jpg", ".jpeg"): "image/jpeg", + (".png",): "image/png", + (".webp",): "image/webp", + # Videos + (".mp4",): "video/mp4", + (".mov",): "video/mov", + (".mpeg", ".mpg"): "video/mpeg", + (".avi",): "video/avi", + (".wmv",): "video/wmv", + (".mpegps",): "video/mpegps", + (".flv",): "video/flv", + # Audio + (".mp3",): "audio/mp3", + (".wav",): "audio/wav", + (".mpeg",): "audio/mpeg", + # Documents + (".pdf",): "application/pdf", + (".txt",): "text/plain", + } + + # Check each extension group against the URL + for extensions, mime_type in mime_types.items(): + if any(url.endswith(ext) for ext in extensions): + return mime_type + return None From cca8973f7107c2b91e2627b2961b9c73c2161a5c Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Sat, 29 Mar 2025 17:37:51 -0700 Subject: [PATCH 8/9] fix(sagemaker/completion/transformation.py): fix special token error when counting sagemaker tokens --- litellm/llms/sagemaker/completion/transformation.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/litellm/llms/sagemaker/completion/transformation.py b/litellm/llms/sagemaker/completion/transformation.py index 9923c0e45d0a..df3d028c99a4 100644 --- a/litellm/llms/sagemaker/completion/transformation.py +++ b/litellm/llms/sagemaker/completion/transformation.py @@ -19,6 +19,7 @@ from litellm.llms.base_llm.chat.transformation import BaseConfig, BaseLLMException from litellm.types.llms.openai import AllMessageValues from litellm.types.utils import ModelResponse, Usage +from litellm.utils import token_counter from ..common_utils import SagemakerError @@ -238,9 +239,12 @@ def transform_response( ) ## CALCULATING USAGE - baseten charges on time, not tokens - have some mapping of cost here. 
- prompt_tokens = len(encoding.encode(prompt)) - completion_tokens = len( - encoding.encode(model_response["choices"][0]["message"].get("content", "")) + prompt_tokens = token_counter( + text=prompt, count_response_tokens=True + ) # doesn't apply any default token count from openai's chat template + completion_tokens = token_counter( + text=model_response["choices"][0]["message"].get("content", ""), + count_response_tokens=True, ) model_response.created = int(time.time()) From 942c9b04624542e817e396b7c577f0af7529b021 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Sat, 29 Mar 2025 17:55:52 -0700 Subject: [PATCH 9/9] test: fix import --- tests/proxy_unit_tests/test_key_generate_prisma.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/proxy_unit_tests/test_key_generate_prisma.py b/tests/proxy_unit_tests/test_key_generate_prisma.py index 99fc415c1b0d..ea837b717bb4 100644 --- a/tests/proxy_unit_tests/test_key_generate_prisma.py +++ b/tests/proxy_unit_tests/test_key_generate_prisma.py @@ -2285,10 +2285,10 @@ def test_update_logs_with_spend_logs_url(prisma_client): """ Unit test for making sure spend logs list is still updated when url passed in """ - from litellm.proxy.proxy_server import _set_spend_logs_payload + from litellm.proxy.db.db_spend_update_writer import DBSpendUpdateWriter payload = {"startTime": datetime.now(), "endTime": datetime.now()} - _set_spend_logs_payload(payload=payload, prisma_client=prisma_client) + DBSpendUpdateWriter._set_spend_logs_payload(payload=payload, prisma_client=prisma_client) assert len(prisma_client.spend_log_transactions) > 0 @@ -2296,7 +2296,7 @@ def test_update_logs_with_spend_logs_url(prisma_client): spend_logs_url = "" payload = {"startTime": datetime.now(), "endTime": datetime.now()} - _set_spend_logs_payload( + DBSpendUpdateWriter._set_spend_logs_payload( payload=payload, spend_logs_url=spend_logs_url, prisma_client=prisma_client )
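Taken together, patches 3/9 and 4/9 let callers send inline audio to Gemini through litellm's OpenAI-compatible `input_audio` content type. A minimal usage sketch, closely following the test code added in this series (assumes GEMINI_API_KEY is set in the environment; the sample WAV URL is the one the tests download):

    import base64
    import httpx
    import litellm

    # Fetch a sample recording and base64-encode it, as the tests in this series do
    url = "https://openaiassets.blob.core.windows.net/$web/API/docs/audio/alloy.wav"
    wav_data = httpx.get(url).content
    encoded_string = base64.b64encode(wav_data).decode("utf-8")

    response = litellm.completion(
        model="gemini/gemini-2.0-flash",
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "What is in this recording?"},
                    {
                        # translated to a Gemini inline_data part by
                        # _gemini_convert_messages_with_history (patch 3/9)
                        "type": "input_audio",
                        "input_audio": {"data": encoded_string, "format": "wav"},
                    },
                ],
            }
        ],
    )
    print(response.choices[0].message.content)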
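Patches 5/9 and 6/9 add a by-reference path: a `file` content element whose `file_id` is a URL (e.g. a Google Cloud Storage object), translated to a Gemini `file_data` part. The mime type is inferred from the URL extension (patch 7/9 expands the inference table), and the new optional `format` field (patch 6/9) overrides inference — necessary for extension-less paths, since the transformation now raises when no mime type can be determined. A sketch with a placeholder bucket path:

    response = litellm.completion(
        model="gemini/gemini-2.0-flash",
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "What is in this recording?"},
                    {
                        "type": "file",
                        "file": {
                            "file_id": "gs://my-bucket/my-sample-audio-file",  # placeholder, not a real bucket
                            "format": "audio/wav",  # full mime type; used verbatim as the Gemini mime_type
                        },
                    },
                ],
            }
        ],
    )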
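Patch 8/9 replaces the raw `encoding.encode(...)` calls in the SageMaker transformation with `litellm.utils.token_counter(..., count_response_tokens=True)`, which sidesteps the special-token error and, per the inline comment, skips the default per-message token overhead from OpenAI's chat template. The same helper can be called directly; a one-line sketch of the exact call shape the patch uses:

    from litellm.utils import token_counter

    # counts tokens for a bare string without applying chat-template overhead
    prompt_tokens = token_counter(text="What is in this recording?", count_response_tokens=True)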