diff --git a/docs/my-website/docs/observability/langfuse_integration.md b/docs/my-website/docs/observability/langfuse_integration.md
index 576135ba67cc..34b213f0e219 100644
--- a/docs/my-website/docs/observability/langfuse_integration.md
+++ b/docs/my-website/docs/observability/langfuse_integration.md
@@ -21,7 +21,7 @@ Example trace in Langfuse using multiple models via LiteLLM:
 ### Pre-Requisites
 Ensure you have run `pip install langfuse` for this integration
 ```shell
-pip install langfuse>=2.0.0 litellm
+pip install langfuse==2.45.0 litellm
 ```
 
 ### Quick Start
diff --git a/litellm/litellm_core_utils/prompt_templates/factory.py b/litellm/litellm_core_utils/prompt_templates/factory.py
index 022d400a606d..e33d0e9b285f 100644
--- a/litellm/litellm_core_utils/prompt_templates/factory.py
+++ b/litellm/litellm_core_utils/prompt_templates/factory.py
@@ -989,7 +989,14 @@ def _gemini_tool_call_invoke_helper(
 ) -> Optional[VertexFunctionCall]:
     name = function_call_params.get("name", "") or ""
     arguments = function_call_params.get("arguments", "")
-    arguments_dict = json.loads(arguments)
+    if (
+        isinstance(arguments, str) and len(arguments) == 0
+    ):  # pass a minimal object schema if arguments is an empty string - prevents the call from failing
+        arguments_dict = {
+            "type": "object",
+        }
+    else:
+        arguments_dict = json.loads(arguments)
     function_call = VertexFunctionCall(
         name=name,
         args=arguments_dict,
diff --git a/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py b/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py
index ad7bfddb02d1..e5b78aaef872 100644
--- a/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py
+++ b/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py
@@ -315,6 +315,7 @@ def get_tool_value(tool: dict, tool_name: str) -> Optional[dict]:
             if (
                 "parameters" in _openai_function_object
                 and _openai_function_object["parameters"] is not None
+                and isinstance(_openai_function_object["parameters"], dict)
             ):  # OPENAI accepts JSON Schema, Google accepts OpenAPI schema.
                 _openai_function_object["parameters"] = _build_vertex_schema(
                     _openai_function_object["parameters"]
@@ -344,6 +345,10 @@ def get_tool_value(tool: dict, tool_name: str) -> Optional[dict]:
             )
             _description = openai_function_object.get("description", None)
             _parameters = openai_function_object.get("parameters", None)
+            if isinstance(_parameters, str) and len(_parameters) == 0:
+                _parameters = {
+                    "type": "object",
+                }
             if _description is not None:
                 gtool_func_declaration["description"] = _description
             if _parameters is not None:
diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index 5207a3415540..e88637a67876 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -4153,6 +4153,32 @@
         "supports_assistant_prefill": true,
         "supports_tool_choice": true
     },
+    "mistral/magistral-medium-2506": {
+        "max_tokens": 40000,
+        "max_input_tokens": 40000,
+        "max_output_tokens": 40000,
+        "input_cost_per_token": 2e-06,
+        "output_cost_per_token": 5e-06,
+        "litellm_provider": "mistral",
+        "mode": "chat",
+        "source": "https://mistral.ai/news/magistral",
+        "supports_function_calling": true,
+        "supports_assistant_prefill": true,
+        "supports_tool_choice": true
+    },
+    "mistral/magistral-small-2506": {
+        "max_tokens": 40000,
+        "max_input_tokens": 40000,
+        "max_output_tokens": 40000,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "mistral",
+        "mode": "chat",
+        "source": "https://mistral.ai/news/magistral",
+        "supports_function_calling": true,
+        "supports_assistant_prefill": true,
+        "supports_tool_choice": true
+    },
     "mistral/mistral-embed": {
         "max_tokens": 8192,
         "max_input_tokens": 8192,
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index 7151a3e065b6..1dd7d34cdac1 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -72,6 +72,11 @@ model_list:
       model: openai/fake
      api_key: fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/
+  - model_name: "anthropic-claude-vertex"
+    litellm_params:
+      model: vertex_ai/claude-3-5-sonnet@20240620
+      vertex_project: internal-litellm-local-dev
+
 
 general_settings:
   store_model_in_db: true
diff --git a/litellm/proxy/management_endpoints/internal_user_endpoints.py b/litellm/proxy/management_endpoints/internal_user_endpoints.py
index 1fb38b28063f..070fb6e20028 100644
--- a/litellm/proxy/management_endpoints/internal_user_endpoints.py
+++ b/litellm/proxy/management_endpoints/internal_user_endpoints.py
@@ -392,6 +392,26 @@ def get_team_from_list(
     return None
 
 
+def get_user_id_from_request(request: Request) -> Optional[str]:
+    """
+    Get the user id from the request
+    """
+    # Get the raw query string and parse it properly to handle + characters
+    user_id: Optional[str] = None
+    query_string = str(request.url.query)
+    if "user_id=" in query_string:
+        # Extract the user_id value from the raw query string
+        import re
+        from urllib.parse import unquote
+
+        match = re.search(r"user_id=([^&]*)", query_string)
+        if match:
+            # Use unquote instead of unquote_plus to preserve + characters
+            raw_user_id = unquote(match.group(1))
+            user_id = raw_user_id
+    return user_id
+
+
 @router.get(
     "/user/info",
     tags=["Internal User management"],
@@ -400,6 +420,7 @@ def get_team_from_list(
 )
 @management_endpoint_wrapper
 async def user_info(
+    request: Request,
     user_id: Optional[str] = fastapi.Query(
         default=None, description="User ID in the request parameters"
     ),
@@ -421,6 +442,12 @@ async def user_info(
     from litellm.proxy.proxy_server import prisma_client
     try:
+        # Handle URL encoding properly by getting user_id from the original request
+        if (
+            user_id is not None and " " in user_id
+        ):  # a space here usually means a "+" in the user_id was URL-decoded - re-parse it from the raw query string
+            user_id = get_user_id_from_request(request=request)
+
         if prisma_client is None:
             raise Exception(
                 "Database not connected. Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys"
             )
@@ -433,10 +460,12 @@ async def user_info(
         elif user_id is None:
             user_id = user_api_key_dict.user_id
         ## GET USER ROW ##
+
         if user_id is not None:
             user_info = await prisma_client.get_data(user_id=user_id)
         else:
             user_info = None
+
         ## GET ALL TEAMS ##
         team_list = []
         team_id_list = []
diff --git a/litellm/proxy/management_endpoints/key_management_endpoints.py b/litellm/proxy/management_endpoints/key_management_endpoints.py
index c744cd00972e..fdc8e73dad38 100644
--- a/litellm/proxy/management_endpoints/key_management_endpoints.py
+++ b/litellm/proxy/management_endpoints/key_management_endpoints.py
@@ -1405,6 +1405,7 @@ async def generate_key_helper_fn( # noqa: PLR0915
 
     try:
         # Create a new verification token (you may want to enhance this logic based on your needs)
+
         user_data = {
             "max_budget": max_budget,
             "user_email": user_email,
diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py
index ae34fbec051f..091fe0a173fb 100644
--- a/litellm/proxy/utils.py
+++ b/litellm/proxy/utils.py
@@ -1564,10 +1564,12 @@ async def get_data( # noqa: PLR0915
                 if query_type == "find_unique":
                     if key_val is None:
                         key_val = {"user_id": user_id}
+
                     response = await self.db.litellm_usertable.find_unique(  # type: ignore
                         where=key_val,  # type: ignore
                         include={"organization_memberships": True},
                     )
+
                 elif query_type == "find_all" and key_val is not None:
                     response = await self.db.litellm_usertable.find_many(
                         where=key_val  # type: ignore
diff --git a/test_url_encoding.py b/test_url_encoding.py
new file mode 100644
index 000000000000..c3b1a6e205c6
--- /dev/null
+++ b/test_url_encoding.py
@@ -0,0 +1,31 @@
+# Test URL encoding handling for emails with + characters
+import re
+from urllib.parse import unquote
+
+def test_user_id_parsing():
+    # Simulate the raw query string that would come from the URL
+    # When user calls: http://0.0.0.0:4000/user/info?user_id=machine-user+alp-air-admin-b58-b@tempus.com
+    # The query string would be: user_id=machine-user+alp-air-admin-b58-b@tempus.com
+
+    test_cases = [
+        "user_id=machine-user+alp-air-admin-b58-b@tempus.com",
+        "user_id=machine-user%2Balp-air-admin-b58-b@tempus.com",  # URL encoded +
+        "user_id=regular@email.com",
+        "user_id=test-user@domain.com&other_param=value"
+    ]
+
+    for query_string in test_cases:
+        print(f"\nTesting query string: {query_string}")
+
+        if 'user_id=' in query_string:
+            match = re.search(r'user_id=([^&]*)', query_string)
+            if match:
+                raw_user_id = unquote(match.group(1))
+                print(f"Extracted user_id: {raw_user_id}")
+            else:
+                print("No match found")
+        else:
+            print("user_id not found in query string")
+
+if __name__ == "__main__":
+    test_user_id_parsing()
\ No newline at end of file
diff --git a/tests/llm_translation/test_gemini.py b/tests/llm_translation/test_gemini.py
index 11f613ec0ca5..710a8602b151 100644
--- a/tests/llm_translation/test_gemini.py
+++ b/tests/llm_translation/test_gemini.py
@@ -221,4 +221,21 @@ def test_gemini_with_grounding():
     assert complete_response is not None
     usage: Usage = complete_response.usage
     assert usage.prompt_tokens_details.web_search_requests is not None
-    assert usage.prompt_tokens_details.web_search_requests > 0
\ No newline at end of file
+    assert usage.prompt_tokens_details.web_search_requests > 0
+
+
+def test_gemini_with_empty_function_call_arguments():
+    from litellm import completion
+    litellm._turn_on_debug()
+    tools = [
+        {
+            "type": "function",
+            "function": {
+                "name": "get_current_weather",
+                "parameters": "",
+            },
+        }
+    ]
+    response = completion(model="gemini/gemini-2.0-flash", messages=[{"role": "user", "content": "What is the capital of France?"}], tools=tools)
+    print(response)
+    assert response.choices[0].message.content is not None
\ No newline at end of file
diff --git a/tests/proxy_unit_tests/test_jwt.py b/tests/proxy_unit_tests/test_jwt.py
index d96fb691f744..1b5a634ac3a7 100644
--- a/tests/proxy_unit_tests/test_jwt.py
+++ b/tests/proxy_unit_tests/test_jwt.py
@@ -677,7 +677,7 @@ async def aaaatest_user_token_output(
         request._url = URL(url="/team/new")
 
         result = await user_api_key_auth(request=request, api_key=bearer_token)
-        await user_info(user_id=user_id)
+        await user_info(request=request, user_id=user_id)
     except Exception as e:
         pytest.fail(f"This should not fail - {str(e)}")
     else:
diff --git a/tests/proxy_unit_tests/test_key_generate_prisma.py b/tests/proxy_unit_tests/test_key_generate_prisma.py
index e1890732ec00..0e201e43a3f2 100644
--- a/tests/proxy_unit_tests/test_key_generate_prisma.py
+++ b/tests/proxy_unit_tests/test_key_generate_prisma.py
@@ -234,6 +234,7 @@ async def test_new_user_response(prisma_client):
 )
 def test_generate_and_call_with_valid_key(prisma_client, api_route):
     # 1. Generate a Key, and use it to make a call
+    from unittest.mock import MagicMock
 
     print("prisma client=", prisma_client)
 
@@ -256,8 +257,9 @@ async def test():
         user_id = key.user_id
 
         # check /user/info to verify user_role was set correctly
+        request_mock = MagicMock()
         new_user_info = await user_info(
-            user_id=user_id, user_api_key_dict=user_api_key_dict
+            request=request_mock, user_id=user_id, user_api_key_dict=user_api_key_dict
         )
         new_user_info = new_user_info.user_info
         print("new_user_info=", new_user_info)
diff --git a/tests/proxy_unit_tests/test_proxy_server.py b/tests/proxy_unit_tests/test_proxy_server.py
index b503272077a0..878a6e1c1725 100644
--- a/tests/proxy_unit_tests/test_proxy_server.py
+++ b/tests/proxy_unit_tests/test_proxy_server.py
@@ -1274,6 +1274,7 @@ async def test_user_info_team_list(prisma_client):
 
     try:
         await user_info(
+            request=MagicMock(),
             user_id=None,
             user_api_key_dict=UserAPIKeyAuth(
                 api_key="sk-1234", user_id="default_user_id"
diff --git a/tests/test_litellm/litellm_core_utils/prompt_templates/test_litellm_core_utils_prompt_templates_factory.py b/tests/test_litellm/litellm_core_utils/prompt_templates/test_litellm_core_utils_prompt_templates_factory.py
index fcb2e664efed..927e1afe50c7 100644
--- a/tests/test_litellm/litellm_core_utils/prompt_templates/test_litellm_core_utils_prompt_templates_factory.py
+++ b/tests/test_litellm/litellm_core_utils/prompt_templates/test_litellm_core_utils_prompt_templates_factory.py
@@ -288,3 +288,23 @@ def test_bedrock_validate_format_image_or_video():
 #     assert "### System:\nBe helpful\n\n" in result["prompt"]
 #     assert "### Assistant:\nI see a cat in the image.\n\n" in result["prompt"]
 #     assert result["images"] == ["http://example.com/image.jpg"]
+
+
+def test_vertex_ai_transform_empty_function_call_arguments():
+    """
+    Test that _gemini_tool_call_invoke_helper handles empty function call arguments correctly
+    """
+    from litellm.litellm_core_utils.prompt_templates.factory import (
+        VertexFunctionCall,
+        _gemini_tool_call_invoke_helper,
+    )
+
+    function_call = {
+        "name": "get_weather",
+        "arguments": "",
+    }
+    result: VertexFunctionCall = _gemini_tool_call_invoke_helper(function_call)
+    print(result)
+    assert result["args"] == {
+        "type": "object",
+    }
diff --git a/tests/test_litellm/llms/vertex_ai/gemini/test_vertex_and_google_ai_studio_gemini.py b/tests/test_litellm/llms/vertex_ai/gemini/test_vertex_and_google_ai_studio_gemini.py
index 5c1648088c23..e47cac75dd74 100644
--- a/tests/test_litellm/llms/vertex_ai/gemini/test_vertex_and_google_ai_studio_gemini.py
+++ b/tests/test_litellm/llms/vertex_ai/gemini/test_vertex_and_google_ai_studio_gemini.py
@@ -602,7 +602,7 @@ def test_vertex_ai_streaming_usage_web_search_calculation():
 
 def test_vertex_ai_transform_parts():
     """
-    Test the _transform_parts method for converting Vertex AI function calls 
+    Test the _transform_parts method for converting Vertex AI function calls
     to OpenAI-compatible tool calls and function calls.
     """
     from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import (
@@ -615,17 +615,16 @@ def test_vertex_ai_transform_parts():
         HttpxPartType(
             functionCall={
                 "name": "get_current_weather",
-                "args": {"location": "Boston", "unit": "celsius"}
+                "args": {"location": "Boston", "unit": "celsius"},
             }
         ),
-        HttpxPartType(text="Some text content")
+        HttpxPartType(text="Some text content"),
     ]
-    
+
     function, tools = VertexGeminiConfig._transform_parts(
-        parts=parts_with_function,
-        is_function_call=True
+        parts=parts_with_function, is_function_call=True
     )
-    
+
     # Should return function, no tools
     assert function is not None
     assert function["name"] == "get_current_weather"
@@ -634,42 +634,39 @@
 
     # Test case 2: Tool call mode (is_function_call=False)
     function, tools = VertexGeminiConfig._transform_parts(
-        parts=parts_with_function,
-        is_function_call=False
+        parts=parts_with_function, is_function_call=False
     )
-    
+
     # Should return tools, no function
     assert function is None
     assert tools is not None
     assert len(tools) == 1
     assert tools[0]["type"] == "function"
     assert tools[0]["function"]["name"] == "get_current_weather"
-    assert tools[0]["function"]["arguments"] == '{"location": "Boston", "unit": "celsius"}'
+    assert (
+        tools[0]["function"]["arguments"] == '{"location": "Boston", "unit": "celsius"}'
+    )
     assert tools[0]["id"].startswith("call_")
     assert tools[0]["index"] == 0
 
     # Test case 3: Multiple function calls
     parts_with_multiple_functions = [
         HttpxPartType(
-            functionCall={
-                "name": "get_current_weather",
-                "args": {"location": "Boston"}
-            }
+            functionCall={"name": "get_current_weather", "args": {"location": "Boston"}}
         ),
         HttpxPartType(
             functionCall={
                 "name": "get_forecast",
-                "args": {"location": "New York", "days": 3}
+                "args": {"location": "New York", "days": 3},
             }
         ),
-        HttpxPartType(text="Some text content")
+        HttpxPartType(text="Some text content"),
     ]
-    
+
     function, tools = VertexGeminiConfig._transform_parts(
-        parts=parts_with_multiple_functions,
-        is_function_call=False
+        parts=parts_with_multiple_functions, is_function_call=False
     )
-    
+
     # Should return multiple tools
     assert function is None
     assert tools is not None
@@ -683,43 +683,35 @@
     # Test case 4: No function calls
     parts_without_functions = [
         HttpxPartType(text="Just some text content"),
-        HttpxPartType(text="More text")
+        HttpxPartType(text="More text"),
     ]
-    
+
    function, tools = VertexGeminiConfig._transform_parts(
-        parts=parts_without_functions,
-        is_function_call=False
+        parts=parts_without_functions, is_function_call=False
     )
-    
+
     # Should return nothing
     assert function is None
     assert tools is None
 
     # Test case 5: Empty parts list
     function, tools = VertexGeminiConfig._transform_parts(
-        parts=[],
-        is_function_call=False
+        parts=[], is_function_call=False
     )
-    
+
     # Should return nothing
     assert function is None
     assert tools is None
 
     # Test case 6: Function call with empty args
     parts_with_empty_args = [
-        HttpxPartType(
-            functionCall={
-                "name": "simple_function",
-                "args": {}
-            }
-        )
+        HttpxPartType(functionCall={"name": "simple_function", "args": {}})
     ]
-    
+
     function, tools = VertexGeminiConfig._transform_parts(
-        parts=parts_with_empty_args,
-        is_function_call=True
+        parts=parts_with_empty_args, is_function_call=True
     )
-    
+
     # Should handle empty args correctly
     assert function is not None
     assert function["name"] == "simple_function"
diff --git a/tests/test_litellm/proxy/management_endpoints/test_internal_user_endpoints.py b/tests/test_litellm/proxy/management_endpoints/test_internal_user_endpoints.py
index 6d14e043aca1..9ff79f4f785a 100644
--- a/tests/test_litellm/proxy/management_endpoints/test_internal_user_endpoints.py
+++ b/tests/test_litellm/proxy/management_endpoints/test_internal_user_endpoints.py
@@ -297,3 +297,55 @@ async def mock_check_duplicate_user_email(*args, **kwargs):
 
     # Verify that the license check was called with the correct user count
     mock_license_check.is_over_limit.assert_called_once_with(total_users=1000)
+
+
+@pytest.mark.asyncio
+async def test_user_info_url_encoding_plus_character(mocker):
+    """
+    Test that /user/info endpoint properly handles email addresses with + characters
+    when passed in the URL query parameters.
+
+    Issue: + characters in emails get converted to spaces due to URL encoding
+    Solution: Parse the raw query string to preserve + characters
+    """
+    from fastapi import Request
+
+    from litellm.proxy._types import LiteLLM_UserTable, UserAPIKeyAuth
+    from litellm.proxy.management_endpoints.internal_user_endpoints import user_info
+
+    # Mock the prisma client
+    mock_prisma_client = mocker.MagicMock()
+    mock_prisma_client.get_data = mocker.AsyncMock()
+
+    # Patch the prisma client import in the endpoint
+    mocker.patch("litellm.proxy.proxy_server.prisma_client", mock_prisma_client)
+
+    # Create a mock request with the raw query string containing +
+    mock_request = mocker.MagicMock(spec=Request)
+    mock_request.url.query = "user_id=machine-user+alp-air-admin-b58-b@tempus.com"
+
+    # Mock user_api_key_dict
+    mock_user_api_key_dict = UserAPIKeyAuth(
+        user_id="test_admin", user_role="proxy_admin"
+    )
+
+    # Call user_info function with the URL-decoded user_id (as FastAPI would pass it)
+    # FastAPI would normally convert + to space, but our fix should handle this
+    decoded_user_id = (
+        "machine-user alp-air-admin-b58-b@tempus.com"  # What FastAPI gives us
+    )
+    expected_user_id = "machine-user+alp-air-admin-b58-b@tempus.com"
+    try:
+        response = await user_info(
+            user_id=decoded_user_id,
+            user_api_key_dict=mock_user_api_key_dict,
+            request=mock_request,
+        )
+    except Exception as e:
+        print(f"Error in user_info: {e}")
+
+    # Verify that the response contains the correct user data
+    print(
+        f"mock_prisma_client.get_data.call_args: {mock_prisma_client.get_data.call_args.kwargs}"
+    )
+    assert mock_prisma_client.get_data.call_args.kwargs["user_id"] == expected_user_id