From 92c31ae401a358f2c1aa76bc0d4956c643b6b33a Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Fri, 3 Jan 2025 12:09:29 -0800
Subject: [PATCH 1/2] fix(types/utils.py): support langfuse + humanloop routes
 on llm router

---
 litellm/proxy/_new_secret_config.yaml | 7 +++++++
 litellm/types/utils.py                | 2 ++
 2 files changed, 9 insertions(+)

diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index 35e27ada0d7f..dc7bf9d62b35 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -3,6 +3,13 @@ model_list:
     litellm_params:
       model: openai/gpt-3.5-turbo
       api_key: os.environ/OPENAI_API_KEY
+  - model_name: chatbot_actions
+    litellm_params:
+      model: langfuse/azure/gpt-4o
+      api_base: "os.environ/AZURE_API_BASE"
+      api_key: "os.environ/AZURE_API_KEY"
+      tpm: 1000000
+      prompt_id: "jokes"
 
 litellm_settings:
   default_team_settings:
diff --git a/litellm/types/utils.py b/litellm/types/utils.py
index 623400ae4552..d80b75f73a5a 100644
--- a/litellm/types/utils.py
+++ b/litellm/types/utils.py
@@ -1800,6 +1800,8 @@ class LlmProviders(str, Enum):
     INFINITY = "infinity"
     DEEPGRAM = "deepgram"
     AIOHTTP_OPENAI = "aiohttp_openai"
+    LANGFUSE = "langfuse"
+    HUMANLOOP = "humanloop"
 
 
 class LiteLLMLoggingBaseClass:

From d48b01a477dab67d103308a2bc62a4b8c2f48d18 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Fri, 3 Jan 2025 12:16:59 -0800
Subject: [PATCH 2/2] fix(main.py): remove acompletion elif block
 just await if coroutine returned

---
 litellm/main.py | 84 +++++++++++--------------------------------------
 1 file changed, 18 insertions(+), 66 deletions(-)

diff --git a/litellm/main.py b/litellm/main.py
index e35b68abf8a1..157d2a786550 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -435,74 +435,26 @@ async def acompletion(
     ctx = contextvars.copy_context()
     func_with_context = partial(ctx.run, func)
+    init_response = await loop.run_in_executor(None, func_with_context)
+    if isinstance(init_response, dict) or isinstance(
+        init_response, ModelResponse
+    ):  ## CACHING SCENARIO
+        if isinstance(init_response, dict):
+            response = ModelResponse(**init_response)
+        response = init_response
+    elif asyncio.iscoroutine(init_response):
+        response = await init_response
+    else:
+        response = init_response  # type: ignore
+
     if (
-        custom_llm_provider == "openai"
-        or custom_llm_provider == "azure"
-        or custom_llm_provider == "azure_text"
-        or custom_llm_provider == "custom_openai"
-        or custom_llm_provider == "anyscale"
-        or custom_llm_provider == "mistral"
-        or custom_llm_provider == "openrouter"
-        or custom_llm_provider == "deepinfra"
-        or custom_llm_provider == "perplexity"
-        or custom_llm_provider == "groq"
-        or custom_llm_provider == "nvidia_nim"
-        or custom_llm_provider == "cohere_chat"
-        or custom_llm_provider == "cohere"
-        or custom_llm_provider == "cerebras"
-        or custom_llm_provider == "sambanova"
-        or custom_llm_provider == "ai21_chat"
-        or custom_llm_provider == "ai21"
-        or custom_llm_provider == "volcengine"
-        or custom_llm_provider == "codestral"
+        custom_llm_provider == "text-completion-openai"
         or custom_llm_provider == "text-completion-codestral"
-        or custom_llm_provider == "deepseek"
-        or custom_llm_provider == "text-completion-openai"
-        or custom_llm_provider == "huggingface"
-        or custom_llm_provider == "ollama"
-        or custom_llm_provider == "ollama_chat"
-        or custom_llm_provider == "replicate"
-        or custom_llm_provider == "vertex_ai"
-        or custom_llm_provider == "vertex_ai_beta"
-        or custom_llm_provider == "gemini"
-        or custom_llm_provider == "sagemaker"
-        or custom_llm_provider == "sagemaker_chat"
-        or custom_llm_provider == "anthropic"
-        or custom_llm_provider == "anthropic_text"
-        or custom_llm_provider == "predibase"
-        or custom_llm_provider == "bedrock"
-        or custom_llm_provider == "databricks"
-        or custom_llm_provider == "triton"
-        or custom_llm_provider == "clarifai"
-        or custom_llm_provider == "watsonx"
-        or custom_llm_provider == "cloudflare"
-        or custom_llm_provider == "aiohttp_openai"
-        or custom_llm_provider in litellm.openai_compatible_providers
-        or custom_llm_provider in litellm._custom_providers
-    ):  # currently implemented aiohttp calls for just azure, openai, hf, ollama, vertex ai soon all.
-        init_response = await loop.run_in_executor(None, func_with_context)
-        if isinstance(init_response, dict) or isinstance(
-            init_response, ModelResponse
-        ):  ## CACHING SCENARIO
-            if isinstance(init_response, dict):
-                response = ModelResponse(**init_response)
-            response = init_response
-        elif asyncio.iscoroutine(init_response):
-            response = await init_response
-        else:
-            response = init_response  # type: ignore
-
-        if (
-            custom_llm_provider == "text-completion-openai"
-            or custom_llm_provider == "text-completion-codestral"
-        ) and isinstance(response, TextCompletionResponse):
-            response = litellm.OpenAITextCompletionConfig().convert_to_chat_model_response_object(
-                response_object=response,
-                model_response_object=litellm.ModelResponse(),
-            )
-    else:
-        # Call the synchronous function using run_in_executor
-        response = await loop.run_in_executor(None, func_with_context)  # type: ignore
+    ) and isinstance(response, TextCompletionResponse):
+        response = litellm.OpenAITextCompletionConfig().convert_to_chat_model_response_object(
+            response_object=response,
+            model_response_object=litellm.ModelResponse(),
+        )
     if isinstance(response, CustomStreamWrapper):
         response.set_logging_event_loop(
             loop=loop