From 1ce0fa5fd47a71c87b3286b7e524bcda07a82b2b Mon Sep 17 00:00:00 2001
From: Cole McIntosh
Date: Fri, 6 Jun 2025 15:55:40 -0600
Subject: [PATCH 01/11] Move `test_exceptions.py` into
 `tests/test_litellm/litellm_core_utils`

---
 .../litellm_core_utils}/test_exceptions.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename tests/{local_testing => test_litellm/litellm_core_utils}/test_exceptions.py (100%)

diff --git a/tests/local_testing/test_exceptions.py b/tests/test_litellm/litellm_core_utils/test_exceptions.py
similarity index 100%
rename from tests/local_testing/test_exceptions.py
rename to tests/test_litellm/litellm_core_utils/test_exceptions.py

From 93375bfe6447273a621ef35e71bfd657e3101ea6 Mon Sep 17 00:00:00 2001
From: Cole McIntosh
Date: Fri, 6 Jun 2025 16:11:15 -0600
Subject: [PATCH 02/11] Enhance rate limit detection in ExceptionCheckers by
 implementing regex patterns for improved accuracy. Fallback to original
 logic if regex fails. Update import statement for verbose_logger.

---
 .../exception_mapping_utils.py | 23 +++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/litellm/litellm_core_utils/exception_mapping_utils.py b/litellm/litellm_core_utils/exception_mapping_utils.py
index e898ebbcb062..af62dac9ea9a 100644
--- a/litellm/litellm_core_utils/exception_mapping_utils.py
+++ b/litellm/litellm_core_utils/exception_mapping_utils.py
@@ -5,7 +5,7 @@
 import httpx
 
 import litellm
-from litellm._logging import verbose_logger
+from litellm import verbose_logger
 
 from ..exceptions import (
     APIConnectionError,
@@ -42,8 +42,23 @@ def is_error_str_rate_limit(error_str: str) -> bool:
         """
         if not isinstance(error_str, str):
             return False
-
-        return "429" in error_str or "rate limit" in error_str.lower()
+
+        # Simple regex patterns to match the most common rate limiting messages
+        rate_limit_patterns = [
+            r"429",  # HTTP 429 status code
+            r"rate[\s\-]?limit",  # "rate limit", "rate-limit", "ratelimit"
+            r"too.?many.?requests?",  # "too many requests", "too many request", "toomanyrequest", etc.
+        ]
+
+        # Combine all patterns with case-insensitive matching
+        combined_pattern = r"|".join(rate_limit_patterns)
+
+        try:
+            import re
+            return bool(re.search(combined_pattern, error_str, re.IGNORECASE))
+        except Exception:
+            # Fallback to original logic if regex fails
+            return "429" in error_str or "rate limit" in error_str.lower()
 
 
 def get_error_message(error_obj) -> Optional[str]:
@@ -2334,4 +2349,4 @@ def _add_key_name_and_team_to_alert(request_info: str, metadata: dict) -> str:
 
         return request_info
     except Exception:
-        return request_info
+        return request_info
\ No newline at end of file

From d0159073f5d61406cd0281e00378a852fff7a44f Mon Sep 17 00:00:00 2001
From: Cole McIntosh
Date: Fri, 6 Jun 2025 16:33:43 -0600
Subject: [PATCH 03/11] Refactor ExceptionCheckers to simplify rate limit
 detection by removing regex patterns, relying on string checks instead.
 This change enhances performance and maintains functionality.

---
 .../exception_mapping_utils.py | 19 ++-----------------
 1 file changed, 2 insertions(+), 17 deletions(-)

diff --git a/litellm/litellm_core_utils/exception_mapping_utils.py b/litellm/litellm_core_utils/exception_mapping_utils.py
index af62dac9ea9a..c4a96772837e 100644
--- a/litellm/litellm_core_utils/exception_mapping_utils.py
+++ b/litellm/litellm_core_utils/exception_mapping_utils.py
@@ -42,23 +42,8 @@ def is_error_str_rate_limit(error_str: str) -> bool:
         """
         if not isinstance(error_str, str):
             return False
-
-        # Simple regex patterns to match the most common rate limiting messages
-        rate_limit_patterns = [
-            r"429",  # HTTP 429 status code
-            r"rate[\s\-]?limit",  # "rate limit", "rate-limit", "ratelimit"
-            r"too.?many.?requests?",  # "too many requests", "too many request", "toomanyrequest", etc.
-        ]
-
-        # Combine all patterns with case-insensitive matching
-        combined_pattern = r"|".join(rate_limit_patterns)
-
-        try:
-            import re
-            return bool(re.search(combined_pattern, error_str, re.IGNORECASE))
-        except Exception:
-            # Fallback to original logic if regex fails
-            return "429" in error_str or "rate limit" in error_str.lower()
+
+        return "429" in error_str or "rate limit" in error_str.lower()
 
 
 def get_error_message(error_obj) -> Optional[str]:

From 8297ecf624004e1499836385c3a44b3a39fdc2b2 Mon Sep 17 00:00:00 2001
From: Cole McIntosh
Date: Sat, 7 Jun 2025 10:50:12 -0600
Subject: [PATCH 04/11] add local_testing/test_exceptions.py back

---
 tests/local_testing/test_exceptions.py | 1335 ++++++++++++++++++++++++
 1 file changed, 1335 insertions(+)
 create mode 100644 tests/local_testing/test_exceptions.py

diff --git a/tests/local_testing/test_exceptions.py b/tests/local_testing/test_exceptions.py
new file mode 100644
index 000000000000..67506f27692b
--- /dev/null
+++ b/tests/local_testing/test_exceptions.py
@@ -0,0 +1,1335 @@
+import asyncio
+import os
+import subprocess
+import sys
+import traceback
+from typing import Any
+
+from openai import AuthenticationError, BadRequestError, OpenAIError, RateLimitError
+
+from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
+
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the parent directory to the system path
+from concurrent.futures import ThreadPoolExecutor
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+import litellm
+from litellm import (  # AuthenticationError,; RateLimitError,; ServiceUnavailableError,; OpenAIError,
+    ContextWindowExceededError,
+    completion,
+    embedding,
+)
+
+litellm.vertex_project = "pathrise-convert-1606954137718"
+litellm.vertex_location = "us-central1"
+litellm.num_retries = 0
+
+# litellm.failure_callback = ["sentry"]
+#### What this tests ####
+# This tests exception mapping -> trigger an exception from an llm provider -> assert if output is of the expected type
+
+
+# 5 providers -> OpenAI, Azure, Anthropic, Cohere, Replicate
+
+# 3 main types of exceptions -> - Rate Limit Errors, Context Window Errors, Auth errors (incorrect/rotated key, etc.)
+ +# Approach: Run each model through the test -> assert if the correct error (always the same one) is triggered + +exception_models = [ + "sagemaker/berri-benchmarking-Llama-2-70b-chat-hf-4", + "bedrock/anthropic.claude-instant-v1", +] + + +@pytest.mark.asyncio +async def test_content_policy_exception_azure(): + try: + # this is ony a test - we needed some way to invoke the exception :( + litellm.set_verbose = True + response = await litellm.acompletion( + model="azure/chatgpt-v-3", + messages=[{"role": "user", "content": "where do I buy lethal drugs from"}], + mock_response="Exception: content_filter_policy", + ) + except litellm.ContentPolicyViolationError as e: + print("caught a content policy violation error! Passed") + print("exception", e) + assert e.response is not None + assert e.litellm_debug_info is not None + assert isinstance(e.litellm_debug_info, str) + assert len(e.litellm_debug_info) > 0 + pass + except Exception as e: + print() + pytest.fail(f"An exception occurred - {str(e)}") + + +@pytest.mark.asyncio +async def test_content_policy_exception_openai(): + try: + # this is ony a test - we needed some way to invoke the exception :( + litellm.set_verbose = True + response = await litellm.acompletion( + model="gpt-3.5-turbo", + stream=True, + messages=[ + {"role": "user", "content": "Gimme the lyrics to Don't Stop Me Now"} + ], + ) + async for chunk in response: + print(chunk) + except litellm.ContentPolicyViolationError as e: + print("caught a content policy violation error! Passed") + print("exception", e) + assert e.llm_provider == "openai" + pass + except Exception as e: + print() + pytest.fail(f"An exception occurred - {str(e)}") + + +# Test 1: Context Window Errors +@pytest.mark.skip(reason="AWS Suspended Account") +@pytest.mark.parametrize("model", exception_models) +def test_context_window(model): + print("Testing context window error") + sample_text = "Say error 50 times" * 1000000 + messages = [{"content": sample_text, "role": "user"}] + try: + litellm.set_verbose = False + print("Testing model=", model) + response = completion(model=model, messages=messages) + print(f"response: {response}") + print("FAILED!") + pytest.fail(f"An exception occurred") + except ContextWindowExceededError as e: + print(f"Worked!") + except RateLimitError: + print("RateLimited!") + except Exception as e: + print(f"{e}") + pytest.fail(f"An error occcurred - {e}") + + +models = ["command-nightly"] + + +@pytest.mark.skip(reason="duplicate test.") +@pytest.mark.parametrize("model", models) +def test_context_window_with_fallbacks(model): + ctx_window_fallback_dict = { + "command-nightly": "claude-2.1", + "gpt-3.5-turbo-instruct": "gpt-3.5-turbo-16k", + "azure/chatgpt-v-3": "gpt-3.5-turbo-16k", + } + sample_text = "how does a court case get to the Supreme Court?" 
* 1000 + messages = [{"content": sample_text, "role": "user"}] + + try: + completion( + model=model, + messages=messages, + context_window_fallback_dict=ctx_window_fallback_dict, + ) + except litellm.ServiceUnavailableError as e: + pass + except litellm.APIConnectionError as e: + pass + + +# for model in litellm.models_by_provider["bedrock"]: +# test_context_window(model=model) +# test_context_window(model="chat-bison") +# test_context_window_with_fallbacks(model="command-nightly") +# Test 2: InvalidAuth Errors +@pytest.mark.parametrize("model", models) +def invalid_auth(model): # set the model key to an invalid key, depending on the model + messages = [{"content": "Hello, how are you?", "role": "user"}] + temporary_key = None + try: + if model == "gpt-3.5-turbo" or model == "gpt-3.5-turbo-instruct": + temporary_key = os.environ["OPENAI_API_KEY"] + os.environ["OPENAI_API_KEY"] = "bad-key" + elif "bedrock" in model: + temporary_aws_access_key = os.environ["AWS_ACCESS_KEY_ID"] + os.environ["AWS_ACCESS_KEY_ID"] = "bad-key" + temporary_aws_region_name = os.environ["AWS_REGION_NAME"] + os.environ["AWS_REGION_NAME"] = "bad-key" + temporary_secret_key = os.environ["AWS_SECRET_ACCESS_KEY"] + os.environ["AWS_SECRET_ACCESS_KEY"] = "bad-key" + elif model == "azure/chatgpt-v-3": + temporary_key = os.environ["AZURE_API_KEY"] + os.environ["AZURE_API_KEY"] = "bad-key" + elif model == "claude-3-5-haiku-20241022": + temporary_key = os.environ["ANTHROPIC_API_KEY"] + os.environ["ANTHROPIC_API_KEY"] = "bad-key" + elif model == "command-nightly": + temporary_key = os.environ["COHERE_API_KEY"] + os.environ["COHERE_API_KEY"] = "bad-key" + elif "j2" in model: + temporary_key = os.environ["AI21_API_KEY"] + os.environ["AI21_API_KEY"] = "bad-key" + elif "togethercomputer" in model: + temporary_key = os.environ["TOGETHERAI_API_KEY"] + os.environ["TOGETHERAI_API_KEY"] = ( + "84060c79880fc49df126d3e87b53f8a463ff6e1c6d27fe64207cde25cdfcd1f24a" + ) + elif model in litellm.openrouter_models: + temporary_key = os.environ["OPENROUTER_API_KEY"] + os.environ["OPENROUTER_API_KEY"] = "bad-key" + elif model in litellm.aleph_alpha_models: + temporary_key = os.environ["ALEPH_ALPHA_API_KEY"] + os.environ["ALEPH_ALPHA_API_KEY"] = "bad-key" + elif model in litellm.nlp_cloud_models: + temporary_key = os.environ["NLP_CLOUD_API_KEY"] + os.environ["NLP_CLOUD_API_KEY"] = "bad-key" + elif ( + model + == "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1" + ): + temporary_key = os.environ["REPLICATE_API_KEY"] + os.environ["REPLICATE_API_KEY"] = "bad-key" + print(f"model: {model}") + response = completion(model=model, messages=messages) + print(f"response: {response}") + except AuthenticationError as e: + print(f"AuthenticationError Caught Exception - {str(e)}") + except ( + OpenAIError + ) as e: # is at least an openai error -> in case of random model errors - e.g. 
overloaded server + print(f"OpenAIError Caught Exception - {e}") + except Exception as e: + print(type(e)) + print(type(AuthenticationError)) + print(e.__class__.__name__) + print(f"Uncaught Exception - {e}") + pytest.fail(f"Error occurred: {e}") + if temporary_key != None: # reset the key + if model == "gpt-3.5-turbo": + os.environ["OPENAI_API_KEY"] = temporary_key + elif model == "chatgpt-test": + os.environ["AZURE_API_KEY"] = temporary_key + azure = True + elif model == "claude-3-5-haiku-20241022": + os.environ["ANTHROPIC_API_KEY"] = temporary_key + elif model == "command-nightly": + os.environ["COHERE_API_KEY"] = temporary_key + elif ( + model + == "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1" + ): + os.environ["REPLICATE_API_KEY"] = temporary_key + elif "j2" in model: + os.environ["AI21_API_KEY"] = temporary_key + elif "togethercomputer" in model: + os.environ["TOGETHERAI_API_KEY"] = temporary_key + elif model in litellm.aleph_alpha_models: + os.environ["ALEPH_ALPHA_API_KEY"] = temporary_key + elif model in litellm.nlp_cloud_models: + os.environ["NLP_CLOUD_API_KEY"] = temporary_key + elif "bedrock" in model: + os.environ["AWS_ACCESS_KEY_ID"] = temporary_aws_access_key + os.environ["AWS_REGION_NAME"] = temporary_aws_region_name + os.environ["AWS_SECRET_ACCESS_KEY"] = temporary_secret_key + return + + +# for model in litellm.models_by_provider["bedrock"]: +# invalid_auth(model=model) +# invalid_auth(model="command-nightly") + + +# Test 3: Invalid Request Error +@pytest.mark.parametrize("model", models) +def test_invalid_request_error(model): + messages = [{"content": "hey, how's it going?", "role": "user"}] + + with pytest.raises(BadRequestError): + completion(model=model, messages=messages, max_tokens="hello world") + + + + + +def test_azure_embedding_exceptions(): + try: + + response = litellm.embedding( + model="azure/azure-embedding-model", + input="hello", + mock_response="error", + ) + pytest.fail(f"Bad request this should have failed but got {response}") + + except Exception as e: + print(vars(e)) + # CRUCIAL Test - Ensures our exceptions are readable and not overly complicated. 
some users have complained exceptions will randomly have another exception raised in our exception mapping + assert str(e) == "Mock error" + + +async def asynctest_completion_azure_exception(): + try: + import openai + + import litellm + + print("azure gpt-3.5 test\n\n") + litellm.set_verbose = True + ## Test azure call + old_azure_key = os.environ["AZURE_API_KEY"] + os.environ["AZURE_API_KEY"] = "good morning" + response = await litellm.acompletion( + model="azure/chatgpt-v-3", + messages=[{"role": "user", "content": "hello"}], + ) + print(f"response: {response}") + print(response) + except openai.AuthenticationError as e: + os.environ["AZURE_API_KEY"] = old_azure_key + print("good job got the correct error for azure when key not set") + print(e) + except Exception as e: + print("Got wrong exception") + print("exception", e) + pytest.fail(f"Error occurred: {e}") + + +# import asyncio +# asyncio.run( +# asynctest_completion_azure_exception() +# ) + + +def asynctest_completion_openai_exception_bad_model(): + try: + import asyncio + + import openai + + import litellm + + print("azure exception bad model\n\n") + litellm.set_verbose = True + + ## Test azure call + async def test(): + response = await litellm.acompletion( + model="openai/gpt-6", + messages=[{"role": "user", "content": "hello"}], + ) + + asyncio.run(test()) + except openai.NotFoundError: + print("Good job this is a NotFoundError for a model that does not exist!") + print("Passed") + except Exception as e: + print("Raised wrong type of exception", type(e)) + assert isinstance(e, openai.BadRequestError) + pytest.fail(f"Error occurred: {e}") + + +# asynctest_completion_openai_exception_bad_model() + + +def asynctest_completion_azure_exception_bad_model(): + try: + import asyncio + + import openai + + import litellm + + print("azure exception bad model\n\n") + litellm.set_verbose = True + + ## Test azure call + async def test(): + response = await litellm.acompletion( + model="azure/gpt-12", + messages=[{"role": "user", "content": "hello"}], + ) + + asyncio.run(test()) + except openai.NotFoundError: + print("Good job this is a NotFoundError for a model that does not exist!") + print("Passed") + except Exception as e: + print("Raised wrong type of exception", type(e)) + pytest.fail(f"Error occurred: {e}") + + +# asynctest_completion_azure_exception_bad_model() + + +def test_completion_openai_exception(): + # test if openai:gpt raises openai.AuthenticationError + try: + import openai + + print("openai gpt-3.5 test\n\n") + litellm.set_verbose = True + ## Test azure call + old_azure_key = os.environ["OPENAI_API_KEY"] + os.environ["OPENAI_API_KEY"] = "good morning" + response = completion( + model="gpt-4", + messages=[{"role": "user", "content": "hello"}], + ) + print(f"response: {response}") + print(response) + except openai.AuthenticationError as e: + os.environ["OPENAI_API_KEY"] = old_azure_key + print("OpenAI: good job got the correct error for openai when key not set") + except Exception as e: + pytest.fail(f"Error occurred: {e}") + + +# test_completion_openai_exception() + + + + + +def test_completion_mistral_exception(): + # test if mistral/mistral-tiny raises openai.AuthenticationError + try: + import openai + + print("Testing mistral ai exception mapping") + litellm.set_verbose = True + ## Test azure call + old_azure_key = os.environ["MISTRAL_API_KEY"] + os.environ["MISTRAL_API_KEY"] = "good morning" + response = completion( + model="mistral/mistral-tiny", + messages=[{"role": "user", "content": "hello"}], + ) + 
print(f"response: {response}") + print(response) + except openai.AuthenticationError as e: + os.environ["MISTRAL_API_KEY"] = old_azure_key + print("good job got the correct error for openai when key not set") + except Exception as e: + pytest.fail(f"Error occurred: {e}") + + +# test_completion_mistral_exception() + + +def test_completion_bedrock_invalid_role_exception(): + """ + Test if litellm raises a BadRequestError for an invalid role on Bedrock + """ + try: + litellm.set_verbose = True + response = completion( + model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0", + messages=[{"role": "very-bad-role", "content": "hello"}], + ) + print(f"response: {response}") + print(response) + + except Exception as e: + assert isinstance( + e, litellm.BadRequestError + ), "Expected BadRequestError but got {}".format(type(e)) + print("str(e) = {}".format(str(e))) + + # This is important - We we previously returning a poorly formatted error string. Which was + # litellm.BadRequestError: litellm.BadRequestError: Invalid Message passed in {'role': 'very-bad-role', 'content': 'hello'} + + # IMPORTANT ASSERTION + assert ( + (str(e)) + == "litellm.BadRequestError: Invalid Message passed in {'role': 'very-bad-role', 'content': 'hello'}" + ) + +@pytest.mark.skip(reason="OpenAI exception changed to a generic error") +def test_content_policy_exceptionimage_generation_openai(): + try: + # this is ony a test - we needed some way to invoke the exception :( + litellm._turn_on_debug() + response = litellm.image_generation( + prompt="where do i buy lethal drugs from", model="dall-e-3" + ) + print(f"response: {response}") + assert len(response.data) > 0 + except litellm.ContentPolicyViolationError as e: + print("caught a content policy violation error! Passed") + pass + except Exception as e: + pytest.fail(f"An exception occurred - {str(e)}") + + +# test_content_policy_exceptionimage_generation_openai() + + +def test_content_policy_violation_error_streaming(): + """ + Production Test. + """ + litellm.set_verbose = False + print("test_async_completion with stream") + + async def test_get_response(): + try: + response = await litellm.acompletion( + model="azure/chatgpt-v-3", + messages=[{"role": "user", "content": "say 1"}], + temperature=0, + top_p=1, + stream=True, + max_tokens=512, + presence_penalty=0, + frequency_penalty=0, + ) + print(f"response: {response}") + + num_finish_reason = 0 + async for chunk in response: + print(chunk) + if chunk["choices"][0].get("finish_reason") is not None: + num_finish_reason += 1 + print("finish_reason", chunk["choices"][0].get("finish_reason")) + + assert ( + num_finish_reason == 1 + ), f"expected only one finish reason. 
Got {num_finish_reason}" + except Exception as e: + pytest.fail(f"GOT exception for gpt-3.5 instruct In streaming{e}") + + asyncio.run(test_get_response()) + + async def test_get_error(): + try: + response = await litellm.acompletion( + model="azure/chatgpt-v-3", + messages=[ + {"role": "user", "content": "where do i buy lethal drugs from"} + ], + temperature=0, + top_p=1, + stream=True, + max_tokens=512, + presence_penalty=0, + frequency_penalty=0, + mock_response="Exception: content_filter_policy", + ) + print(f"response: {response}") + + num_finish_reason = 0 + async for chunk in response: + print(chunk) + if chunk["choices"][0].get("finish_reason") is not None: + num_finish_reason += 1 + print("finish_reason", chunk["choices"][0].get("finish_reason")) + + pytest.fail(f"Expected to return 400 error In streaming{e}") + except Exception as e: + pass + + asyncio.run(test_get_error()) + + +def test_completion_perplexity_exception_on_openai_client(): + try: + import openai + + print("perplexity test\n\n") + litellm.set_verbose = False + ## Test azure call + old_azure_key = os.environ["PERPLEXITYAI_API_KEY"] + + # delete perplexityai api key to simulate bad api key + del os.environ["PERPLEXITYAI_API_KEY"] + + # temporaily delete openai api key + original_openai_key = os.environ["OPENAI_API_KEY"] + del os.environ["OPENAI_API_KEY"] + + response = completion( + model="perplexity/mistral-7b-instruct", + messages=[{"role": "user", "content": "hello"}], + ) + os.environ["PERPLEXITYAI_API_KEY"] = old_azure_key + os.environ["OPENAI_API_KEY"] = original_openai_key + pytest.fail("Request should have failed - bad api key") + except openai.AuthenticationError as e: + os.environ["PERPLEXITYAI_API_KEY"] = old_azure_key + os.environ["OPENAI_API_KEY"] = original_openai_key + print("exception: ", e) + assert ( + "The api_key client option must be set either by passing api_key to the client or by setting the PERPLEXITY_API_KEY environment variable" + in str(e) + ) + except Exception as e: + pytest.fail(f"Error occurred: {e}") + + +# test_completion_perplexity_exception_on_openai_client() + + +def test_completion_perplexity_exception(): + try: + import openai + + print("perplexity test\n\n") + litellm.set_verbose = True + ## Test azure call + old_azure_key = os.environ["PERPLEXITYAI_API_KEY"] + os.environ["PERPLEXITYAI_API_KEY"] = "good morning" + response = completion( + model="perplexity/mistral-7b-instruct", + messages=[{"role": "user", "content": "hello"}], + ) + os.environ["PERPLEXITYAI_API_KEY"] = old_azure_key + pytest.fail("Request should have failed - bad api key") + except openai.AuthenticationError as e: + os.environ["PERPLEXITYAI_API_KEY"] = old_azure_key + print("exception: ", e) + assert "PerplexityException" in str(e) + except Exception as e: + pytest.fail(f"Error occurred: {e}") + + +def test_completion_openai_api_key_exception(): + try: + import openai + + print("gpt-3.5 test\n\n") + litellm.set_verbose = True + ## Test azure call + old_azure_key = os.environ["OPENAI_API_KEY"] + os.environ["OPENAI_API_KEY"] = "good morning" + response = completion( + model="gpt-3.5-turbo", + messages=[{"role": "user", "content": "hello"}], + ) + os.environ["OPENAI_API_KEY"] = old_azure_key + pytest.fail("Request should have failed - bad api key") + except openai.AuthenticationError as e: + os.environ["OPENAI_API_KEY"] = old_azure_key + print("exception: ", e) + assert "OpenAIException" in str(e) + except Exception as e: + pytest.fail(f"Error occurred: {e}") + + +# tesy_async_acompletion() + + +def 
test_router_completion_vertex_exception(): + try: + import litellm + + litellm.set_verbose = True + router = litellm.Router( + model_list=[ + { + "model_name": "vertex-gemini-pro", + "litellm_params": { + "model": "vertex_ai/gemini-pro", + "api_key": "good-morning", + }, + }, + ] + ) + response = router.completion( + model="vertex-gemini-pro", + messages=[{"role": "user", "content": "hello"}], + vertex_project="bad-project", + ) + pytest.fail("Request should have failed - bad api key") + except Exception as e: + print("exception: ", e) + + +def test_litellm_completion_vertex_exception(): + try: + import litellm + + litellm.set_verbose = True + response = completion( + model="vertex_ai/gemini-pro", + api_key="good-morning", + messages=[{"role": "user", "content": "hello"}], + vertex_project="bad-project", + ) + pytest.fail("Request should have failed - bad api key") + except Exception as e: + print("exception: ", e) + + +def test_litellm_predibase_exception(): + """ + Test - Assert that the Predibase API Key is not returned on Authentication Errors + """ + try: + import litellm + + litellm.set_verbose = True + response = completion( + model="predibase/llama-3-8b-instruct", + messages=[{"role": "user", "content": "What is the meaning of life?"}], + tenant_id="c4768f95", + api_key="hf-rawapikey", + ) + pytest.fail("Request should have failed - bad api key") + except Exception as e: + assert "hf-rawapikey" not in str(e) + print("exception: ", e) + + +# # test_invalid_request_error(model="command-nightly") +# # Test 3: Rate Limit Errors +# def test_model_call(model): +# try: +# sample_text = "how does a court case get to the Supreme Court?" +# messages = [{ "content": sample_text,"role": "user"}] +# print(f"model: {model}") +# response = completion(model=model, messages=messages) +# except RateLimitError as e: +# print(f"headers: {e.response.headers}") +# return True +# # except OpenAIError: # is at least an openai error -> in case of random model errors - e.g. 
overloaded server +# # return True +# except Exception as e: +# print(f"Uncaught Exception {model}: {type(e).__name__} - {e}") +# traceback.print_exc() +# pass +# return False +# # Repeat each model 500 times +# # extended_models = [model for model in models for _ in range(250)] +# extended_models = ["azure/chatgpt-v-3" for _ in range(250)] + +# def worker(model): +# return test_model_call(model) + +# # Create a dictionary to store the results +# counts = {True: 0, False: 0} + +# # Use Thread Pool Executor +# with ThreadPoolExecutor(max_workers=500) as executor: +# # Use map to start the operation in thread pool +# results = executor.map(worker, extended_models) + +# # Iterate over results and count True/False +# for result in results: +# counts[result] += 1 + +# accuracy_score = counts[True]/(counts[True] + counts[False]) +# print(f"accuracy_score: {accuracy_score}") + + +@pytest.mark.parametrize( + "provider", ["predibase", "vertex_ai_beta", "anthropic", "databricks", "watsonx", "fireworks_ai"] +) +def test_exception_mapping(provider): + """ + For predibase, run through a set of mock exceptions + + assert that they are being mapped correctly + """ + litellm.set_verbose = True + error_map = { + 400: litellm.BadRequestError, + 401: litellm.AuthenticationError, + 404: litellm.NotFoundError, + 408: litellm.Timeout, + 429: litellm.RateLimitError, + 500: litellm.InternalServerError, + 503: litellm.ServiceUnavailableError, + } + + for code, expected_exception in error_map.items(): + mock_response = Exception() + setattr(mock_response, "text", "This is an error message") + setattr(mock_response, "llm_provider", provider) + setattr(mock_response, "status_code", code) + + response: Any = None + try: + response = completion( + model="{}/test-model".format(provider), + messages=[{"role": "user", "content": "Hey, how's it going?"}], + mock_response=mock_response, + ) + except expected_exception: + continue + except Exception as e: + traceback.print_exc() + response = "{}".format(str(e)) + pytest.fail( + "Did not raise expected exception. Expected={}, Return={},".format( + expected_exception, response + ) + ) + + pass + + +def test_fireworks_ai_exception_mapping(): + """ + Comprehensive test for Fireworks AI exception mapping, including: + 1. Standard 429 rate limit errors + 2. Text-based rate limit detection (the main issue fixed) + 3. Generic 400 errors that should NOT be rate limits + 4. ExceptionCheckers utility function + + Related to: https://github.com/BerriAI/litellm/pull/11455 + Based on Fireworks AI documentation: https://docs.fireworks.ai/tools-sdks/python-client/api-reference + """ + import litellm + from litellm.llms.fireworks_ai.common_utils import FireworksAIException + from litellm.litellm_core_utils.exception_mapping_utils import ExceptionCheckers + + # Test scenarios covering all important cases + test_scenarios = [ + { + "name": "Standard 429 rate limit with proper status code", + "status_code": 429, + "message": "Rate limit exceeded. 
Please try again in 60 seconds.", + "expected_exception": litellm.RateLimitError, + }, + { + "name": "Status 400 with rate limit text (the main issue fixed)", + "status_code": 400, + "message": '{"error":{"object":"error","type":"invalid_request_error","message":"rate limit exceeded, please try again later"}}', + "expected_exception": litellm.RateLimitError, + }, + { + "name": "Status 400 with generic invalid request (should NOT be rate limit)", + "status_code": 400, + "message": '{"error":{"type":"invalid_request_error","message":"Invalid parameter value"}}', + "expected_exception": litellm.BadRequestError, + }, + ] + + # Test each scenario + for scenario in test_scenarios: + mock_exception = FireworksAIException( + status_code=scenario["status_code"], + message=scenario["message"], + headers={} + ) + + try: + response = litellm.completion( + model="fireworks_ai/llama-v3p1-70b-instruct", + messages=[{"role": "user", "content": "Hello"}], + mock_response=mock_exception, + ) + pytest.fail(f"Expected {scenario['expected_exception'].__name__} to be raised") + except scenario["expected_exception"] as e: + if scenario["expected_exception"] == litellm.RateLimitError: + assert "rate limit" in str(e).lower() or "429" in str(e) + except Exception as e: + pytest.fail(f"Expected {scenario['expected_exception'].__name__} but got {type(e).__name__}: {e}") + + # Test ExceptionCheckers.is_error_str_rate_limit() method directly + + # Test cases that should return True (rate limit detected) + rate_limit_strings = [ + "429 rate limit exceeded", + "Rate limit exceeded, please try again later", + "RATE LIMIT ERROR", + "Error 429: rate limit", + '{"error":{"type":"invalid_request_error","message":"rate limit exceeded, please try again later"}}', + "HTTP 429 Too Many Requests", + ] + + for error_str in rate_limit_strings: + assert ExceptionCheckers.is_error_str_rate_limit(error_str), f"Should detect rate limit in: {error_str}" + + # Test cases that should return False (not rate limit) + non_rate_limit_strings = [ + "400 Bad Request", + "Authentication failed", + "Invalid model specified", + "Context window exceeded", + "Internal server error", + "", + "Some other error message", + ] + + for error_str in non_rate_limit_strings: + assert not ExceptionCheckers.is_error_str_rate_limit(error_str), f"Should NOT detect rate limit in: {error_str}" + + # Test edge cases + assert not ExceptionCheckers.is_error_str_rate_limit(None) # type: ignore + assert not ExceptionCheckers.is_error_str_rate_limit(42) # type: ignore + + +def test_anthropic_tool_calling_exception(): + """ + Related - https://github.com/BerriAI/litellm/issues/4348 + """ + tools = [ + { + "type": "function", + "function": { + "name": "get_current_weather", + "description": "Get the current weather in a given location", + "parameters": {}, + }, + } + ] + try: + litellm.completion( + model="claude-3-5-sonnet-20240620", + messages=[{"role": "user", "content": "Hey, how's it going?"}], + tools=tools, + ) + except litellm.BadRequestError: + pass + + +from typing import Optional, Union + +from openai import AsyncOpenAI, OpenAI + + +def _pre_call_utils( + call_type: str, + data: dict, + client: Union[OpenAI, AsyncOpenAI], + sync_mode: bool, + streaming: Optional[bool], +): + if call_type == "embedding": + data["input"] = "Hello world!" 
+ mapped_target: Any = client.embeddings.with_raw_response + if sync_mode: + original_function = litellm.embedding + else: + original_function = litellm.aembedding + elif call_type == "chat_completion": + data["messages"] = [{"role": "user", "content": "Hello world"}] + if streaming is True: + data["stream"] = True + mapped_target = client.chat.completions.with_raw_response # type: ignore + if sync_mode: + original_function = litellm.completion + else: + original_function = litellm.acompletion + elif call_type == "completion": + data["prompt"] = "Hello world" + if streaming is True: + data["stream"] = True + mapped_target = client.completions.with_raw_response # type: ignore + if sync_mode: + original_function = litellm.text_completion + else: + original_function = litellm.atext_completion + + return data, original_function, mapped_target + + +def _pre_call_utils_httpx( + call_type: str, + data: dict, + client: Union[HTTPHandler, AsyncHTTPHandler], + sync_mode: bool, + streaming: Optional[bool], +): + mapped_target: Any = client.client + if call_type == "embedding": + data["input"] = "Hello world!" + + if sync_mode: + original_function = litellm.embedding + else: + original_function = litellm.aembedding + elif call_type == "chat_completion": + data["messages"] = [{"role": "user", "content": "Hello world"}] + if streaming is True: + data["stream"] = True + + if sync_mode: + original_function = litellm.completion + else: + original_function = litellm.acompletion + elif call_type == "completion": + data["prompt"] = "Hello world" + if streaming is True: + data["stream"] = True + if sync_mode: + original_function = litellm.text_completion + else: + original_function = litellm.atext_completion + + return data, original_function, mapped_target + + +@pytest.mark.parametrize( + "sync_mode", + [True, False], +) +@pytest.mark.parametrize( + "provider, model, call_type, streaming", + [ + ("openai", "text-embedding-ada-002", "embedding", None), + ("openai", "gpt-3.5-turbo", "chat_completion", False), + ("openai", "gpt-3.5-turbo", "chat_completion", True), + ("openai", "gpt-3.5-turbo-instruct", "completion", True), + ("azure", "azure/chatgpt-v-3", "chat_completion", True), + ("azure", "azure/text-embedding-ada-002", "embedding", True), + ("azure", "azure_text/gpt-3.5-turbo-instruct", "completion", True), + ], +) +@pytest.mark.asyncio +async def test_exception_with_headers(sync_mode, provider, model, call_type, streaming): + """ + User feedback: litellm says "No deployments available for selected model, Try again in 60 seconds" + but Azure says to retry in at most 9s + + ``` + {"message": "litellm.proxy.proxy_server.embeddings(): Exception occured - No deployments available for selected model, Try again in 60 seconds. Passed model=text-embedding-ada-002. pre-call-checks=False, allowed_model_region=n/a, cooldown_list=[('b49cbc9314273db7181fe69b1b19993f04efb88f2c1819947c538bac08097e4c', {'Exception Received': 'litellm.RateLimitError: AzureException RateLimitError - Requests to the Embeddings_Create Operation under Azure OpenAI API version 2023-09-01-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 9 seconds. 
Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit.', 'Status Code': '429'})]", "level": "ERROR", "timestamp": "2024-08-22T03:25:36.900476"} + ``` + """ + print(f"Received args: {locals()}") + import openai + + if sync_mode: + if provider == "openai": + openai_client = openai.OpenAI(api_key="") + elif provider == "azure": + openai_client = openai.AzureOpenAI( + api_key="", base_url="", api_version=litellm.AZURE_DEFAULT_API_VERSION + ) + else: + if provider == "openai": + openai_client = openai.AsyncOpenAI(api_key="") + elif provider == "azure": + openai_client = openai.AsyncAzureOpenAI( + api_key="", base_url="", api_version=litellm.AZURE_DEFAULT_API_VERSION + ) + + data = {"model": model} + data, original_function, mapped_target = _pre_call_utils( + call_type=call_type, + data=data, + client=openai_client, + sync_mode=sync_mode, + streaming=streaming, + ) + + cooldown_time = 30.0 + + def _return_exception(*args, **kwargs): + import datetime + + from httpx import Headers, Request, Response + + kwargs = { + "request": Request("POST", "https://www.google.com"), + "message": "Error code: 429 - Rate Limit Error!", + "body": {"detail": "Rate Limit Error!"}, + "code": None, + "param": None, + "type": None, + "response": Response( + status_code=429, + headers=Headers( + { + "date": "Sat, 21 Sep 2024 22:56:53 GMT", + "server": "uvicorn", + "retry-after": "30", + "content-length": "30", + "content-type": "application/json", + } + ), + request=Request("POST", "http://0.0.0.0:9000/chat/completions"), + ), + "status_code": 429, + "request_id": None, + } + + exception = Exception() + for k, v in kwargs.items(): + setattr(exception, k, v) + raise exception + + with patch.object( + mapped_target, + "create", + side_effect=_return_exception, + ): + new_retry_after_mock_client = MagicMock(return_value=-1) + + litellm.utils._get_retry_after_from_exception_header = ( + new_retry_after_mock_client + ) + + exception_raised = False + try: + if sync_mode: + resp = original_function(**data, client=openai_client) + if streaming: + for chunk in resp: + continue + else: + resp = await original_function(**data, client=openai_client) + + if streaming: + async for chunk in resp: + continue + + except litellm.RateLimitError as e: + exception_raised = True + assert e.litellm_response_headers is not None + assert int(e.litellm_response_headers["retry-after"]) == cooldown_time + + if exception_raised is False: + print(resp) + assert exception_raised + + +def test_openai_gateway_timeout_error(): + """ + Test that the OpenAI gateway timeout error is raised + """ + openai_client = OpenAI() + mapped_target = openai_client.chat.completions.with_raw_response # type: ignore + def _return_exception(*args, **kwargs): + import datetime + + from httpx import Headers, Request, Response + + kwargs = { + "request": Request("POST", "https://www.google.com"), + "message": "Error code: 504 - Gateway Timeout Error!", + "body": {"detail": "Gateway Timeout Error!"}, + "code": None, + "param": None, + "type": None, + "response": Response( + status_code=504, + headers=Headers( + { + "date": "Sat, 21 Sep 2024 22:56:53 GMT", + "server": "uvicorn", + "content-length": "30", + "content-type": "application/json", + } + ), + request=Request("POST", "http://0.0.0.0:9000/chat/completions"), + ), + "status_code": 504, + "request_id": None, + } + + exception = Exception() + for k, v in kwargs.items(): + setattr(exception, k, v) + raise exception + + try: + with patch.object( + mapped_target, + 
"create", + side_effect=_return_exception, + ): + litellm.completion(model="openai/gpt-3.5-turbo", messages=[{"role": "user", "content": "Hello world"}], client=openai_client) + pytest.fail("Expected to raise Timeout") + except litellm.Timeout as e: + assert e.status_code == 504 + + +@pytest.mark.parametrize( + "sync_mode", + [True, False], +) +@pytest.mark.parametrize("streaming", [True, False]) +@pytest.mark.parametrize( + "provider, model, call_type", + [ + ("anthropic", "claude-3-haiku-20240307", "chat_completion"), + ], +) +@pytest.mark.asyncio +async def test_exception_with_headers_httpx( + sync_mode, provider, model, call_type, streaming +): + """ + User feedback: litellm says "No deployments available for selected model, Try again in 60 seconds" + but Azure says to retry in at most 9s + + ``` + {"message": "litellm.proxy.proxy_server.embeddings(): Exception occured - No deployments available for selected model, Try again in 60 seconds. Passed model=text-embedding-ada-002. pre-call-checks=False, allowed_model_region=n/a, cooldown_list=[('b49cbc9314273db7181fe69b1b19993f04efb88f2c1819947c538bac08097e4c', {'Exception Received': 'litellm.RateLimitError: AzureException RateLimitError - Requests to the Embeddings_Create Operation under Azure OpenAI API version 2023-09-01-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 9 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit.', 'Status Code': '429'})]", "level": "ERROR", "timestamp": "2024-08-22T03:25:36.900476"} + ``` + """ + print(f"Received args: {locals()}") + import openai + + if sync_mode: + client = HTTPHandler() + else: + client = AsyncHTTPHandler() + + data = {"model": model} + data, original_function, mapped_target = _pre_call_utils_httpx( + call_type=call_type, + data=data, + client=client, + sync_mode=sync_mode, + streaming=streaming, + ) + + cooldown_time = 30.0 + + def _return_exception(*args, **kwargs): + import datetime + + from httpx import Headers, HTTPStatusError, Request, Response + + # Create the Request object + request = Request("POST", "http://0.0.0.0:9000/chat/completions") + + # Create the Response object with the necessary headers and status code + response = Response( + status_code=429, + headers=Headers( + { + "date": "Sat, 21 Sep 2024 22:56:53 GMT", + "server": "uvicorn", + "retry-after": "30", + "content-length": "30", + "content-type": "application/json", + } + ), + request=request, + ) + + # Create and raise the HTTPStatusError exception + raise HTTPStatusError( + message="Error code: 429 - Rate Limit Error!", + request=request, + response=response, + ) + + with patch.object( + mapped_target, + "send", + side_effect=_return_exception, + ): + new_retry_after_mock_client = MagicMock(return_value=-1) + + litellm.utils._get_retry_after_from_exception_header = ( + new_retry_after_mock_client + ) + + exception_raised = False + try: + if sync_mode: + resp = original_function(**data, client=client) + if streaming: + for chunk in resp: + continue + else: + resp = await original_function(**data, client=client) + + if streaming: + async for chunk in resp: + continue + + except litellm.RateLimitError as e: + exception_raised = True + assert ( + e.litellm_response_headers is not None + ), "litellm_response_headers is None" + print("e.litellm_response_headers", e.litellm_response_headers) + assert int(e.litellm_response_headers["retry-after"]) == cooldown_time + + if exception_raised is False: + print(resp) + 
assert exception_raised + + +@pytest.mark.asyncio +@pytest.mark.parametrize("model", ["azure/chatgpt-v-3", "openai/gpt-3.5-turbo"]) +async def test_bad_request_error_contains_httpx_response(model): + """ + Test that the BadRequestError contains the httpx response + + Relevant issue: https://github.com/BerriAI/litellm/issues/6732 + """ + try: + await litellm.acompletion( + model=model, + messages=[{"role": "user", "content": "Hello world"}], + bad_arg="bad_arg", + ) + pytest.fail("Expected to raise BadRequestError") + except litellm.BadRequestError as e: + print("e.response", e.response) + print("vars(e.response)", vars(e.response)) + assert e.response is not None + + +def test_exceptions_base_class(): + try: + raise litellm.RateLimitError( + message="BedrockException: Rate Limit Error", + model="model", + llm_provider="bedrock", + ) + except litellm.RateLimitError as e: + assert isinstance(e, litellm.RateLimitError) + assert e.code == "429" + assert e.type == "throttling_error" + + +def test_context_window_exceeded_error_from_litellm_proxy(): + from httpx import Response + from litellm.litellm_core_utils.exception_mapping_utils import ( + extract_and_raise_litellm_exception, + ) + + args = { + "response": Response(status_code=400, text="Bad Request"), + "error_str": "Error code: 400 - {'error': {'message': \"litellm.ContextWindowExceededError: litellm.BadRequestError: this is a mock context window exceeded error\\nmodel=gpt-3.5-turbo. context_window_fallbacks=None. fallbacks=None.\\n\\nSet 'context_window_fallback' - https://docs.litellm.ai/docs/routing#fallbacks\\nReceived Model Group=gpt-3.5-turbo\\nAvailable Model Group Fallbacks=None\", 'type': None, 'param': None, 'code': '400'}}", + "model": "gpt-3.5-turbo", + "custom_llm_provider": "litellm_proxy", + } + with pytest.raises(litellm.ContextWindowExceededError): + extract_and_raise_litellm_exception(**args) + + +@pytest.mark.parametrize("sync_mode", [True, False]) +@pytest.mark.parametrize("stream_mode", [True, False]) +@pytest.mark.parametrize("model", ["azure/gpt-4o-new-test"]) # "gpt-4o-mini", +@pytest.mark.asyncio +async def test_exception_bubbling_up(sync_mode, stream_mode, model): + """ + make sure code, param, and type are bubbled up + """ + import litellm + + litellm.set_verbose = True + with pytest.raises(Exception) as exc_info: + if sync_mode: + litellm.completion( + model=model, + messages=[{"role": "usera", "content": "hi"}], + stream=stream_mode, + sync_stream=sync_mode, + ) + else: + await litellm.acompletion( + model=model, + messages=[{"role": "usera", "content": "hi"}], + stream=stream_mode, + sync_stream=sync_mode, + ) + + assert exc_info.value.code == "invalid_value" + assert exc_info.value.param is not None + assert exc_info.value.type == "invalid_request_error" + + + From ce35b1cc80f9e7b329456fc8f6655e16717739b5 Mon Sep 17 00:00:00 2001 From: Cole McIntosh Date: Sat, 7 Jun 2025 10:50:42 -0600 Subject: [PATCH 05/11] Remove tests that require an API key --- .../litellm_core_utils/test_exceptions.py | 54 +------------------ 1 file changed, 2 insertions(+), 52 deletions(-) diff --git a/tests/test_litellm/litellm_core_utils/test_exceptions.py b/tests/test_litellm/litellm_core_utils/test_exceptions.py index 6851325cc1fc..734083649611 100644 --- a/tests/test_litellm/litellm_core_utils/test_exceptions.py +++ b/tests/test_litellm/litellm_core_utils/test_exceptions.py @@ -252,30 +252,7 @@ def test_invalid_request_error(model): completion(model=model, messages=messages, max_tokens="hello world") -def 
test_completion_azure_exception(): - try: - import openai - - print("azure gpt-3.5 test\n\n") - litellm.set_verbose = True - ## Test azure call - old_azure_key = os.environ["AZURE_API_KEY"] - os.environ["AZURE_API_KEY"] = "good morning" - response = completion( - model="azure/chatgpt-v-3", - messages=[{"role": "user", "content": "hello"}], - ) - os.environ["AZURE_API_KEY"] = old_azure_key - print(f"response: {response}") - print(response) - except openai.AuthenticationError as e: - os.environ["AZURE_API_KEY"] = old_azure_key - print("good job got the correct error for azure when key not set") - except Exception as e: - pytest.fail(f"Error occurred: {e}") - -# test_completion_azure_exception() def test_azure_embedding_exceptions(): @@ -414,31 +391,7 @@ def test_completion_openai_exception(): # test_completion_openai_exception() -def test_anthropic_openai_exception(): - # test if anthropic raises litellm.AuthenticationError - try: - litellm.set_verbose = True - ## Test azure call - old_azure_key = os.environ["ANTHROPIC_API_KEY"] - os.environ.pop("ANTHROPIC_API_KEY") - response = completion( - model="anthropic/claude-3-sonnet-20240229", - messages=[{"role": "user", "content": "hello"}], - ) - print(f"response: {response}") - print(response) - except litellm.AuthenticationError as e: - os.environ["ANTHROPIC_API_KEY"] = old_azure_key - print("Exception vars=", vars(e)) - assert ( - "Missing Anthropic API Key - A call is being made to anthropic but no key is set either in the environment variables or via params" - in e.message - ) - print( - "ANTHROPIC_API_KEY: good job got the correct error for ANTHROPIC_API_KEY when key not set" - ) - except Exception as e: - pytest.fail(f"Error occurred: {e}") + def test_completion_mistral_exception(): @@ -1376,7 +1329,4 @@ async def test_exception_bubbling_up(sync_mode, stream_mode, model): assert exc_info.value.code == "invalid_value" assert exc_info.value.param is not None - assert exc_info.value.type == "invalid_request_error" - - - + assert exc_info.value.type == "invalid_request_error" \ No newline at end of file From 9c0cfe6c324594d1e8f618094267c79ea65a2a71 Mon Sep 17 00:00:00 2001 From: Cole McIntosh Date: Sat, 7 Jun 2025 10:59:50 -0600 Subject: [PATCH 06/11] Remove commented-out test for Mistral exception handling in `test_exceptions.py` to clean up the codebase. 
--- .../litellm_core_utils/test_exceptions.py | 29 ------------------- 1 file changed, 29 deletions(-) diff --git a/tests/test_litellm/litellm_core_utils/test_exceptions.py b/tests/test_litellm/litellm_core_utils/test_exceptions.py index 734083649611..53e08842b280 100644 --- a/tests/test_litellm/litellm_core_utils/test_exceptions.py +++ b/tests/test_litellm/litellm_core_utils/test_exceptions.py @@ -391,35 +391,6 @@ def test_completion_openai_exception(): # test_completion_openai_exception() - - - -def test_completion_mistral_exception(): - # test if mistral/mistral-tiny raises openai.AuthenticationError - try: - import openai - - print("Testing mistral ai exception mapping") - litellm.set_verbose = True - ## Test azure call - old_azure_key = os.environ["MISTRAL_API_KEY"] - os.environ["MISTRAL_API_KEY"] = "good morning" - response = completion( - model="mistral/mistral-tiny", - messages=[{"role": "user", "content": "hello"}], - ) - print(f"response: {response}") - print(response) - except openai.AuthenticationError as e: - os.environ["MISTRAL_API_KEY"] = old_azure_key - print("good job got the correct error for openai when key not set") - except Exception as e: - pytest.fail(f"Error occurred: {e}") - - -# test_completion_mistral_exception() - - def test_completion_bedrock_invalid_role_exception(): """ Test if litellm raises a BadRequestError for an invalid role on Bedrock From 8e9943e5a6ba83695e031693ac494c73a17ee410 Mon Sep 17 00:00:00 2001 From: Cole McIntosh Date: Sat, 7 Jun 2025 11:26:13 -0600 Subject: [PATCH 07/11] Remove commented-out tests and redundant exception handling in `test_exceptions.py` to streamline the codebase and improve readability. --- .../litellm_core_utils/test_exceptions.py | 468 ------------------ 1 file changed, 468 deletions(-) diff --git a/tests/test_litellm/litellm_core_utils/test_exceptions.py b/tests/test_litellm/litellm_core_utils/test_exceptions.py index 53e08842b280..5c0713eed0a0 100644 --- a/tests/test_litellm/litellm_core_utils/test_exceptions.py +++ b/tests/test_litellm/litellm_core_utils/test_exceptions.py @@ -252,450 +252,6 @@ def test_invalid_request_error(model): completion(model=model, messages=messages, max_tokens="hello world") - - - -def test_azure_embedding_exceptions(): - try: - - response = litellm.embedding( - model="azure/azure-embedding-model", - input="hello", - mock_response="error", - ) - pytest.fail(f"Bad request this should have failed but got {response}") - - except Exception as e: - print(vars(e)) - # CRUCIAL Test - Ensures our exceptions are readable and not overly complicated. 
some users have complained exceptions will randomly have another exception raised in our exception mapping - assert str(e) == "Mock error" - - -async def asynctest_completion_azure_exception(): - try: - import openai - - import litellm - - print("azure gpt-3.5 test\n\n") - litellm.set_verbose = True - ## Test azure call - old_azure_key = os.environ["AZURE_API_KEY"] - os.environ["AZURE_API_KEY"] = "good morning" - response = await litellm.acompletion( - model="azure/chatgpt-v-3", - messages=[{"role": "user", "content": "hello"}], - ) - print(f"response: {response}") - print(response) - except openai.AuthenticationError as e: - os.environ["AZURE_API_KEY"] = old_azure_key - print("good job got the correct error for azure when key not set") - print(e) - except Exception as e: - print("Got wrong exception") - print("exception", e) - pytest.fail(f"Error occurred: {e}") - - -# import asyncio -# asyncio.run( -# asynctest_completion_azure_exception() -# ) - - -def asynctest_completion_openai_exception_bad_model(): - try: - import asyncio - - import openai - - import litellm - - print("azure exception bad model\n\n") - litellm.set_verbose = True - - ## Test azure call - async def test(): - response = await litellm.acompletion( - model="openai/gpt-6", - messages=[{"role": "user", "content": "hello"}], - ) - - asyncio.run(test()) - except openai.NotFoundError: - print("Good job this is a NotFoundError for a model that does not exist!") - print("Passed") - except Exception as e: - print("Raised wrong type of exception", type(e)) - assert isinstance(e, openai.BadRequestError) - pytest.fail(f"Error occurred: {e}") - - -# asynctest_completion_openai_exception_bad_model() - - -def asynctest_completion_azure_exception_bad_model(): - try: - import asyncio - - import openai - - import litellm - - print("azure exception bad model\n\n") - litellm.set_verbose = True - - ## Test azure call - async def test(): - response = await litellm.acompletion( - model="azure/gpt-12", - messages=[{"role": "user", "content": "hello"}], - ) - - asyncio.run(test()) - except openai.NotFoundError: - print("Good job this is a NotFoundError for a model that does not exist!") - print("Passed") - except Exception as e: - print("Raised wrong type of exception", type(e)) - pytest.fail(f"Error occurred: {e}") - - -# asynctest_completion_azure_exception_bad_model() - - -def test_completion_openai_exception(): - # test if openai:gpt raises openai.AuthenticationError - try: - import openai - - print("openai gpt-3.5 test\n\n") - litellm.set_verbose = True - ## Test azure call - old_azure_key = os.environ["OPENAI_API_KEY"] - os.environ["OPENAI_API_KEY"] = "good morning" - response = completion( - model="gpt-4", - messages=[{"role": "user", "content": "hello"}], - ) - print(f"response: {response}") - print(response) - except openai.AuthenticationError as e: - os.environ["OPENAI_API_KEY"] = old_azure_key - print("OpenAI: good job got the correct error for openai when key not set") - except Exception as e: - pytest.fail(f"Error occurred: {e}") - - -# test_completion_openai_exception() - - -def test_completion_bedrock_invalid_role_exception(): - """ - Test if litellm raises a BadRequestError for an invalid role on Bedrock - """ - try: - litellm.set_verbose = True - response = completion( - model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0", - messages=[{"role": "very-bad-role", "content": "hello"}], - ) - print(f"response: {response}") - print(response) - - except Exception as e: - assert isinstance( - e, litellm.BadRequestError - ), 
"Expected BadRequestError but got {}".format(type(e)) - print("str(e) = {}".format(str(e))) - - # This is important - We we previously returning a poorly formatted error string. Which was - # litellm.BadRequestError: litellm.BadRequestError: Invalid Message passed in {'role': 'very-bad-role', 'content': 'hello'} - - # IMPORTANT ASSERTION - assert ( - (str(e)) - == "litellm.BadRequestError: Invalid Message passed in {'role': 'very-bad-role', 'content': 'hello'}" - ) - -@pytest.mark.skip(reason="OpenAI exception changed to a generic error") -def test_content_policy_exceptionimage_generation_openai(): - try: - # this is ony a test - we needed some way to invoke the exception :( - litellm._turn_on_debug() - response = litellm.image_generation( - prompt="where do i buy lethal drugs from", model="dall-e-3" - ) - print(f"response: {response}") - assert len(response.data) > 0 - except litellm.ContentPolicyViolationError as e: - print("caught a content policy violation error! Passed") - pass - except Exception as e: - pytest.fail(f"An exception occurred - {str(e)}") - - -# test_content_policy_exceptionimage_generation_openai() - - -def test_content_policy_violation_error_streaming(): - """ - Production Test. - """ - litellm.set_verbose = False - print("test_async_completion with stream") - - async def test_get_response(): - try: - response = await litellm.acompletion( - model="azure/chatgpt-v-3", - messages=[{"role": "user", "content": "say 1"}], - temperature=0, - top_p=1, - stream=True, - max_tokens=512, - presence_penalty=0, - frequency_penalty=0, - ) - print(f"response: {response}") - - num_finish_reason = 0 - async for chunk in response: - print(chunk) - if chunk["choices"][0].get("finish_reason") is not None: - num_finish_reason += 1 - print("finish_reason", chunk["choices"][0].get("finish_reason")) - - assert ( - num_finish_reason == 1 - ), f"expected only one finish reason. 
Got {num_finish_reason}" - except Exception as e: - pytest.fail(f"GOT exception for gpt-3.5 instruct In streaming{e}") - - asyncio.run(test_get_response()) - - async def test_get_error(): - try: - response = await litellm.acompletion( - model="azure/chatgpt-v-3", - messages=[ - {"role": "user", "content": "where do i buy lethal drugs from"} - ], - temperature=0, - top_p=1, - stream=True, - max_tokens=512, - presence_penalty=0, - frequency_penalty=0, - mock_response="Exception: content_filter_policy", - ) - print(f"response: {response}") - - num_finish_reason = 0 - async for chunk in response: - print(chunk) - if chunk["choices"][0].get("finish_reason") is not None: - num_finish_reason += 1 - print("finish_reason", chunk["choices"][0].get("finish_reason")) - - pytest.fail(f"Expected to return 400 error In streaming{e}") - except Exception as e: - pass - - asyncio.run(test_get_error()) - - -def test_completion_perplexity_exception_on_openai_client(): - try: - import openai - - print("perplexity test\n\n") - litellm.set_verbose = False - ## Test azure call - old_azure_key = os.environ["PERPLEXITYAI_API_KEY"] - - # delete perplexityai api key to simulate bad api key - del os.environ["PERPLEXITYAI_API_KEY"] - - # temporaily delete openai api key - original_openai_key = os.environ["OPENAI_API_KEY"] - del os.environ["OPENAI_API_KEY"] - - response = completion( - model="perplexity/mistral-7b-instruct", - messages=[{"role": "user", "content": "hello"}], - ) - os.environ["PERPLEXITYAI_API_KEY"] = old_azure_key - os.environ["OPENAI_API_KEY"] = original_openai_key - pytest.fail("Request should have failed - bad api key") - except openai.AuthenticationError as e: - os.environ["PERPLEXITYAI_API_KEY"] = old_azure_key - os.environ["OPENAI_API_KEY"] = original_openai_key - print("exception: ", e) - assert ( - "The api_key client option must be set either by passing api_key to the client or by setting the PERPLEXITY_API_KEY environment variable" - in str(e) - ) - except Exception as e: - pytest.fail(f"Error occurred: {e}") - - -# test_completion_perplexity_exception_on_openai_client() - - -def test_completion_perplexity_exception(): - try: - import openai - - print("perplexity test\n\n") - litellm.set_verbose = True - ## Test azure call - old_azure_key = os.environ["PERPLEXITYAI_API_KEY"] - os.environ["PERPLEXITYAI_API_KEY"] = "good morning" - response = completion( - model="perplexity/mistral-7b-instruct", - messages=[{"role": "user", "content": "hello"}], - ) - os.environ["PERPLEXITYAI_API_KEY"] = old_azure_key - pytest.fail("Request should have failed - bad api key") - except openai.AuthenticationError as e: - os.environ["PERPLEXITYAI_API_KEY"] = old_azure_key - print("exception: ", e) - assert "PerplexityException" in str(e) - except Exception as e: - pytest.fail(f"Error occurred: {e}") - - -def test_completion_openai_api_key_exception(): - try: - import openai - - print("gpt-3.5 test\n\n") - litellm.set_verbose = True - ## Test azure call - old_azure_key = os.environ["OPENAI_API_KEY"] - os.environ["OPENAI_API_KEY"] = "good morning" - response = completion( - model="gpt-3.5-turbo", - messages=[{"role": "user", "content": "hello"}], - ) - os.environ["OPENAI_API_KEY"] = old_azure_key - pytest.fail("Request should have failed - bad api key") - except openai.AuthenticationError as e: - os.environ["OPENAI_API_KEY"] = old_azure_key - print("exception: ", e) - assert "OpenAIException" in str(e) - except Exception as e: - pytest.fail(f"Error occurred: {e}") - - -# tesy_async_acompletion() - - -def 
test_router_completion_vertex_exception(): - try: - import litellm - - litellm.set_verbose = True - router = litellm.Router( - model_list=[ - { - "model_name": "vertex-gemini-pro", - "litellm_params": { - "model": "vertex_ai/gemini-pro", - "api_key": "good-morning", - }, - }, - ] - ) - response = router.completion( - model="vertex-gemini-pro", - messages=[{"role": "user", "content": "hello"}], - vertex_project="bad-project", - ) - pytest.fail("Request should have failed - bad api key") - except Exception as e: - print("exception: ", e) - - -def test_litellm_completion_vertex_exception(): - try: - import litellm - - litellm.set_verbose = True - response = completion( - model="vertex_ai/gemini-pro", - api_key="good-morning", - messages=[{"role": "user", "content": "hello"}], - vertex_project="bad-project", - ) - pytest.fail("Request should have failed - bad api key") - except Exception as e: - print("exception: ", e) - - -def test_litellm_predibase_exception(): - """ - Test - Assert that the Predibase API Key is not returned on Authentication Errors - """ - try: - import litellm - - litellm.set_verbose = True - response = completion( - model="predibase/llama-3-8b-instruct", - messages=[{"role": "user", "content": "What is the meaning of life?"}], - tenant_id="c4768f95", - api_key="hf-rawapikey", - ) - pytest.fail("Request should have failed - bad api key") - except Exception as e: - assert "hf-rawapikey" not in str(e) - print("exception: ", e) - - -# # test_invalid_request_error(model="command-nightly") -# # Test 3: Rate Limit Errors -# def test_model_call(model): -# try: -# sample_text = "how does a court case get to the Supreme Court?" -# messages = [{ "content": sample_text,"role": "user"}] -# print(f"model: {model}") -# response = completion(model=model, messages=messages) -# except RateLimitError as e: -# print(f"headers: {e.response.headers}") -# return True -# # except OpenAIError: # is at least an openai error -> in case of random model errors - e.g. 
overloaded server -# # return True -# except Exception as e: -# print(f"Uncaught Exception {model}: {type(e).__name__} - {e}") -# traceback.print_exc() -# pass -# return False -# # Repeat each model 500 times -# # extended_models = [model for model in models for _ in range(250)] -# extended_models = ["azure/chatgpt-v-3" for _ in range(250)] - -# def worker(model): -# return test_model_call(model) - -# # Create a dictionary to store the results -# counts = {True: 0, False: 0} - -# # Use Thread Pool Executor -# with ThreadPoolExecutor(max_workers=500) as executor: -# # Use map to start the operation in thread pool -# results = executor.map(worker, extended_models) - -# # Iterate over results and count True/False -# for result in results: -# counts[result] += 1 - -# accuracy_score = counts[True]/(counts[True] + counts[False]) -# print(f"accuracy_score: {accuracy_score}") - - @pytest.mark.parametrize( "provider", ["predibase", "vertex_ai_beta", "anthropic", "databricks", "watsonx", "fireworks_ai"] ) @@ -835,30 +391,6 @@ def test_fireworks_ai_exception_mapping(): assert not ExceptionCheckers.is_error_str_rate_limit(42) # type: ignore -def test_anthropic_tool_calling_exception(): - """ - Related - https://github.com/BerriAI/litellm/issues/4348 - """ - tools = [ - { - "type": "function", - "function": { - "name": "get_current_weather", - "description": "Get the current weather in a given location", - "parameters": {}, - }, - } - ] - try: - litellm.completion( - model="claude-3-5-sonnet-20240620", - messages=[{"role": "user", "content": "Hey, how's it going?"}], - tools=tools, - ) - except litellm.BadRequestError: - pass - - from typing import Optional, Union from openai import AsyncOpenAI, OpenAI From 5585a8c556a8c479da7ddcb14b9517adadb4058b Mon Sep 17 00:00:00 2001 From: Cole McIntosh Date: Sat, 7 Jun 2025 11:33:40 -0600 Subject: [PATCH 08/11] Refactor `test_exceptions.py` by removing non-mock tests --- .../litellm_core_utils/test_exceptions.py | 190 +----------------- 1 file changed, 2 insertions(+), 188 deletions(-) diff --git a/tests/test_litellm/litellm_core_utils/test_exceptions.py b/tests/test_litellm/litellm_core_utils/test_exceptions.py index 5c0713eed0a0..d19a480faed1 100644 --- a/tests/test_litellm/litellm_core_utils/test_exceptions.py +++ b/tests/test_litellm/litellm_core_utils/test_exceptions.py @@ -68,28 +68,7 @@ async def test_content_policy_exception_azure(): pytest.fail(f"An exception occurred - {str(e)}") -@pytest.mark.asyncio -async def test_content_policy_exception_openai(): - try: - # this is ony a test - we needed some way to invoke the exception :( - litellm.set_verbose = True - response = await litellm.acompletion( - model="gpt-3.5-turbo", - stream=True, - messages=[ - {"role": "user", "content": "Gimme the lyrics to Don't Stop Me Now"} - ], - ) - async for chunk in response: - print(chunk) - except litellm.ContentPolicyViolationError as e: - print("caught a content policy violation error! 
Passed") - print("exception", e) - assert e.llm_provider == "openai" - pass - except Exception as e: - print() - pytest.fail(f"An exception occurred - {str(e)}") + # Test 1: Context Window Errors @@ -115,11 +94,8 @@ def test_context_window(model): pytest.fail(f"An error occcurred - {e}") -models = ["command-nightly"] - - @pytest.mark.skip(reason="duplicate test.") -@pytest.mark.parametrize("model", models) +@pytest.mark.parametrize("model", ["command-nightly"]) def test_context_window_with_fallbacks(model): ctx_window_fallback_dict = { "command-nightly": "claude-2.1", @@ -141,117 +117,6 @@ def test_context_window_with_fallbacks(model): pass -# for model in litellm.models_by_provider["bedrock"]: -# test_context_window(model=model) -# test_context_window(model="chat-bison") -# test_context_window_with_fallbacks(model="command-nightly") -# Test 2: InvalidAuth Errors -@pytest.mark.parametrize("model", models) -def invalid_auth(model): # set the model key to an invalid key, depending on the model - messages = [{"content": "Hello, how are you?", "role": "user"}] - temporary_key = None - try: - if model == "gpt-3.5-turbo" or model == "gpt-3.5-turbo-instruct": - temporary_key = os.environ["OPENAI_API_KEY"] - os.environ["OPENAI_API_KEY"] = "bad-key" - elif "bedrock" in model: - temporary_aws_access_key = os.environ["AWS_ACCESS_KEY_ID"] - os.environ["AWS_ACCESS_KEY_ID"] = "bad-key" - temporary_aws_region_name = os.environ["AWS_REGION_NAME"] - os.environ["AWS_REGION_NAME"] = "bad-key" - temporary_secret_key = os.environ["AWS_SECRET_ACCESS_KEY"] - os.environ["AWS_SECRET_ACCESS_KEY"] = "bad-key" - elif model == "azure/chatgpt-v-3": - temporary_key = os.environ["AZURE_API_KEY"] - os.environ["AZURE_API_KEY"] = "bad-key" - elif model == "claude-3-5-haiku-20241022": - temporary_key = os.environ["ANTHROPIC_API_KEY"] - os.environ["ANTHROPIC_API_KEY"] = "bad-key" - elif model == "command-nightly": - temporary_key = os.environ["COHERE_API_KEY"] - os.environ["COHERE_API_KEY"] = "bad-key" - elif "j2" in model: - temporary_key = os.environ["AI21_API_KEY"] - os.environ["AI21_API_KEY"] = "bad-key" - elif "togethercomputer" in model: - temporary_key = os.environ["TOGETHERAI_API_KEY"] - os.environ["TOGETHERAI_API_KEY"] = ( - "84060c79880fc49df126d3e87b53f8a463ff6e1c6d27fe64207cde25cdfcd1f24a" - ) - elif model in litellm.openrouter_models: - temporary_key = os.environ["OPENROUTER_API_KEY"] - os.environ["OPENROUTER_API_KEY"] = "bad-key" - elif model in litellm.aleph_alpha_models: - temporary_key = os.environ["ALEPH_ALPHA_API_KEY"] - os.environ["ALEPH_ALPHA_API_KEY"] = "bad-key" - elif model in litellm.nlp_cloud_models: - temporary_key = os.environ["NLP_CLOUD_API_KEY"] - os.environ["NLP_CLOUD_API_KEY"] = "bad-key" - elif ( - model - == "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1" - ): - temporary_key = os.environ["REPLICATE_API_KEY"] - os.environ["REPLICATE_API_KEY"] = "bad-key" - print(f"model: {model}") - response = completion(model=model, messages=messages) - print(f"response: {response}") - except AuthenticationError as e: - print(f"AuthenticationError Caught Exception - {str(e)}") - except ( - OpenAIError - ) as e: # is at least an openai error -> in case of random model errors - e.g. 
overloaded server - print(f"OpenAIError Caught Exception - {e}") - except Exception as e: - print(type(e)) - print(type(AuthenticationError)) - print(e.__class__.__name__) - print(f"Uncaught Exception - {e}") - pytest.fail(f"Error occurred: {e}") - if temporary_key != None: # reset the key - if model == "gpt-3.5-turbo": - os.environ["OPENAI_API_KEY"] = temporary_key - elif model == "chatgpt-test": - os.environ["AZURE_API_KEY"] = temporary_key - azure = True - elif model == "claude-3-5-haiku-20241022": - os.environ["ANTHROPIC_API_KEY"] = temporary_key - elif model == "command-nightly": - os.environ["COHERE_API_KEY"] = temporary_key - elif ( - model - == "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1" - ): - os.environ["REPLICATE_API_KEY"] = temporary_key - elif "j2" in model: - os.environ["AI21_API_KEY"] = temporary_key - elif "togethercomputer" in model: - os.environ["TOGETHERAI_API_KEY"] = temporary_key - elif model in litellm.aleph_alpha_models: - os.environ["ALEPH_ALPHA_API_KEY"] = temporary_key - elif model in litellm.nlp_cloud_models: - os.environ["NLP_CLOUD_API_KEY"] = temporary_key - elif "bedrock" in model: - os.environ["AWS_ACCESS_KEY_ID"] = temporary_aws_access_key - os.environ["AWS_REGION_NAME"] = temporary_aws_region_name - os.environ["AWS_SECRET_ACCESS_KEY"] = temporary_secret_key - return - - -# for model in litellm.models_by_provider["bedrock"]: -# invalid_auth(model=model) -# invalid_auth(model="command-nightly") - - -# Test 3: Invalid Request Error -@pytest.mark.parametrize("model", models) -def test_invalid_request_error(model): - messages = [{"content": "hey, how's it going?", "role": "user"}] - - with pytest.raises(BadRequestError): - completion(model=model, messages=messages, max_tokens="hello world") - - @pytest.mark.parametrize( "provider", ["predibase", "vertex_ai_beta", "anthropic", "databricks", "watsonx", "fireworks_ai"] ) @@ -753,27 +618,6 @@ def _return_exception(*args, **kwargs): assert exception_raised -@pytest.mark.asyncio -@pytest.mark.parametrize("model", ["azure/chatgpt-v-3", "openai/gpt-3.5-turbo"]) -async def test_bad_request_error_contains_httpx_response(model): - """ - Test that the BadRequestError contains the httpx response - - Relevant issue: https://github.com/BerriAI/litellm/issues/6732 - """ - try: - await litellm.acompletion( - model=model, - messages=[{"role": "user", "content": "Hello world"}], - bad_arg="bad_arg", - ) - pytest.fail("Expected to raise BadRequestError") - except litellm.BadRequestError as e: - print("e.response", e.response) - print("vars(e.response)", vars(e.response)) - assert e.response is not None - - def test_exceptions_base_class(): try: raise litellm.RateLimitError( @@ -803,33 +647,3 @@ def test_context_window_exceeded_error_from_litellm_proxy(): extract_and_raise_litellm_exception(**args) -@pytest.mark.parametrize("sync_mode", [True, False]) -@pytest.mark.parametrize("stream_mode", [True, False]) -@pytest.mark.parametrize("model", ["azure/gpt-4o-new-test"]) # "gpt-4o-mini", -@pytest.mark.asyncio -async def test_exception_bubbling_up(sync_mode, stream_mode, model): - """ - make sure code, param, and type are bubbled up - """ - import litellm - - litellm.set_verbose = True - with pytest.raises(Exception) as exc_info: - if sync_mode: - litellm.completion( - model=model, - messages=[{"role": "usera", "content": "hi"}], - stream=stream_mode, - sync_stream=sync_mode, - ) - else: - await litellm.acompletion( - model=model, - messages=[{"role": "usera", "content": "hi"}], - 
stream=stream_mode, - sync_stream=sync_mode, - ) - - assert exc_info.value.code == "invalid_value" - assert exc_info.value.param is not None - assert exc_info.value.type == "invalid_request_error" \ No newline at end of file From 3a08c238230745f7d195c5e3f1132fe2b8d05883 Mon Sep 17 00:00:00 2001 From: Cole McIntosh Date: Sat, 7 Jun 2025 11:41:35 -0600 Subject: [PATCH 09/11] Refactor `test_exceptions.py` by removing outdated tests and comments to enhance clarity and maintainability. --- .../litellm_core_utils/test_exceptions.py | 120 ------------------ 1 file changed, 120 deletions(-) diff --git a/tests/test_litellm/litellm_core_utils/test_exceptions.py b/tests/test_litellm/litellm_core_utils/test_exceptions.py index d19a480faed1..b5cf354a4c59 100644 --- a/tests/test_litellm/litellm_core_utils/test_exceptions.py +++ b/tests/test_litellm/litellm_core_utils/test_exceptions.py @@ -24,26 +24,6 @@ embedding, ) -litellm.vertex_project = "pathrise-convert-1606954137718" -litellm.vertex_location = "us-central1" -litellm.num_retries = 0 - -# litellm.failure_callback = ["sentry"] -#### What this tests #### -# This tests exception mapping -> trigger an exception from an llm provider -> assert if output is of the expected type - - -# 5 providers -> OpenAI, Azure, Anthropic, Cohere, Replicate - -# 3 main types of exceptions -> - Rate Limit Errors, Context Window Errors, Auth errors (incorrect/rotated key, etc.) - -# Approach: Run each model through the test -> assert if the correct error (always the same one) is triggered - -exception_models = [ - "sagemaker/berri-benchmarking-Llama-2-70b-chat-hf-4", - "bedrock/anthropic.claude-instant-v1", -] - @pytest.mark.asyncio async def test_content_policy_exception_azure(): @@ -68,55 +48,6 @@ async def test_content_policy_exception_azure(): pytest.fail(f"An exception occurred - {str(e)}") - - - -# Test 1: Context Window Errors -@pytest.mark.skip(reason="AWS Suspended Account") -@pytest.mark.parametrize("model", exception_models) -def test_context_window(model): - print("Testing context window error") - sample_text = "Say error 50 times" * 1000000 - messages = [{"content": sample_text, "role": "user"}] - try: - litellm.set_verbose = False - print("Testing model=", model) - response = completion(model=model, messages=messages) - print(f"response: {response}") - print("FAILED!") - pytest.fail(f"An exception occurred") - except ContextWindowExceededError as e: - print(f"Worked!") - except RateLimitError: - print("RateLimited!") - except Exception as e: - print(f"{e}") - pytest.fail(f"An error occcurred - {e}") - - -@pytest.mark.skip(reason="duplicate test.") -@pytest.mark.parametrize("model", ["command-nightly"]) -def test_context_window_with_fallbacks(model): - ctx_window_fallback_dict = { - "command-nightly": "claude-2.1", - "gpt-3.5-turbo-instruct": "gpt-3.5-turbo-16k", - "azure/chatgpt-v-3": "gpt-3.5-turbo-16k", - } - sample_text = "how does a court case get to the Supreme Court?" 
* 1000
-    messages = [{"content": sample_text, "role": "user"}]
-
-    try:
-        completion(
-            model=model,
-            messages=messages,
-            context_window_fallback_dict=ctx_window_fallback_dict,
-        )
-    except litellm.ServiceUnavailableError as e:
-        pass
-    except litellm.APIConnectionError as e:
-        pass
-
-
 @pytest.mark.parametrize(
     "provider", ["predibase", "vertex_ai_beta", "anthropic", "databricks", "watsonx", "fireworks_ai"]
 )

From ddc6625e0110906ed60080b8dd85c08ee43af7c8 Mon Sep 17 00:00:00 2001
From: Cole McIntosh
Date: Sat, 7 Jun 2025 11:49:36 -0600
Subject: [PATCH 10/11] Refactor `test_exceptions.py` by removing outdated tests and unnecessary imports to improve code clarity and maintainability.

--- .../litellm_core_utils/test_exceptions.py | 432 +----------------- 1 file changed, 1 insertion(+), 431 deletions(-) diff --git a/tests/test_litellm/litellm_core_utils/test_exceptions.py b/tests/test_litellm/litellm_core_utils/test_exceptions.py index b5cf354a4c59..3540a125d28d 100644 --- a/tests/test_litellm/litellm_core_utils/test_exceptions.py +++ b/tests/test_litellm/litellm_core_utils/test_exceptions.py @@ -1,100 +1,12 @@ -import asyncio import os -import subprocess import sys -import traceback -from typing import Any - -from openai import AuthenticationError, BadRequestError, OpenAIError, RateLimitError - -from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler sys.path.insert( 0, os.path.abspath("../..") ) # Adds the parent directory to the system path -from concurrent.futures import ThreadPoolExecutor -from unittest.mock import MagicMock, patch import pytest -import litellm -from litellm import ( # AuthenticationError,; RateLimitError,; ServiceUnavailableError,; OpenAIError, - ContextWindowExceededError, - completion, - embedding, -) - - -@pytest.mark.asyncio -async def test_content_policy_exception_azure(): - try: - # this is ony a test - we needed some way to invoke the exception :( - litellm.set_verbose = True - response = await litellm.acompletion( - model="azure/chatgpt-v-3", - messages=[{"role": "user", "content": "where do I buy lethal drugs from"}], - mock_response="Exception: content_filter_policy", - ) - except litellm.ContentPolicyViolationError as e: - print("caught a content policy violation error! Passed") - print("exception", e) - assert e.response is not None - assert e.litellm_debug_info is not None - assert isinstance(e.litellm_debug_info, str) - assert len(e.litellm_debug_info) > 0 - pass - except Exception as e: - print() - pytest.fail(f"An exception occurred - {str(e)}") - - -@pytest.mark.parametrize( - "provider", ["predibase", "vertex_ai_beta", "anthropic", "databricks", "watsonx", "fireworks_ai"] -) -def test_exception_mapping(provider): - """ - For predibase, run through a set of mock exceptions - - assert that they are being mapped correctly - """ - litellm.set_verbose = True - error_map = { - 400: litellm.BadRequestError, - 401: litellm.AuthenticationError, - 404: litellm.NotFoundError, - 408: litellm.Timeout, - 429: litellm.RateLimitError, - 500: litellm.InternalServerError, - 503: litellm.ServiceUnavailableError, - } - - for code, expected_exception in error_map.items(): - mock_response = Exception() - setattr(mock_response, "text", "This is an error message") - setattr(mock_response, "llm_provider", provider) - setattr(mock_response, "status_code", code) - - response: Any = None - try: - response = completion( - model="{}/test-model".format(provider), - messages=[{"role": "user", "content": "Hey, how's it going?"}], - mock_response=mock_response, - ) - except expected_exception: - continue - except Exception as e: - traceback.print_exc() - response = "{}".format(str(e)) - pytest.fail( - "Did not raise expected exception. 
Expected={}, Return={},".format( - expected_exception, response - ) - ) - - pass - - def test_fireworks_ai_exception_mapping(): """ Comprehensive test for Fireworks AI exception mapping, including: @@ -184,346 +96,4 @@ def test_fireworks_ai_exception_mapping(): # Test edge cases assert not ExceptionCheckers.is_error_str_rate_limit(None) # type: ignore - assert not ExceptionCheckers.is_error_str_rate_limit(42) # type: ignore - - -from typing import Optional, Union - -from openai import AsyncOpenAI, OpenAI - - -def _pre_call_utils( - call_type: str, - data: dict, - client: Union[OpenAI, AsyncOpenAI], - sync_mode: bool, - streaming: Optional[bool], -): - if call_type == "embedding": - data["input"] = "Hello world!" - mapped_target: Any = client.embeddings.with_raw_response - if sync_mode: - original_function = litellm.embedding - else: - original_function = litellm.aembedding - elif call_type == "chat_completion": - data["messages"] = [{"role": "user", "content": "Hello world"}] - if streaming is True: - data["stream"] = True - mapped_target = client.chat.completions.with_raw_response # type: ignore - if sync_mode: - original_function = litellm.completion - else: - original_function = litellm.acompletion - elif call_type == "completion": - data["prompt"] = "Hello world" - if streaming is True: - data["stream"] = True - mapped_target = client.completions.with_raw_response # type: ignore - if sync_mode: - original_function = litellm.text_completion - else: - original_function = litellm.atext_completion - - return data, original_function, mapped_target - - -def _pre_call_utils_httpx( - call_type: str, - data: dict, - client: Union[HTTPHandler, AsyncHTTPHandler], - sync_mode: bool, - streaming: Optional[bool], -): - mapped_target: Any = client.client - if call_type == "embedding": - data["input"] = "Hello world!" - - if sync_mode: - original_function = litellm.embedding - else: - original_function = litellm.aembedding - elif call_type == "chat_completion": - data["messages"] = [{"role": "user", "content": "Hello world"}] - if streaming is True: - data["stream"] = True - - if sync_mode: - original_function = litellm.completion - else: - original_function = litellm.acompletion - elif call_type == "completion": - data["prompt"] = "Hello world" - if streaming is True: - data["stream"] = True - if sync_mode: - original_function = litellm.text_completion - else: - original_function = litellm.atext_completion - - return data, original_function, mapped_target - - -@pytest.mark.parametrize( - "sync_mode", - [True, False], -) -@pytest.mark.parametrize( - "provider, model, call_type, streaming", - [ - ("openai", "text-embedding-ada-002", "embedding", None), - ("openai", "gpt-3.5-turbo", "chat_completion", False), - ("openai", "gpt-3.5-turbo", "chat_completion", True), - ("openai", "gpt-3.5-turbo-instruct", "completion", True), - ("azure", "azure/chatgpt-v-3", "chat_completion", True), - ("azure", "azure/text-embedding-ada-002", "embedding", True), - ("azure", "azure_text/gpt-3.5-turbo-instruct", "completion", True), - ], -) -@pytest.mark.asyncio -async def test_exception_with_headers(sync_mode, provider, model, call_type, streaming): - """ - User feedback: litellm says "No deployments available for selected model, Try again in 60 seconds" - but Azure says to retry in at most 9s - - ``` - {"message": "litellm.proxy.proxy_server.embeddings(): Exception occured - No deployments available for selected model, Try again in 60 seconds. Passed model=text-embedding-ada-002. 
pre-call-checks=False, allowed_model_region=n/a, cooldown_list=[('b49cbc9314273db7181fe69b1b19993f04efb88f2c1819947c538bac08097e4c', {'Exception Received': 'litellm.RateLimitError: AzureException RateLimitError - Requests to the Embeddings_Create Operation under Azure OpenAI API version 2023-09-01-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 9 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit.', 'Status Code': '429'})]", "level": "ERROR", "timestamp": "2024-08-22T03:25:36.900476"} - ``` - """ - print(f"Received args: {locals()}") - import openai - - if sync_mode: - if provider == "openai": - openai_client = openai.OpenAI(api_key="") - elif provider == "azure": - openai_client = openai.AzureOpenAI( - api_key="", base_url="", api_version=litellm.AZURE_DEFAULT_API_VERSION - ) - else: - if provider == "openai": - openai_client = openai.AsyncOpenAI(api_key="") - elif provider == "azure": - openai_client = openai.AsyncAzureOpenAI( - api_key="", base_url="", api_version=litellm.AZURE_DEFAULT_API_VERSION - ) - - data = {"model": model} - data, original_function, mapped_target = _pre_call_utils( - call_type=call_type, - data=data, - client=openai_client, - sync_mode=sync_mode, - streaming=streaming, - ) - - cooldown_time = 30.0 - - def _return_exception(*args, **kwargs): - import datetime - - from httpx import Headers, Request, Response - - kwargs = { - "request": Request("POST", "https://www.google.com"), - "message": "Error code: 429 - Rate Limit Error!", - "body": {"detail": "Rate Limit Error!"}, - "code": None, - "param": None, - "type": None, - "response": Response( - status_code=429, - headers=Headers( - { - "date": "Sat, 21 Sep 2024 22:56:53 GMT", - "server": "uvicorn", - "retry-after": "30", - "content-length": "30", - "content-type": "application/json", - } - ), - request=Request("POST", "http://0.0.0.0:9000/chat/completions"), - ), - "status_code": 429, - "request_id": None, - } - - exception = Exception() - for k, v in kwargs.items(): - setattr(exception, k, v) - raise exception - - with patch.object( - mapped_target, - "create", - side_effect=_return_exception, - ): - new_retry_after_mock_client = MagicMock(return_value=-1) - - litellm.utils._get_retry_after_from_exception_header = ( - new_retry_after_mock_client - ) - - exception_raised = False - try: - if sync_mode: - resp = original_function(**data, client=openai_client) - if streaming: - for chunk in resp: - continue - else: - resp = await original_function(**data, client=openai_client) - - if streaming: - async for chunk in resp: - continue - - except litellm.RateLimitError as e: - exception_raised = True - assert e.litellm_response_headers is not None - assert int(e.litellm_response_headers["retry-after"]) == cooldown_time - - if exception_raised is False: - print(resp) - assert exception_raised - - -@pytest.mark.parametrize( - "sync_mode", - [True, False], -) -@pytest.mark.parametrize("streaming", [True, False]) -@pytest.mark.parametrize( - "provider, model, call_type", - [ - ("anthropic", "claude-3-haiku-20240307", "chat_completion"), - ], -) -@pytest.mark.asyncio -async def test_exception_with_headers_httpx( - sync_mode, provider, model, call_type, streaming -): - """ - User feedback: litellm says "No deployments available for selected model, Try again in 60 seconds" - but Azure says to retry in at most 9s - - ``` - {"message": "litellm.proxy.proxy_server.embeddings(): Exception occured - No deployments 
available for selected model, Try again in 60 seconds. Passed model=text-embedding-ada-002. pre-call-checks=False, allowed_model_region=n/a, cooldown_list=[('b49cbc9314273db7181fe69b1b19993f04efb88f2c1819947c538bac08097e4c', {'Exception Received': 'litellm.RateLimitError: AzureException RateLimitError - Requests to the Embeddings_Create Operation under Azure OpenAI API version 2023-09-01-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 9 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit.', 'Status Code': '429'})]", "level": "ERROR", "timestamp": "2024-08-22T03:25:36.900476"} - ``` - """ - print(f"Received args: {locals()}") - import openai - - if sync_mode: - client = HTTPHandler() - else: - client = AsyncHTTPHandler() - - data = {"model": model} - data, original_function, mapped_target = _pre_call_utils_httpx( - call_type=call_type, - data=data, - client=client, - sync_mode=sync_mode, - streaming=streaming, - ) - - cooldown_time = 30.0 - - def _return_exception(*args, **kwargs): - import datetime - - from httpx import Headers, HTTPStatusError, Request, Response - - # Create the Request object - request = Request("POST", "http://0.0.0.0:9000/chat/completions") - - # Create the Response object with the necessary headers and status code - response = Response( - status_code=429, - headers=Headers( - { - "date": "Sat, 21 Sep 2024 22:56:53 GMT", - "server": "uvicorn", - "retry-after": "30", - "content-length": "30", - "content-type": "application/json", - } - ), - request=request, - ) - - # Create and raise the HTTPStatusError exception - raise HTTPStatusError( - message="Error code: 429 - Rate Limit Error!", - request=request, - response=response, - ) - - with patch.object( - mapped_target, - "send", - side_effect=_return_exception, - ): - new_retry_after_mock_client = MagicMock(return_value=-1) - - litellm.utils._get_retry_after_from_exception_header = ( - new_retry_after_mock_client - ) - - exception_raised = False - try: - if sync_mode: - resp = original_function(**data, client=client) - if streaming: - for chunk in resp: - continue - else: - resp = await original_function(**data, client=client) - - if streaming: - async for chunk in resp: - continue - - except litellm.RateLimitError as e: - exception_raised = True - assert ( - e.litellm_response_headers is not None - ), "litellm_response_headers is None" - print("e.litellm_response_headers", e.litellm_response_headers) - assert int(e.litellm_response_headers["retry-after"]) == cooldown_time - - if exception_raised is False: - print(resp) - assert exception_raised - - -def test_exceptions_base_class(): - try: - raise litellm.RateLimitError( - message="BedrockException: Rate Limit Error", - model="model", - llm_provider="bedrock", - ) - except litellm.RateLimitError as e: - assert isinstance(e, litellm.RateLimitError) - assert e.code == "429" - assert e.type == "throttling_error" - - -def test_context_window_exceeded_error_from_litellm_proxy(): - from httpx import Response - from litellm.litellm_core_utils.exception_mapping_utils import ( - extract_and_raise_litellm_exception, - ) - - args = { - "response": Response(status_code=400, text="Bad Request"), - "error_str": "Error code: 400 - {'error': {'message': \"litellm.ContextWindowExceededError: litellm.BadRequestError: this is a mock context window exceeded error\\nmodel=gpt-3.5-turbo. context_window_fallbacks=None. 
fallbacks=None.\\n\\nSet 'context_window_fallback' - https://docs.litellm.ai/docs/routing#fallbacks\\nReceived Model Group=gpt-3.5-turbo\\nAvailable Model Group Fallbacks=None\", 'type': None, 'param': None, 'code': '400'}}",
-        "model": "gpt-3.5-turbo",
-        "custom_llm_provider": "litellm_proxy",
-    }
-    with pytest.raises(litellm.ContextWindowExceededError):
-        extract_and_raise_litellm_exception(**args)
-
-
+    assert not ExceptionCheckers.is_error_str_rate_limit(42)  # type: ignore
\ No newline at end of file

From 40fc7dda7d1524fc97dbcc7d1fb57e20ae2c32c6 Mon Sep 17 00:00:00 2001
From: Cole McIntosh
Date: Wed, 11 Jun 2025 17:06:56 -0600
Subject: [PATCH 11/11] Refactor import statement for verbose_logger in exception_mapping_utils.py

---
 litellm/litellm_core_utils/exception_mapping_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/litellm/litellm_core_utils/exception_mapping_utils.py b/litellm/litellm_core_utils/exception_mapping_utils.py
index 147fab89ed06..28d4fca6fa9e 100644
--- a/litellm/litellm_core_utils/exception_mapping_utils.py
+++ b/litellm/litellm_core_utils/exception_mapping_utils.py
@@ -5,7 +5,7 @@
 import httpx
 
 import litellm
-from litellm import verbose_logger
+from litellm._logging import verbose_logger
 
 from ..exceptions import (
     APIConnectionError,
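
For reference, a minimal usage sketch of the rate-limit string check exercised by the retained tests above. It assumes `ExceptionCheckers` is importable from litellm.litellm_core_utils.exception_mapping_utils (the module touched by the final patch) and that, per the edge-case asserts, non-string inputs are simply reported as not rate limited; it is illustrative only and not part of any patch.

# Illustrative sketch; the import path is assumed from the diffs above.
from litellm.litellm_core_utils.exception_mapping_utils import ExceptionCheckers

samples = [
    "Error code: 429 - Rate Limit Error!",      # contains "429" -> expected True
    "rate limit exceeded, please retry later",  # contains "rate limit" -> expected True
    "Error code: 400 - Bad Request",            # unrelated error -> expected False
]

for error_str in samples:
    print(error_str, "->", ExceptionCheckers.is_error_str_rate_limit(error_str))

# Non-string inputs are tolerated and treated as "not a rate limit",
# matching the edge-case asserts kept in the refactored test file.
print(ExceptionCheckers.is_error_str_rate_limit(None))  # False
print(ExceptionCheckers.is_error_str_rate_limit(42))    # False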