Commit 5b9606e

fix context window
1 parent 685d20f commit 5b9606e

File tree

src/crewai/llm.py
tests/llm_test.py

2 files changed: 60 additions, 5 deletions

src/crewai/llm.py

Lines changed: 22 additions & 5 deletions
@@ -37,6 +37,7 @@
     warnings.simplefilter("ignore", UserWarning)
     import litellm
     from litellm import Choices
+    from litellm.exceptions import ContextWindowExceededError
     from litellm.litellm_core_utils.get_supported_openai_params import (
         get_supported_openai_params,
     )
@@ -597,6 +598,11 @@ def _handle_streaming_response(
             self._handle_emit_call_events(full_response, LLMCallType.LLM_CALL)
             return full_response

+        except ContextWindowExceededError as e:
+            # Catch context window errors from litellm and convert them to our own exception type.
+            # This exception is handled by CrewAgentExecutor._invoke_loop(), which can then
+            # decide whether to summarize the content or abort based on the respect_context_window flag.
+            raise LLMContextLengthExceededException(str(e))
         except Exception as e:
             logging.error(f"Error in streaming response: {str(e)}")
             if full_response.strip():
@@ -711,7 +717,16 @@ def _handle_non_streaming_response(
             str: The response text
         """
         # --- 1) Make the completion call
-        response = litellm.completion(**params)
+        try:
+            # Attempt to make the completion call, but catch context window errors
+            # and convert them to our own exception type for consistent handling
+            # across the codebase. This allows CrewAgentExecutor to handle context
+            # length issues appropriately.
+            response = litellm.completion(**params)
+        except ContextWindowExceededError as e:
+            # Convert litellm's context window error to our own exception type
+            # for consistent handling in the rest of the codebase.
+            raise LLMContextLengthExceededException(str(e))

         # --- 2) Extract response message and content
         response_message = cast(Choices, cast(ModelResponse, response).choices)[
@@ -870,15 +885,17 @@ def call(
                     params, callbacks, available_functions
                 )

+            except LLMContextLengthExceededException:
+                # Re-raise LLMContextLengthExceededException as it should be handled
+                # by the CrewAgentExecutor._invoke_loop method, which can then decide
+                # whether to summarize the content or abort based on the respect_context_window flag.
+                raise
             except Exception as e:
                 crewai_event_bus.emit(
                     self,
                     event=LLMCallFailedEvent(error=str(e)),
                 )
-                if not LLMContextLengthExceededException(
-                    str(e)
-                )._is_context_limit_error(str(e)):
-                    logging.error(f"LiteLLM call failed: {str(e)}")
+                logging.error(f"LiteLLM call failed: {str(e)}")
                 raise

     def _handle_emit_call_events(self, response: Any, call_type: LLMCallType):
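The new comments refer to CrewAgentExecutor._invoke_loop() and the respect_context_window flag, which are not part of this diff. A minimal sketch of that consumer-side contract, assuming the executor retries with a condensed conversation when the flag is set (summarize_messages below is a hypothetical stand-in, not CrewAI's actual helper):

# Illustrative sketch only: how a caller such as CrewAgentExecutor._invoke_loop()
# might consume the re-raised exception, per the comments in the diff above.
from crewai.utilities.exceptions.context_window_exceeding_exception import (
    LLMContextLengthExceededException,
)


def invoke_with_context_handling(llm, messages, respect_context_window: bool):
    try:
        return llm.call(messages)
    except LLMContextLengthExceededException:
        if respect_context_window:
            # Shrink the conversation (e.g. summarize older turns) and retry once.
            messages = summarize_messages(messages)  # hypothetical helper
            return llm.call(messages)
        # Otherwise surface the error unchanged, exactly as llm.py now re-raises it.
        raise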

tests/llm_test.py

Lines changed: 38 additions & 0 deletions
@@ -373,6 +373,44 @@ def get_weather_tool_schema():
         },
     }

+def test_context_window_exceeded_error_handling():
+    """Test that litellm.ContextWindowExceededError is converted to LLMContextLengthExceededException."""
+    from litellm.exceptions import ContextWindowExceededError
+    from crewai.utilities.exceptions.context_window_exceeding_exception import (
+        LLMContextLengthExceededException,
+    )
+
+    llm = LLM(model="gpt-4")
+
+    # Test non-streaming response
+    with patch("litellm.completion") as mock_completion:
+        mock_completion.side_effect = ContextWindowExceededError(
+            "This model's maximum context length is 8192 tokens. However, your messages resulted in 10000 tokens.",
+            model="gpt-4",
+            llm_provider="openai"
+        )
+
+        with pytest.raises(LLMContextLengthExceededException) as excinfo:
+            llm.call("This is a test message")
+
+        assert "context length exceeded" in str(excinfo.value).lower()
+        assert "8192 tokens" in str(excinfo.value)
+
+    # Test streaming response
+    llm = LLM(model="gpt-4", stream=True)
+    with patch("litellm.completion") as mock_completion:
+        mock_completion.side_effect = ContextWindowExceededError(
+            "This model's maximum context length is 8192 tokens. However, your messages resulted in 10000 tokens.",
+            model="gpt-4",
+            llm_provider="openai"
+        )
+
+        with pytest.raises(LLMContextLengthExceededException) as excinfo:
+            llm.call("This is a test message")
+
+        assert "context length exceeded" in str(excinfo.value).lower()
+        assert "8192 tokens" in str(excinfo.value)
+

 @pytest.mark.vcr(filter_headers=["authorization"])
 @pytest.fixture
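The assertions above only pass if LLMContextLengthExceededException rewrites the message it wraps. A rough sketch of such an exception, inferred from the test assertions and from the _is_context_limit_error() call removed in llm.py; the real class lives in crewai.utilities.exceptions.context_window_exceeding_exception and may differ in detail:

# Sketch only: inferred shape of the CrewAI exception, not the actual source.
class LLMContextLengthExceededException(Exception):
    # Phrases used to recognize provider context-limit errors; the exact list
    # in CrewAI is an implementation detail and is assumed here.
    CONTEXT_LIMIT_PHRASES = ["maximum context length", "context window", "too many tokens"]

    def __init__(self, error_message: str):
        self.original_error_message = error_message
        # Prefix the upstream text with a stable phrase; this is why the test can
        # check for "context length exceeded" (lowercased) while still finding
        # "8192 tokens" from litellm's original message.
        super().__init__(f"Context length exceeded: {error_message}")

    def _is_context_limit_error(self, error_message: str) -> bool:
        # Heuristic used by the pre-change code in llm.py to decide whether a
        # generic exception was actually a context-limit failure.
        return any(p in error_message.lower() for p in self.CONTEXT_LIMIT_PHRASES)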
