@@ -37,6 +37,7 @@
 warnings.simplefilter("ignore", UserWarning)
 import litellm
 from litellm import Choices
+from litellm.exceptions import ContextWindowExceededError
 from litellm.litellm_core_utils.get_supported_openai_params import (
     get_supported_openai_params,
 )
@@ -597,6 +598,11 @@ def _handle_streaming_response(
             self._handle_emit_call_events(full_response, LLMCallType.LLM_CALL)
             return full_response
 
+        except ContextWindowExceededError as e:
+            # Catch context window errors from litellm and convert them to our own exception type.
+            # This exception is handled by CrewAgentExecutor._invoke_loop(), which can then
+            # decide whether to summarize the content or abort based on the respect_context_window flag.
+            raise LLMContextLengthExceededException(str(e))
         except Exception as e:
             logging.error(f"Error in streaming response: {str(e)}")
             if full_response.strip():
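The streaming hunk above is the classic translate-at-the-boundary pattern: catch the vendor's exception where it is raised and re-raise a domain-specific one, so code above this layer never has to depend on litellm's exception types. Below is a minimal, self-contained sketch of that pattern; the class names are stand-ins, not the crewai originals, and the `from e` chaining shown here is an optional refinement (the diff itself re-raises without it):

```python
class VendorContextError(Exception):
    """Stand-in for litellm's ContextWindowExceededError."""

class ContextLengthExceeded(Exception):
    """Stand-in for LLMContextLengthExceededException."""

def vendor_complete(prompt: str) -> str:
    # Pretend the model's context window is 10 characters.
    if len(prompt) > 10:
        raise VendorContextError("prompt exceeds maximum context length")
    return "ok"

def call_llm(prompt: str) -> str:
    try:
        return vendor_complete(prompt)
    except VendorContextError as e:
        # Preserve the message (and the original traceback, via `from e`)
        # while exposing a provider-agnostic exception to callers.
        raise ContextLengthExceeded(str(e)) from e

try:
    call_llm("x" * 100)
except ContextLengthExceeded as e:
    print(f"domain error: {e}")
```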
@@ -711,7 +717,16 @@ def _handle_non_streaming_response(
             str: The response text
         """
         # --- 1) Make the completion call
-        response = litellm.completion(**params)
+        try:
+            # Attempt to make the completion call, but catch context window errors
+            # and convert them to our own exception type for consistent handling
+            # across the codebase. This allows CrewAgentExecutor to handle context
+            # length issues appropriately.
+            response = litellm.completion(**params)
+        except ContextWindowExceededError as e:
+            # Convert litellm's context window error to our own exception type
+            # for consistent handling in the rest of the codebase.
+            raise LLMContextLengthExceededException(str(e))
 
        # --- 2) Extract response message and content
        response_message = cast(Choices, cast(ModelResponse, response).choices)[
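With this hunk, the streaming and non-streaming paths both raise the same exception type, which is what makes the recovery described in the comments possible. Here is a hypothetical sketch of the consumer side, an `_invoke_loop`-style retry; the executor shape, the `summarize_messages` helper, and the retry bound are illustrative assumptions, not crewai's actual implementation:

```python
from typing import Callable

class LLMContextLengthExceededException(Exception):
    """Stand-in for the project's exception type."""

def summarize_messages(messages: list[str]) -> list[str]:
    # Hypothetical helper: compact the history into a summary plus the tail.
    return ["(summary of earlier turns)"] + messages[-2:]

def invoke_loop(
    llm_call: Callable[[list[str]], str],
    messages: list[str],
    respect_context_window: bool,
) -> str:
    for _ in range(3):  # bound retries so a stubborn overflow cannot loop forever
        try:
            return llm_call(messages)
        except LLMContextLengthExceededException:
            if not respect_context_window:
                # Caller opted out of automatic recovery: surface the error.
                raise
            # Otherwise shrink the conversation and try again.
            messages = summarize_messages(messages)
    raise LLMContextLengthExceededException("context still too large after summarizing")
```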
@@ -870,15 +885,17 @@ def call(
                     params, callbacks, available_functions
                 )
 
+        except LLMContextLengthExceededException:
+            # Re-raise LLMContextLengthExceededException as it should be handled
+            # by the CrewAgentExecutor._invoke_loop method, which can then decide
+            # whether to summarize the content or abort based on the respect_context_window flag.
+            raise
         except Exception as e:
             crewai_event_bus.emit(
                 self,
                 event=LLMCallFailedEvent(error=str(e)),
             )
-            if not LLMContextLengthExceededException(
-                str(e)
-            )._is_context_limit_error(str(e)):
-                logging.error(f"LiteLLM call failed: {str(e)}")
+            logging.error(f"LiteLLM call failed: {str(e)}")
             raise
 
     def _handle_emit_call_events(self, response: Any, call_type: LLMCallType):
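One subtlety in this last hunk: the new clause only fires because it is listed before `except Exception`. Python tries except clauses top to bottom and runs the first one that matches, so reversing the order would make the re-raise dead code. A small runnable illustration:

```python
class SpecificError(Exception):
    pass

try:
    raise SpecificError("context too large")
except SpecificError:
    # This clause wins because it appears first and matches exactly.
    print("specific handler: re-raise for the caller to recover")
except Exception:
    # Never reached for SpecificError; swapping the clause order would
    # route SpecificError here and hide it from the caller.
    print("generic handler: log and fail")
```

The same hunk also retires the old heuristic, where the generic handler instantiated LLMContextLengthExceededException just to call its `_is_context_limit_error()` string matcher on the message; with a typed exception raised upstream, the string sniffing and the conditional logging around it are no longer needed.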