refactor(base_agent): 优化工具调用处理和输出消息逻辑

yufei311 · yufei311 · commit eafce12fbdb2 · 2025-06-11T15:38:08.000+08:00
简化工具调用处理流程：移除未检测到工具调用时将模型输出直接作为最终答案返回的回退分支及相关条件判断
重构输出消息处理逻辑，提高代码可读性
统一工具参数处理方式：直接复用 tool_arguments（为 None 时置为空字典），不再引入额外的中间变量，避免重复代码
diff --git a/src/agent/base_agent.py b/src/agent/base_agent.py
@@ -3,8 +3,9 @@
     Callable,
     Optional
 )
-import json
+
 import yaml
+import json
 from rich.panel import Panel
 from rich.text import Text
 
@@ -21,11 +22,9 @@
 from src.memory import (ActionStep,
                         ToolCall,
                         AgentMemory)
-from src.logger import (
-    LogLevel, 
-    YELLOW_HEX,
-    logger # AsyncMultiStepAgent creates its own self.logger
-)
+from src.logger import (LogLevel, 
+                        YELLOW_HEX, 
+                        logger)
 from src.models import Model, parse_json_if_needed, ChatMessage
 from src.utils.agent_types import (
     AgentAudio,
@@ -94,7 +93,6 @@ def __init__(
             system_prompt=self.system_prompt,
             user_prompt=self.user_prompt,
         )
-        # self.logger is inherited from AsyncMultiStepAgent and uses agent_name_to_use
 
     def initialize_system_prompt(self) -> str:
         """Initialize the system prompt for the agent."""
@@ -115,7 +113,6 @@ def initialize_user_prompt(self) -> str:
         
     def initialize_task_instruction(self) -> str:
         """Initialize the task instruction for the agent."""
-        # self.task is set by the __call__ method of AsyncMultiStepAgent at runtime
         task_instruction = populate_template(
             self.prompt_templates["task_instruction"],
             variables={"task": self.task},
@@ -220,42 +217,20 @@ async def step(self, memory_step: ActionStep) -> None | Any:
                 title="Output message of the LLM:",
                 level=LogLevel.DEBUG,
             )
+
             memory_step.model_output_message.content = model_output
             memory_step.model_output = model_output
         except Exception as e:
             raise AgentGenerationError(f"Error while generating output:\n{e}", self.logger) from e
 
         if chat_message.tool_calls is None or len(chat_message.tool_calls) == 0:
             try:
-                # Attempt to parse tool calls if they were not automatically populated
                 chat_message = self.model.parse_tool_calls(chat_message)
             except Exception as e:
-                # If parsing failed and there is model_output, it can be considered a direct answer
-                if model_output:
-                    self.logger.log(
-                        Text(f"Tool call not detected. Processing model output as final answer: {model_output}", style=f"bold {YELLOW_HEX}"),
-                        level=LogLevel.INFO,
-                    )
-                    memory_step.action_output = model_output
-                    return model_output
                 raise AgentParsingError(f"Error while parsing tool call from model output: {e}", self.logger)
-        
-        # If there are still no tool calls after attempting to parse
-        if not chat_message.tool_calls:
-            if model_output:
-                self.logger.log(
-                    Text(f"Tool call not detected after parsing. Processing model output as final answer: {model_output}", style=f"bold {YELLOW_HEX}"),
-                    level=LogLevel.INFO,
-                )
-                memory_step.action_output = model_output
-                return model_output
-            else:
-                # If there are no tool calls and no content, it's an error
-                raise AgentParsingError("Tool call not found, and there is no content in the model output.", self.logger)
-        
-        # Continue if there are tool calls
-        for tool_call in chat_message.tool_calls:
-            tool_call.function.arguments = parse_json_if_needed(tool_call.function.arguments)
+        else:
+            for tool_call in chat_message.tool_calls:
+                tool_call.function.arguments = parse_json_if_needed(tool_call.function.arguments)
 
         tool_call = chat_message.tool_calls[0]
         tool_name, tool_call_id = tool_call.function.name, tool_call.id
@@ -270,7 +245,10 @@ async def step(self, memory_step: ActionStep) -> None | Any:
         )
         if tool_name == "final_answer":
             if isinstance(tool_arguments, dict):
-                result = tool_arguments.get("result", tool_arguments)
+                if "result" in tool_arguments:
+                    result = tool_arguments["result"]
+                else:
+                    result = tool_arguments
             else:
                 result = tool_arguments
             if (
@@ -291,19 +269,21 @@ async def step(self, memory_step: ActionStep) -> None | Any:
             memory_step.action_output = final_result
             return final_result
         else:
-            tool_args_to_pass = tool_arguments if tool_arguments is not None else {}
-            observation = await self.execute_tool_call(tool_name, tool_args_to_pass)
+            if tool_arguments is None:
+                tool_arguments = {}
+            observation = await self.execute_tool_call(tool_name, tool_arguments)
             observation_type = type(observation)
-            
             if observation_type in [AgentImage, AgentAudio]:
-                observation_name = "image.png" if observation_type == AgentImage else "audio.mp3"
+                if observation_type == AgentImage:
+                    observation_name = "image.png"
+                elif observation_type == AgentAudio:
+                    observation_name = "audio.mp3"
                 # TODO: observation naming could allow for different names of same type
 
                 self.state[observation_name] = observation
                 updated_information = f"Stored '{observation_name}' in memory."
             else:
                 updated_information = str(observation).strip()
-            
             self.logger.log(
                 f"Observations: {updated_information.replace('[', '|')}",  # escape potential rich-tag-like components
                 level=LogLevel.INFO,