From 713824aabe799cf5fecf6f75a73fe5d9d6eb413e Mon Sep 17 00:00:00 2001
From: hoare
Date: Thu, 5 Jun 2025 10:28:46 +0800
Subject: [PATCH 1/2] refactor(agent): refactor multiple agent classes to
 inherit from BaseAgent
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Refactor browser_use_agent, deep_analyzer_agent, deep_researcher_agent, and
planning_agent to inherit from the newly created BaseAgent class, reducing
code duplication. BaseAgent implements the core functionality shared by
these agents.

Main changes:
1. Create the BaseAgent base class containing the shared logic
2. Simplify each subclass so it keeps only its agent-specific configuration
3. Unify the initialization flow
4. Add .idea to .gitignore
---
 .gitignore                                    |   2 +
 src/agent/base_agent.py                       | 292 ++++++++++++++
 .../browser_use_agent/browser_use_agent.py    | 336 +++-------------
 .../deep_analyzer_agent.py                    | 336 +++-------------
 .../deep_researcher_agent.py                  | 337 +++-------------
 src/agent/planning_agent/planning_agent.py    | 376 +++++-------------
 6 files changed, 550 insertions(+), 1129 deletions(-)
 create mode 100755 src/agent/base_agent.py

diff --git a/.gitignore b/.gitignore
index a335fa3..3593d01 100644
--- a/.gitignore
+++ b/.gitignore
@@ -180,3 +180,5 @@ data/
 
 # workdir
 workdir/
+
+.idea
diff --git a/src/agent/base_agent.py b/src/agent/base_agent.py
new file mode 100755
index 0000000..70baa85
--- /dev/null
+++ b/src/agent/base_agent.py
@@ -0,0 +1,292 @@
+from typing import (
+    Any,
+    Callable,
+    Optional
+)
+
+import yaml
+import json
+from rich.panel import Panel
+from rich.text import Text
+
+from src.tools import AsyncTool
+from src.exception import (
+    AgentGenerationError,
+    AgentParsingError,
+    AgentToolExecutionError,
+    AgentToolCallError
+)
+from src.base.async_multistep_agent import (PromptTemplates,
+                                            populate_template,
+                                            AsyncMultiStepAgent)
+from src.memory import (ActionStep,
+                        ToolCall,
+                        AgentMemory)
+from src.logger import (LogLevel,
+                        YELLOW_HEX,
+                        logger)
+from src.models import Model, parse_json_if_needed, ChatMessage
+from src.utils.agent_types import (
+    AgentAudio,
+    AgentImage,
+)
+from src.utils import assemble_project_path
+
+class BaseAgent(AsyncMultiStepAgent):
+    """Base class for agents with common logic."""
+    AGENT_NAME = "base_agent"  # Must be overridden by subclasses
+
+    def __init__(
+        self,
+        config,  # Specific configuration object for the agent
+        tools: list[AsyncTool],
+        model: Model,
+        prompt_templates_path: str,  # Path to the prompt templates file
+        prompt_templates: PromptTemplates | None = None,  # For preloaded templates
+        max_steps: int = 20,
+        add_base_tools: bool = False,
+        verbosity_level: LogLevel = LogLevel.INFO,
+        grammar: dict[str, str] | None = None,
+        managed_agents: list | None = None,
+        step_callbacks: list[Callable] | None = None,
+        planning_interval: int | None = None,
+        name: str | None = None,  # AGENT_NAME will be used if not specified
+        description: str | None = None,
+        provide_run_summary: bool = False,
+        final_answer_checks: list[Callable] | None = None,
+        **kwargs
+    ):
+        self.config = config  # Save config for possible access by subclasses
+
+        agent_name_to_use = name if name is not None else self.AGENT_NAME
+
+        super().__init__(
+            tools=tools,
+            model=model,
+            prompt_templates=None,  # Initialize as None, load later
+            max_steps=max_steps,
+            add_base_tools=add_base_tools,
+            verbosity_level=verbosity_level,
+            grammar=grammar,
+            managed_agents=managed_agents,
+            step_callbacks=step_callbacks,
+            planning_interval=planning_interval,
+            name=agent_name_to_use,  # Use the defined agent name
+            description=description,
+            provide_run_summary=provide_run_summary,
+            final_answer_checks=final_answer_checks,
+ **kwargs # Pass remaining arguments to the parent class + ) + + # Loading prompt_templates + if prompt_templates: + self.prompt_templates = prompt_templates + else: + abs_template_path = assemble_project_path(prompt_templates_path) + with open(abs_template_path, "r", encoding='utf-8') as f: + self.prompt_templates = yaml.safe_load(f) + + self.system_prompt = self.initialize_system_prompt() + self.user_prompt = self.initialize_user_prompt() + + self.memory = AgentMemory( + system_prompt=self.system_prompt, + user_prompt=self.user_prompt, + ) + + def initialize_system_prompt(self) -> str: + """Initialize the system prompt for the agent.""" + system_prompt = populate_template( + self.prompt_templates["system_prompt"], + variables={"tools": self.tools, "managed_agents": self.managed_agents}, + ) + return system_prompt + + def initialize_user_prompt(self) -> str: + + user_prompt = populate_template( + self.prompt_templates["user_prompt"], + variables={}, + ) + + return user_prompt + + def initialize_task_instruction(self) -> str: + """Initialize the task instruction for the agent.""" + task_instruction = populate_template( + self.prompt_templates["task_instruction"], + variables={"task": self.task}, + ) + return task_instruction + + def _substitute_state_variables(self, arguments: dict[str, str] | str) -> dict[str, Any] | str: + """Replace string values in arguments with their corresponding state values if they exist.""" + if isinstance(arguments, dict): + return { + key: self.state.get(value, value) if isinstance(value, str) else value + for key, value in arguments.items() + } + return arguments + + async def execute_tool_call(self, tool_name: str, arguments: dict[str, str] | str) -> Any: + """ + Execute a tool or managed agent with the provided arguments. + + The arguments are replaced with the actual values from the state if they refer to state variables. + + Args: + tool_name (`str`): Name of the tool or managed agent to execute. + arguments (dict[str, str] | str): Arguments passed to the tool call. 
+ """ + # Check if the tool exists + available_tools = {**self.tools, **self.managed_agents} + if tool_name not in available_tools: + raise AgentToolExecutionError( + f"Unknown tool {tool_name}, should be one of: {', '.join(available_tools)}.", self.logger + ) + + # Get the tool and substitute state variables in arguments + tool = available_tools[tool_name] + arguments = self._substitute_state_variables(arguments) + is_managed_agent = tool_name in self.managed_agents + + try: + # Call tool with appropriate arguments + if isinstance(arguments, dict): + return await tool(**arguments) if is_managed_agent else await tool(**arguments, sanitize_inputs_outputs=True) + elif isinstance(arguments, str): + return await tool(arguments) if is_managed_agent else await tool(arguments, sanitize_inputs_outputs=True) + else: + raise TypeError(f"Unsupported arguments type: {type(arguments)}") + + except TypeError as e: + # Handle invalid arguments + description = getattr(tool, "description", "No description") + if is_managed_agent: + error_msg = ( + f"Invalid request to team member '{tool_name}' with arguments {json.dumps(arguments, ensure_ascii=False)}: {e}\n" + "You should call this team member with a valid request.\n" + f"Team member description: {description}" + ) + else: + error_msg = ( + f"Invalid call to tool '{tool_name}' with arguments {json.dumps(arguments, ensure_ascii=False)}: {e}\n" + "You should call this tool with correct input arguments.\n" + f"Expected inputs: {json.dumps(tool.parameters)}\n" + f"Returns output type: {tool.output_type}\n" + f"Tool description: '{description}'" + ) + raise AgentToolCallError(error_msg, self.logger) from e + + except Exception as e: + # Handle execution errors + if is_managed_agent: + error_msg = ( + f"Error executing request to team member '{tool_name}' with arguments {json.dumps(arguments)}: {e}\n" + "Please try again or request to another team member" + ) + else: + error_msg = ( + f"Error executing tool '{tool_name}' with arguments {json.dumps(arguments)}: {type(e).__name__}: {e}\n" + "Please try again or use another tool" + ) + raise AgentToolExecutionError(error_msg, self.logger) from e + + async def step(self, memory_step: ActionStep) -> None | Any: + """ + Perform one step in the ReAct framework: the agent thinks, acts, and observes the result. + Returns None if the step is not final. 
+ """ + memory_messages = await self.write_memory_to_messages() + + input_messages = memory_messages.copy() + + # Add new step in logs + memory_step.model_input_messages = input_messages + + try: + chat_message: ChatMessage = await self.model( + input_messages, + stop_sequences=["Observation:", "Calling tools:"], + tools_to_call_from=list(self.tools.values()), + ) + memory_step.model_output_message = chat_message + model_output = chat_message.content + self.logger.log_markdown( + content=model_output if model_output else str(chat_message.raw), + title="Output message of the LLM:", + level=LogLevel.DEBUG, + ) + + memory_step.model_output_message.content = model_output + memory_step.model_output = model_output + except Exception as e: + raise AgentGenerationError(f"Error while generating output:\n{e}", self.logger) from e + + if chat_message.tool_calls is None or len(chat_message.tool_calls) == 0: + try: + chat_message = self.model.parse_tool_calls(chat_message) + except Exception as e: + raise AgentParsingError(f"Error while parsing tool call from model output: {e}", self.logger) + else: + for tool_call in chat_message.tool_calls: + tool_call.function.arguments = parse_json_if_needed(tool_call.function.arguments) + + tool_call = chat_message.tool_calls[0] + tool_name, tool_call_id = tool_call.function.name, tool_call.id + tool_arguments = tool_call.function.arguments + memory_step.model_output = str(f"Called Tool: '{tool_name}' with arguments: {tool_arguments}") + memory_step.tool_calls = [ToolCall(name=tool_name, arguments=tool_arguments, id=tool_call_id)] + + # Execute + self.logger.log( + Panel(Text(f"Calling tool: '{tool_name}' with arguments: {tool_arguments}")), + level=LogLevel.INFO, + ) + if tool_name == "final_answer": + if isinstance(tool_arguments, dict): + if "result" in tool_arguments: + result = tool_arguments["result"] + else: + result = tool_arguments + else: + result = tool_arguments + if ( + isinstance(result, str) and result in self.state.keys() + ): # if the answer is a state variable, return the value + final_result = self.state[result] + self.logger.log( + f"[bold {YELLOW_HEX}]Final answer:[/bold {YELLOW_HEX}] Extracting key '{result}' from state to return value '{final_result}'.", + level=LogLevel.INFO, + ) + else: + final_result = result + self.logger.log( + Text(f"Final result: {final_result}", style=f"bold {YELLOW_HEX}"), + level=LogLevel.INFO, + ) + + memory_step.action_output = final_result + return final_result + else: + if tool_arguments is None: + tool_arguments = {} + observation = await self.execute_tool_call(tool_name, tool_arguments) + observation_type = type(observation) + if observation_type in [AgentImage, AgentAudio]: + if observation_type == AgentImage: + observation_name = "image.png" + elif observation_type == AgentAudio: + observation_name = "audio.mp3" + # TODO: observation naming could allow for different names of same type + + self.state[observation_name] = observation + updated_information = f"Stored '{observation_name}' in memory." 
+ else: + updated_information = str(observation).strip() + self.logger.log( + f"Observations: {updated_information.replace('[', '|')}", # escape potential rich-tag-like components + level=LogLevel.INFO, + ) + memory_step.observations = updated_information + return None \ No newline at end of file diff --git a/src/agent/browser_use_agent/browser_use_agent.py b/src/agent/browser_use_agent/browser_use_agent.py index 6b577ca..221fc5a 100644 --- a/src/agent/browser_use_agent/browser_use_agent.py +++ b/src/agent/browser_use_agent/browser_use_agent.py @@ -1,282 +1,54 @@ -from typing import ( - Any, - Callable, - Optional -) -import yaml -import json -from rich.panel import Panel -from rich.text import Text - -from src.tools import AsyncTool -from src.exception import ( - AgentGenerationError, - AgentParsingError, - AgentToolExecutionError, - AgentToolCallError -) -from src.base.async_multistep_agent import (PromptTemplates, - populate_template, - AsyncMultiStepAgent) -from src.memory import (ActionStep, - ToolCall, - AgentMemory) -from src.logger import (LogLevel, - YELLOW_HEX, - logger) -from src.models import Model, parse_json_if_needed, ChatMessage -from src.utils.agent_types import ( - AgentAudio, - AgentImage, -) -from src.registry import register_agent -from src.utils import assemble_project_path - -@register_agent("browser_use_agent") -class BrowserUseAgent(AsyncMultiStepAgent): - def __init__( - self, - config, - tools: list[AsyncTool], - model: Model, - prompt_templates: PromptTemplates | None = None, - max_steps: int = 20, - add_base_tools: bool = False, - verbosity_level: LogLevel = LogLevel.INFO, - grammar: dict[str, str] | None = None, - managed_agents: list | None = None, - step_callbacks: list[Callable] | None = None, - planning_interval: int | None = None, - name: str | None = None, - description: str | None = None, - provide_run_summary: bool = False, - final_answer_checks: list[Callable] | None = None, - **kwargs - ): - self.config = config - - super(BrowserUseAgent, self).__init__( - tools=tools, - model=model, - prompt_templates=prompt_templates, - max_steps=max_steps, - add_base_tools=add_base_tools, - verbosity_level=verbosity_level, - grammar= grammar, - managed_agents=managed_agents, - step_callbacks=step_callbacks, - planning_interval=planning_interval, - name=name, - description=description, - provide_run_summary=provide_run_summary, - final_answer_checks=final_answer_checks, - ) - - template_path = assemble_project_path(self.config.template_path) - with open(template_path, "r") as f: - self.prompt_templates = yaml.safe_load(f) - - self.system_prompt = self.initialize_system_prompt() - self.user_prompt = self.initialize_user_prompt() - - self.memory = AgentMemory( - system_prompt=self.system_prompt, - user_prompt=self.user_prompt, - ) - - def initialize_system_prompt(self) -> str: - """Initialize the system prompt for the agent.""" - system_prompt = populate_template( - self.prompt_templates["system_prompt"], - variables={"tools": self.tools, "managed_agents": self.managed_agents}, - ) - return system_prompt - - def initialize_user_prompt(self) -> str: - - user_prompt = populate_template( - self.prompt_templates["user_prompt"], - variables={}, - ) - - return user_prompt - - def initialize_task_instruction(self) -> str: - """Initialize the task instruction for the agent.""" - task_instruction = populate_template( - self.prompt_templates["task_instruction"], - variables={"task": self.task}, - ) - return task_instruction - - def _substitute_state_variables(self, arguments: 
dict[str, str] | str) -> dict[str, Any] | str: - """Replace string values in arguments with their corresponding state values if they exist.""" - if isinstance(arguments, dict): - return { - key: self.state.get(value, value) if isinstance(value, str) else value - for key, value in arguments.items() - } - return arguments - - async def execute_tool_call(self, tool_name: str, arguments: dict[str, str] | str) -> Any: - """ - Execute a tool or managed agent with the provided arguments. - - The arguments are replaced with the actual values from the state if they refer to state variables. - - Args: - tool_name (`str`): Name of the tool or managed agent to execute. - arguments (dict[str, str] | str): Arguments passed to the tool call. - """ - # Check if the tool exists - available_tools = {**self.tools, **self.managed_agents} - if tool_name not in available_tools: - raise AgentToolExecutionError( - f"Unknown tool {tool_name}, should be one of: {', '.join(available_tools)}.", self.logger - ) - - # Get the tool and substitute state variables in arguments - tool = available_tools[tool_name] - arguments = self._substitute_state_variables(arguments) - is_managed_agent = tool_name in self.managed_agents - - try: - # Call tool with appropriate arguments - if isinstance(arguments, dict): - return await tool(**arguments) if is_managed_agent else await tool(**arguments, sanitize_inputs_outputs=True) - elif isinstance(arguments, str): - return await tool(arguments) if is_managed_agent else await tool(arguments, sanitize_inputs_outputs=True) - else: - raise TypeError(f"Unsupported arguments type: {type(arguments)}") - - except TypeError as e: - # Handle invalid arguments - description = getattr(tool, "description", "No description") - if is_managed_agent: - error_msg = ( - f"Invalid request to team member '{tool_name}' with arguments {json.dumps(arguments, ensure_ascii=False)}: {e}\n" - "You should call this team member with a valid request.\n" - f"Team member description: {description}" - ) - else: - error_msg = ( - f"Invalid call to tool '{tool_name}' with arguments {json.dumps(arguments, ensure_ascii=False)}: {e}\n" - "You should call this tool with correct input arguments.\n" - f"Expected inputs: {json.dumps(tool.parameters)}\n" - f"Returns output type: {tool.output_type}\n" - f"Tool description: '{description}'" - ) - raise AgentToolCallError(error_msg, self.logger) from e - - except Exception as e: - # Handle execution errors - if is_managed_agent: - error_msg = ( - f"Error executing request to team member '{tool_name}' with arguments {json.dumps(arguments)}: {e}\n" - "Please try again or request to another team member" - ) - else: - error_msg = ( - f"Error executing tool '{tool_name}' with arguments {json.dumps(arguments)}: {type(e).__name__}: {e}\n" - "Please try again or use another tool" - ) - raise AgentToolExecutionError(error_msg, self.logger) from e - - async def step(self, memory_step: ActionStep) -> None | Any: - """ - Perform one step in the ReAct framework: the agent thinks, acts, and observes the result. - Returns None if the step is not final. 
- """ - memory_messages = await self.write_memory_to_messages() - - input_messages = memory_messages.copy() - - # Add new step in logs - memory_step.model_input_messages = input_messages - - try: - chat_message: ChatMessage = await self.model( - input_messages, - stop_sequences=["Observation:", "Calling tools:"], - tools_to_call_from=list(self.tools.values()), - ) - memory_step.model_output_message = chat_message - model_output = chat_message.content - self.logger.log_markdown( - content=model_output if model_output else str(chat_message.raw), - title="Output message of the LLM:", - level=LogLevel.DEBUG, - ) - - memory_step.model_output_message.content = model_output - memory_step.model_output = model_output - except Exception as e: - raise AgentGenerationError(f"Error while generating output:\n{e}", self.logger) from e - - if chat_message.tool_calls is None or len(chat_message.tool_calls) == 0: - try: - chat_message = self.model.parse_tool_calls(chat_message) - except Exception as e: - raise AgentParsingError(f"Error while parsing tool call from model output: {e}", self.logger) - else: - for tool_call in chat_message.tool_calls: - tool_call.function.arguments = parse_json_if_needed(tool_call.function.arguments) - - tool_call = chat_message.tool_calls[0] - tool_name, tool_call_id = tool_call.function.name, tool_call.id - tool_arguments = tool_call.function.arguments - memory_step.model_output = str(f"Called Tool: '{tool_name}' with arguments: {tool_arguments}") - memory_step.tool_calls = [ToolCall(name=tool_name, arguments=tool_arguments, id=tool_call_id)] - - # Execute - self.logger.log( - Panel(Text(f"Calling tool: '{tool_name}' with arguments: {tool_arguments}")), - level=LogLevel.INFO, - ) - if tool_name == "final_answer": - if isinstance(tool_arguments, dict): - if "result" in tool_arguments: - result = tool_arguments["result"] - else: - result = tool_arguments - else: - result = tool_arguments - if ( - isinstance(result, str) and result in self.state.keys() - ): # if the answer is a state variable, return the value - final_result = self.state[result] - self.logger.log( - f"[bold {YELLOW_HEX}]Final answer:[/bold {YELLOW_HEX}] Extracting key '{result}' from state to return value '{final_result}'.", - level=LogLevel.INFO, - ) - else: - final_result = result - self.logger.log( - Text(f"Final result: {final_result}", style=f"bold {YELLOW_HEX}"), - level=LogLevel.INFO, - ) - - memory_step.action_output = final_result - return final_result - else: - if tool_arguments is None: - tool_arguments = {} - observation = await self.execute_tool_call(tool_name, tool_arguments) - observation_type = type(observation) - if observation_type in [AgentImage, AgentAudio]: - if observation_type == AgentImage: - observation_name = "image.png" - elif observation_type == AgentAudio: - observation_name = "audio.mp3" - # TODO: observation naming could allow for different names of same type - - self.state[observation_name] = observation - updated_information = f"Stored '{observation_name}' in memory." 
- else: - updated_information = str(observation).strip() - self.logger.log( - f"Observations: {updated_information.replace('[', '|')}", # escape potential rich-tag-like components - level=LogLevel.INFO, - ) - memory_step.observations = updated_information - return None \ No newline at end of file +from typing import ( + Any, + Callable, + Optional +) + +from src.tools import AsyncTool +from src.logger import LogLevel +from src.models import Model +from src.registry import register_agent +from src.agent.base_agent import BaseAgent +from src.utils import assemble_project_path + + +@register_agent("browser_use_agent") +class BrowserUseAgent(BaseAgent): + AGENT_NAME = "browser_use_agent" + + def __init__( + self, + config, + tools: list[AsyncTool], + model: Model, + max_steps: int = 20, + add_base_tools: bool = False, + verbosity_level: LogLevel = LogLevel.INFO, + grammar: dict[str, str] | None = None, + managed_agents: list | None = None, + step_callbacks: list[Callable] | None = None, + planning_interval: int | None = None, + description: str | None = None, + provide_run_summary: bool = False, + final_answer_checks: list[Callable] | None = None, + **kwargs + ): + prompt_templates_path = assemble_project_path(config.template_path) + super().__init__( + config=config, + tools=tools, + model=model, + prompt_templates_path=prompt_templates_path, + max_steps=max_steps, + add_base_tools=add_base_tools, + verbosity_level=verbosity_level, + grammar=grammar, + managed_agents=managed_agents, + step_callbacks=step_callbacks, + planning_interval=planning_interval, + description=description, + provide_run_summary=provide_run_summary, + final_answer_checks=final_answer_checks, + **kwargs + ) + diff --git a/src/agent/deep_analyzer_agent/deep_analyzer_agent.py b/src/agent/deep_analyzer_agent/deep_analyzer_agent.py index 0ce22a8..b557bf9 100644 --- a/src/agent/deep_analyzer_agent/deep_analyzer_agent.py +++ b/src/agent/deep_analyzer_agent/deep_analyzer_agent.py @@ -1,282 +1,54 @@ -from typing import ( - Any, - Callable, - Optional -) -import yaml -import json -from rich.panel import Panel -from rich.text import Text - -from src.tools import AsyncTool -from src.exception import ( - AgentGenerationError, - AgentParsingError, - AgentToolExecutionError, - AgentToolCallError -) -from src.base.async_multistep_agent import (PromptTemplates, - populate_template, - AsyncMultiStepAgent) -from src.memory import (ActionStep, - ToolCall, - AgentMemory) -from src.logger import (LogLevel, - YELLOW_HEX, - logger) -from src.models import Model, parse_json_if_needed, ChatMessage -from src.utils.agent_types import ( - AgentAudio, - AgentImage, -) -from src.registry import register_agent -from src.utils import assemble_project_path - -@register_agent("deep_analyzer_agent") -class DeepAnalyzerAgent(AsyncMultiStepAgent): - def __init__( - self, - config, - tools: list[AsyncTool], - model: Model, - prompt_templates: PromptTemplates | None = None, - max_steps: int = 20, - add_base_tools: bool = False, - verbosity_level: LogLevel = LogLevel.INFO, - grammar: dict[str, str] | None = None, - managed_agents: list | None = None, - step_callbacks: list[Callable] | None = None, - planning_interval: int | None = None, - name: str | None = None, - description: str | None = None, - provide_run_summary: bool = False, - final_answer_checks: list[Callable] | None = None, - **kwargs - ): - self.config = config - - super(DeepAnalyzerAgent, self).__init__( - tools=tools, - model=model, - prompt_templates=prompt_templates, - max_steps=max_steps, - 
add_base_tools=add_base_tools, - verbosity_level=verbosity_level, - grammar= grammar, - managed_agents=managed_agents, - step_callbacks=step_callbacks, - planning_interval=planning_interval, - name=name, - description=description, - provide_run_summary=provide_run_summary, - final_answer_checks=final_answer_checks, - ) - - template_path = assemble_project_path(self.config.template_path) - with open(template_path, "r") as f: - self.prompt_templates = yaml.safe_load(f) - - self.system_prompt = self.initialize_system_prompt() - self.user_prompt = self.initialize_user_prompt() - - self.memory = AgentMemory( - system_prompt=self.system_prompt, - user_prompt=self.user_prompt, - ) - - def initialize_system_prompt(self) -> str: - """Initialize the system prompt for the agent.""" - system_prompt = populate_template( - self.prompt_templates["system_prompt"], - variables={"tools": self.tools, "managed_agents": self.managed_agents}, - ) - return system_prompt - - def initialize_user_prompt(self) -> str: - - user_prompt = populate_template( - self.prompt_templates["user_prompt"], - variables={}, - ) - - return user_prompt - - def initialize_task_instruction(self) -> str: - """Initialize the task instruction for the agent.""" - task_instruction = populate_template( - self.prompt_templates["task_instruction"], - variables={"task": self.task}, - ) - return task_instruction - - def _substitute_state_variables(self, arguments: dict[str, str] | str) -> dict[str, Any] | str: - """Replace string values in arguments with their corresponding state values if they exist.""" - if isinstance(arguments, dict): - return { - key: self.state.get(value, value) if isinstance(value, str) else value - for key, value in arguments.items() - } - return arguments - - async def execute_tool_call(self, tool_name: str, arguments: dict[str, str] | str) -> Any: - """ - Execute a tool or managed agent with the provided arguments. - - The arguments are replaced with the actual values from the state if they refer to state variables. - - Args: - tool_name (`str`): Name of the tool or managed agent to execute. - arguments (dict[str, str] | str): Arguments passed to the tool call. 
- """ - # Check if the tool exists - available_tools = {**self.tools, **self.managed_agents} - if tool_name not in available_tools: - raise AgentToolExecutionError( - f"Unknown tool {tool_name}, should be one of: {', '.join(available_tools)}.", self.logger - ) - - # Get the tool and substitute state variables in arguments - tool = available_tools[tool_name] - arguments = self._substitute_state_variables(arguments) - is_managed_agent = tool_name in self.managed_agents - - try: - # Call tool with appropriate arguments - if isinstance(arguments, dict): - return await tool(**arguments) if is_managed_agent else await tool(**arguments, sanitize_inputs_outputs=True) - elif isinstance(arguments, str): - return await tool(arguments) if is_managed_agent else await tool(arguments, sanitize_inputs_outputs=True) - else: - raise TypeError(f"Unsupported arguments type: {type(arguments)}") - - except TypeError as e: - # Handle invalid arguments - description = getattr(tool, "description", "No description") - if is_managed_agent: - error_msg = ( - f"Invalid request to team member '{tool_name}' with arguments {json.dumps(arguments, ensure_ascii=False)}: {e}\n" - "You should call this team member with a valid request.\n" - f"Team member description: {description}" - ) - else: - error_msg = ( - f"Invalid call to tool '{tool_name}' with arguments {json.dumps(arguments, ensure_ascii=False)}: {e}\n" - "You should call this tool with correct input arguments.\n" - f"Expected inputs: {json.dumps(tool.parameters)}\n" - f"Returns output type: {tool.output_type}\n" - f"Tool description: '{description}'" - ) - raise AgentToolCallError(error_msg, self.logger) from e - - except Exception as e: - # Handle execution errors - if is_managed_agent: - error_msg = ( - f"Error executing request to team member '{tool_name}' with arguments {json.dumps(arguments)}: {e}\n" - "Please try again or request to another team member" - ) - else: - error_msg = ( - f"Error executing tool '{tool_name}' with arguments {json.dumps(arguments)}: {type(e).__name__}: {e}\n" - "Please try again or use another tool" - ) - raise AgentToolExecutionError(error_msg, self.logger) from e - - async def step(self, memory_step: ActionStep) -> None | Any: - """ - Perform one step in the ReAct framework: the agent thinks, acts, and observes the result. - Returns None if the step is not final. 
- """ - memory_messages = await self.write_memory_to_messages() - - input_messages = memory_messages.copy() - - # Add new step in logs - memory_step.model_input_messages = input_messages - - try: - chat_message: ChatMessage = await self.model( - input_messages, - stop_sequences=["Observation:", "Calling tools:"], - tools_to_call_from=list(self.tools.values()), - ) - memory_step.model_output_message = chat_message - model_output = chat_message.content - self.logger.log_markdown( - content=model_output if model_output else str(chat_message.raw), - title="Output message of the LLM:", - level=LogLevel.DEBUG, - ) - - memory_step.model_output_message.content = model_output - memory_step.model_output = model_output - except Exception as e: - raise AgentGenerationError(f"Error while generating output:\n{e}", self.logger) from e - - if chat_message.tool_calls is None or len(chat_message.tool_calls) == 0: - try: - chat_message = self.model.parse_tool_calls(chat_message) - except Exception as e: - raise AgentParsingError(f"Error while parsing tool call from model output: {e}", self.logger) - else: - for tool_call in chat_message.tool_calls: - tool_call.function.arguments = parse_json_if_needed(tool_call.function.arguments) - - tool_call = chat_message.tool_calls[0] - tool_name, tool_call_id = tool_call.function.name, tool_call.id - tool_arguments = tool_call.function.arguments - memory_step.model_output = str(f"Called Tool: '{tool_name}' with arguments: {tool_arguments}") - memory_step.tool_calls = [ToolCall(name=tool_name, arguments=tool_arguments, id=tool_call_id)] - - # Execute - self.logger.log( - Panel(Text(f"Calling tool: '{tool_name}' with arguments: {tool_arguments}")), - level=LogLevel.INFO, - ) - if tool_name == "final_answer": - if isinstance(tool_arguments, dict): - if "result" in tool_arguments: - result = tool_arguments["result"] - else: - result = tool_arguments - else: - result = tool_arguments - if ( - isinstance(result, str) and result in self.state.keys() - ): # if the answer is a state variable, return the value - final_result = self.state[result] - self.logger.log( - f"[bold {YELLOW_HEX}]Final answer:[/bold {YELLOW_HEX}] Extracting key '{result}' from state to return value '{final_result}'.", - level=LogLevel.INFO, - ) - else: - final_result = result - self.logger.log( - Text(f"Final result: {final_result}", style=f"bold {YELLOW_HEX}"), - level=LogLevel.INFO, - ) - - memory_step.action_output = final_result - return final_result - else: - if tool_arguments is None: - tool_arguments = {} - observation = await self.execute_tool_call(tool_name, tool_arguments) - observation_type = type(observation) - if observation_type in [AgentImage, AgentAudio]: - if observation_type == AgentImage: - observation_name = "image.png" - elif observation_type == AgentAudio: - observation_name = "audio.mp3" - # TODO: observation naming could allow for different names of same type - - self.state[observation_name] = observation - updated_information = f"Stored '{observation_name}' in memory." 
- else: - updated_information = str(observation).strip() - self.logger.log( - f"Observations: {updated_information.replace('[', '|')}", # escape potential rich-tag-like components - level=LogLevel.INFO, - ) - memory_step.observations = updated_information - return None \ No newline at end of file +from typing import ( + Any, + Callable, + Optional +) + +from src.tools import AsyncTool +from src.logger import LogLevel +from src.models import Model +from src.registry import register_agent +from src.agent.base_agent import BaseAgent +from src.utils import assemble_project_path + + +@register_agent("deep_analyzer_agent") +class DeepAnalyzerAgent(BaseAgent): + AGENT_NAME = "deep_analyzer_agent" + + def __init__( + self, + config, + tools: list[AsyncTool], + model: Model, + max_steps: int = 20, + add_base_tools: bool = False, + verbosity_level: LogLevel = LogLevel.INFO, + grammar: dict[str, str] | None = None, + managed_agents: list | None = None, + step_callbacks: list[Callable] | None = None, + planning_interval: int | None = None, + description: str | None = None, + provide_run_summary: bool = False, + final_answer_checks: list[Callable] | None = None, + **kwargs + ): + prompt_templates_path = assemble_project_path(config.template_path) + super().__init__( + config=config, + tools=tools, + model=model, + prompt_templates_path=prompt_templates_path, + max_steps=max_steps, + add_base_tools=add_base_tools, + verbosity_level=verbosity_level, + grammar=grammar, + managed_agents=managed_agents, + step_callbacks=step_callbacks, + planning_interval=planning_interval, + description=description, + provide_run_summary=provide_run_summary, + final_answer_checks=final_answer_checks, + **kwargs + ) + diff --git a/src/agent/deep_researcher_agent/deep_researcher_agent.py b/src/agent/deep_researcher_agent/deep_researcher_agent.py index 7608a90..241954a 100644 --- a/src/agent/deep_researcher_agent/deep_researcher_agent.py +++ b/src/agent/deep_researcher_agent/deep_researcher_agent.py @@ -1,282 +1,55 @@ -from typing import ( - Any, - Callable, - Optional -) -import yaml -import json -from rich.panel import Panel -from rich.text import Text - -from src.tools import AsyncTool -from src.exception import ( - AgentGenerationError, - AgentParsingError, - AgentToolExecutionError, - AgentToolCallError -) -from src.base.async_multistep_agent import (PromptTemplates, - populate_template, - AsyncMultiStepAgent) -from src.memory import (ActionStep, - ToolCall, - AgentMemory) -from src.logger import (LogLevel, - YELLOW_HEX, - logger) -from src.models import Model, parse_json_if_needed, ChatMessage -from src.utils.agent_types import ( - AgentAudio, - AgentImage, -) -from src.registry import register_agent -from src.utils import assemble_project_path - -@register_agent("deep_researcher_agent") -class DeepResearcherAgent(AsyncMultiStepAgent): - def __init__( - self, - config, - tools: list[AsyncTool], - model: Model, - prompt_templates: PromptTemplates | None = None, - max_steps: int = 20, - add_base_tools: bool = False, - verbosity_level: LogLevel = LogLevel.INFO, - grammar: dict[str, str] | None = None, - managed_agents: list | None = None, - step_callbacks: list[Callable] | None = None, - planning_interval: int | None = None, - name: str | None = None, - description: str | None = None, - provide_run_summary: bool = False, - final_answer_checks: list[Callable] | None = None, - **kwargs - ): - self.config = config - - super(DeepResearcherAgent, self).__init__( - tools=tools, - model=model, - prompt_templates=prompt_templates, 
- max_steps=max_steps, - add_base_tools=add_base_tools, - verbosity_level=verbosity_level, - grammar= grammar, - managed_agents=managed_agents, - step_callbacks=step_callbacks, - planning_interval=planning_interval, - name=name, - description=description, - provide_run_summary=provide_run_summary, - final_answer_checks=final_answer_checks, - ) - - template_path = assemble_project_path(self.config.template_path) - with open(template_path, "r") as f: - self.prompt_templates = yaml.safe_load(f) - - self.system_prompt = self.initialize_system_prompt() - self.user_prompt = self.initialize_user_prompt() - - self.memory = AgentMemory( - system_prompt=self.system_prompt, - user_prompt=self.user_prompt, - ) - - def initialize_system_prompt(self) -> str: - """Initialize the system prompt for the agent.""" - system_prompt = populate_template( - self.prompt_templates["system_prompt"], - variables={"tools": self.tools, "managed_agents": self.managed_agents}, - ) - return system_prompt - - def initialize_user_prompt(self) -> str: - - user_prompt = populate_template( - self.prompt_templates["user_prompt"], - variables={}, - ) - - return user_prompt - - def initialize_task_instruction(self) -> str: - """Initialize the task instruction for the agent.""" - task_instruction = populate_template( - self.prompt_templates["task_instruction"], - variables={"task": self.task}, - ) - return task_instruction - - def _substitute_state_variables(self, arguments: dict[str, str] | str) -> dict[str, Any] | str: - """Replace string values in arguments with their corresponding state values if they exist.""" - if isinstance(arguments, dict): - return { - key: self.state.get(value, value) if isinstance(value, str) else value - for key, value in arguments.items() - } - return arguments - - async def execute_tool_call(self, tool_name: str, arguments: dict[str, str] | str) -> Any: - """ - Execute a tool or managed agent with the provided arguments. - - The arguments are replaced with the actual values from the state if they refer to state variables. - - Args: - tool_name (`str`): Name of the tool or managed agent to execute. - arguments (dict[str, str] | str): Arguments passed to the tool call. 
- """ - # Check if the tool exists - available_tools = {**self.tools, **self.managed_agents} - if tool_name not in available_tools: - raise AgentToolExecutionError( - f"Unknown tool {tool_name}, should be one of: {', '.join(available_tools)}.", self.logger - ) - - # Get the tool and substitute state variables in arguments - tool = available_tools[tool_name] - arguments = self._substitute_state_variables(arguments) - is_managed_agent = tool_name in self.managed_agents - - try: - # Call tool with appropriate arguments - if isinstance(arguments, dict): - return await tool(**arguments) if is_managed_agent else await tool(**arguments, sanitize_inputs_outputs=True) - elif isinstance(arguments, str): - return await tool(arguments) if is_managed_agent else await tool(arguments, sanitize_inputs_outputs=True) - else: - raise TypeError(f"Unsupported arguments type: {type(arguments)}") - - except TypeError as e: - # Handle invalid arguments - description = getattr(tool, "description", "No description") - if is_managed_agent: - error_msg = ( - f"Invalid request to team member '{tool_name}' with arguments {json.dumps(arguments, ensure_ascii=False)}: {e}\n" - "You should call this team member with a valid request.\n" - f"Team member description: {description}" - ) - else: - error_msg = ( - f"Invalid call to tool '{tool_name}' with arguments {json.dumps(arguments, ensure_ascii=False)}: {e}\n" - "You should call this tool with correct input arguments.\n" - f"Expected inputs: {json.dumps(tool.parameters)}\n" - f"Returns output type: {tool.output_type}\n" - f"Tool description: '{description}'" - ) - raise AgentToolCallError(error_msg, self.logger) from e - - except Exception as e: - # Handle execution errors - if is_managed_agent: - error_msg = ( - f"Error executing request to team member '{tool_name}' with arguments {json.dumps(arguments)}: {e}\n" - "Please try again or request to another team member" - ) - else: - error_msg = ( - f"Error executing tool '{tool_name}' with arguments {json.dumps(arguments)}: {type(e).__name__}: {e}\n" - "Please try again or use another tool" - ) - raise AgentToolExecutionError(error_msg, self.logger) from e - - async def step(self, memory_step: ActionStep) -> None | Any: - """ - Perform one step in the ReAct framework: the agent thinks, acts, and observes the result. - Returns None if the step is not final. 
- """ - memory_messages = await self.write_memory_to_messages() - - input_messages = memory_messages.copy() - - # Add new step in logs - memory_step.model_input_messages = input_messages - - try: - chat_message: ChatMessage = await self.model( - input_messages, - stop_sequences=["Observation:", "Calling tools:"], - tools_to_call_from=list(self.tools.values()), - ) - memory_step.model_output_message = chat_message - model_output = chat_message.content - self.logger.log_markdown( - content=model_output if model_output else str(chat_message.raw), - title="Output message of the LLM:", - level=LogLevel.DEBUG, - ) - - memory_step.model_output_message.content = model_output - memory_step.model_output = model_output - except Exception as e: - raise AgentGenerationError(f"Error while generating output:\n{e}", self.logger) from e - - if chat_message.tool_calls is None or len(chat_message.tool_calls) == 0: - try: - chat_message = self.model.parse_tool_calls(chat_message) - except Exception as e: - raise AgentParsingError(f"Error while parsing tool call from model output: {e}", self.logger) - else: - for tool_call in chat_message.tool_calls: - tool_call.function.arguments = parse_json_if_needed(tool_call.function.arguments) - - tool_call = chat_message.tool_calls[0] - tool_name, tool_call_id = tool_call.function.name, tool_call.id - tool_arguments = tool_call.function.arguments - memory_step.model_output = str(f"Called Tool: '{tool_name}' with arguments: {tool_arguments}") - memory_step.tool_calls = [ToolCall(name=tool_name, arguments=tool_arguments, id=tool_call_id)] - - # Execute - self.logger.log( - Panel(Text(f"Calling tool: '{tool_name}' with arguments: {tool_arguments}")), - level=LogLevel.INFO, - ) - if tool_name == "final_answer": - if isinstance(tool_arguments, dict): - if "result" in tool_arguments: - result = tool_arguments["result"] - else: - result = tool_arguments - else: - result = tool_arguments - if ( - isinstance(result, str) and result in self.state.keys() - ): # if the answer is a state variable, return the value - final_result = self.state[result] - self.logger.log( - f"[bold {YELLOW_HEX}]Final answer:[/bold {YELLOW_HEX}] Extracting key '{result}' from state to return value '{final_result}'.", - level=LogLevel.INFO, - ) - else: - final_result = result - self.logger.log( - Text(f"Final result: {final_result}", style=f"bold {YELLOW_HEX}"), - level=LogLevel.INFO, - ) - - memory_step.action_output = final_result - return final_result - else: - if tool_arguments is None: - tool_arguments = {} - observation = await self.execute_tool_call(tool_name, tool_arguments) - observation_type = type(observation) - if observation_type in [AgentImage, AgentAudio]: - if observation_type == AgentImage: - observation_name = "image.png" - elif observation_type == AgentAudio: - observation_name = "audio.mp3" - # TODO: observation naming could allow for different names of same type - - self.state[observation_name] = observation - updated_information = f"Stored '{observation_name}' in memory." 
- else: - updated_information = str(observation).strip() - self.logger.log( - f"Observations: {updated_information.replace('[', '|')}", # escape potential rich-tag-like components - level=LogLevel.INFO, - ) - memory_step.observations = updated_information - return None \ No newline at end of file +from typing import ( + Any, + Callable, + Optional +) + +from src.tools import AsyncTool +from src.logger import LogLevel +from src.models import Model +from src.registry import register_agent +from src.agent.base_agent import BaseAgent +from src.utils import assemble_project_path + + +@register_agent("deep_researcher_agent") +class DeepResearcherAgent(BaseAgent): + AGENT_NAME = "deep_researcher_agent" + + def __init__( + self, + config, + tools: list[AsyncTool], + model: Model, + max_steps: int = 20, + add_base_tools: bool = False, + verbosity_level: LogLevel = LogLevel.INFO, + grammar: dict[str, str] | None = None, + managed_agents: list | None = None, + step_callbacks: list[Callable] | None = None, + planning_interval: int | None = None, + description: str | None = None, + provide_run_summary: bool = False, + final_answer_checks: list[Callable] | None = None, + **kwargs + ): + prompt_templates_path = assemble_project_path(config.template_path) + super().__init__( + config=config, + tools=tools, + model=model, + prompt_templates_path=prompt_templates_path, + max_steps=max_steps, + add_base_tools=add_base_tools, + verbosity_level=verbosity_level, + grammar=grammar, + managed_agents=managed_agents, + step_callbacks=step_callbacks, + planning_interval=planning_interval, + description=description, + provide_run_summary=provide_run_summary, + final_answer_checks=final_answer_checks, + **kwargs + ) + + \ No newline at end of file diff --git a/src/agent/planning_agent/planning_agent.py b/src/agent/planning_agent/planning_agent.py index e4116ed..a2ae314 100644 --- a/src/agent/planning_agent/planning_agent.py +++ b/src/agent/planning_agent/planning_agent.py @@ -1,283 +1,93 @@ -from typing import ( - Any, - Callable, - Optional -) -import json -import yaml -from rich.panel import Panel -from rich.text import Text - -from src.tools import AsyncTool -from src.exception import ( - AgentGenerationError, - AgentParsingError, - AgentToolExecutionError, - AgentToolCallError -) -from src.base.async_multistep_agent import (PromptTemplates, - populate_template, - AsyncMultiStepAgent - ) -from src.memory import (ActionStep, - ToolCall, - AgentMemory) -from src.logger import (LogLevel, - YELLOW_HEX, - logger) -from src.models import Model, parse_json_if_needed, ChatMessage -from src.utils.agent_types import ( - AgentAudio, - AgentImage, -) -from src.registry import register_agent -from src.utils import assemble_project_path - -@register_agent("planning_agent") -class PlanningAgent(AsyncMultiStepAgent): - def __init__( - self, - config, - tools: list[AsyncTool], - model: Model, - prompt_templates: PromptTemplates | None = None, - max_steps: int = 20, - add_base_tools: bool = False, - verbosity_level: LogLevel = LogLevel.INFO, - grammar: dict[str, str] | None = None, - managed_agents: list | None = None, - step_callbacks: list[Callable] | None = None, - planning_interval: int | None = None, - name: str | None = None, - description: str | None = None, - provide_run_summary: bool = False, - final_answer_checks: list[Callable] | None = None, - **kwargs - ): - self.config = config - - super(PlanningAgent, self).__init__( - tools=tools, - model=model, - prompt_templates=prompt_templates, - max_steps=max_steps, - 
add_base_tools=add_base_tools, - verbosity_level=verbosity_level, - grammar= grammar, - managed_agents=managed_agents, - step_callbacks=step_callbacks, - planning_interval=planning_interval, - name=name, - description=description, - provide_run_summary=provide_run_summary, - final_answer_checks=final_answer_checks, - ) - - template_path = assemble_project_path(self.config.template_path) - with open(template_path, "r") as f: - self.prompt_templates = yaml.safe_load(f) - - self.system_prompt = self.initialize_system_prompt() - self.user_prompt = self.initialize_user_prompt() - - self.memory = AgentMemory( - system_prompt=self.system_prompt, - user_prompt=self.user_prompt, - ) - - def initialize_system_prompt(self) -> str: - """Initialize the system prompt for the agent.""" - system_prompt = populate_template( - self.prompt_templates["system_prompt"], - variables={"tools": self.tools, "managed_agents": self.managed_agents}, - ) - return system_prompt - - def initialize_user_prompt(self) -> str: - - user_prompt = populate_template( - self.prompt_templates["user_prompt"], - variables={}, - ) - - return user_prompt - - def initialize_task_instruction(self) -> str: - """Initialize the task instruction for the agent.""" - task_instruction = populate_template( - self.prompt_templates["task_instruction"], - variables={"task": self.task}, - ) - return task_instruction - - def _substitute_state_variables(self, arguments: dict[str, str] | str) -> dict[str, Any] | str: - """Replace string values in arguments with their corresponding state values if they exist.""" - if isinstance(arguments, dict): - return { - key: self.state.get(value, value) if isinstance(value, str) else value - for key, value in arguments.items() - } - return arguments - - async def execute_tool_call(self, tool_name: str, arguments: dict[str, str] | str) -> Any: - """ - Execute a tool or managed agent with the provided arguments. - - The arguments are replaced with the actual values from the state if they refer to state variables. - - Args: - tool_name (`str`): Name of the tool or managed agent to execute. - arguments (dict[str, str] | str): Arguments passed to the tool call. 
- """ - # Check if the tool exists - available_tools = {**self.tools, **self.managed_agents} - if tool_name not in available_tools: - raise AgentToolExecutionError( - f"Unknown tool {tool_name}, should be one of: {', '.join(available_tools)}.", self.logger - ) - - # Get the tool and substitute state variables in arguments - tool = available_tools[tool_name] - arguments = self._substitute_state_variables(arguments) - is_managed_agent = tool_name in self.managed_agents - - try: - # Call tool with appropriate arguments - if isinstance(arguments, dict): - return await tool(**arguments) if is_managed_agent else await tool(**arguments, sanitize_inputs_outputs=True) - elif isinstance(arguments, str): - return await tool(arguments) if is_managed_agent else await tool(arguments, sanitize_inputs_outputs=True) - else: - raise TypeError(f"Unsupported arguments type: {type(arguments)}") - - except TypeError as e: - # Handle invalid arguments - description = getattr(tool, "description", "No description") - if is_managed_agent: - error_msg = ( - f"Invalid request to team member '{tool_name}' with arguments {json.dumps(arguments, ensure_ascii=False)}: {e}\n" - "You should call this team member with a valid request.\n" - f"Team member description: {description}" - ) - else: - error_msg = ( - f"Invalid call to tool '{tool_name}' with arguments {json.dumps(arguments, ensure_ascii=False)}: {e}\n" - "You should call this tool with correct input arguments.\n" - f"Expected inputs: {json.dumps(tool.parameters)}\n" - f"Returns output type: {tool.output_type}\n" - f"Tool description: '{description}'" - ) - raise AgentToolCallError(error_msg, self.logger) from e - - except Exception as e: - # Handle execution errors - if is_managed_agent: - error_msg = ( - f"Error executing request to team member '{tool_name}' with arguments {json.dumps(arguments)}: {e}\n" - "Please try again or request to another team member" - ) - else: - error_msg = ( - f"Error executing tool '{tool_name}' with arguments {json.dumps(arguments)}: {type(e).__name__}: {e}\n" - "Please try again or use another tool" - ) - raise AgentToolExecutionError(error_msg, self.logger) from e - - async def step(self, memory_step: ActionStep) -> None | Any: - """ - Perform one step in the ReAct framework: the agent thinks, acts, and observes the result. - Returns None if the step is not final. 
- """ - memory_messages = await self.write_memory_to_messages() - - input_messages = memory_messages.copy() - - # Add new step in logs - memory_step.model_input_messages = input_messages - - try: - chat_message: ChatMessage = await self.model( - input_messages, - stop_sequences=["Observation:", "Calling tools:"], - tools_to_call_from=list(self.tools.values()), - ) - memory_step.model_output_message = chat_message - model_output = chat_message.content - self.logger.log_markdown( - content=model_output if model_output else str(chat_message.raw), - title="Output message of the LLM:", - level=LogLevel.DEBUG, - ) - - memory_step.model_output_message.content = model_output - memory_step.model_output = model_output - except Exception as e: - raise AgentGenerationError(f"Error while generating output:\n{e}", self.logger) from e - - if chat_message.tool_calls is None or len(chat_message.tool_calls) == 0: - try: - chat_message = self.model.parse_tool_calls(chat_message) - except Exception as e: - raise AgentParsingError(f"Error while parsing tool call from model output: {e}", self.logger) - else: - for tool_call in chat_message.tool_calls: - tool_call.function.arguments = parse_json_if_needed(tool_call.function.arguments) - - tool_call = chat_message.tool_calls[0] - tool_name, tool_call_id = tool_call.function.name, tool_call.id - tool_arguments = tool_call.function.arguments - memory_step.model_output = str(f"Called Tool: '{tool_name}' with arguments: {tool_arguments}") - memory_step.tool_calls = [ToolCall(name=tool_name, arguments=tool_arguments, id=tool_call_id)] - - # Execute - self.logger.log( - Panel(Text(f"Calling tool: '{tool_name}' with arguments: {tool_arguments}")), - level=LogLevel.INFO, - ) - if tool_name == "final_answer": - if isinstance(tool_arguments, dict): - if "result" in tool_arguments: - result = tool_arguments["result"] - else: - result = tool_arguments - else: - result = tool_arguments - if ( - isinstance(result, str) and result in self.state.keys() - ): # if the answer is a state variable, return the value - final_result = self.state[result] - self.logger.log( - f"[bold {YELLOW_HEX}]Final answer:[/bold {YELLOW_HEX}] Extracting key '{result}' from state to return value '{final_result}'.", - level=LogLevel.INFO, - ) - else: - final_result = result - self.logger.log( - Text(f"Final result: {final_result}", style=f"bold {YELLOW_HEX}"), - level=LogLevel.INFO, - ) - - memory_step.action_output = final_result - return final_result - else: - if tool_arguments is None: - tool_arguments = {} - observation = await self.execute_tool_call(tool_name, tool_arguments) - observation_type = type(observation) - if observation_type in [AgentImage, AgentAudio]: - if observation_type == AgentImage: - observation_name = "image.png" - elif observation_type == AgentAudio: - observation_name = "audio.mp3" - # TODO: observation naming could allow for different names of same type - - self.state[observation_name] = observation - updated_information = f"Stored '{observation_name}' in memory." 
-            else:
-                updated_information = str(observation).strip()
-            self.logger.log(
-                f"Observations: {updated_information.replace('[', '|')}",  # escape potential rich-tag-like components
-                level=LogLevel.INFO,
-            )
-            memory_step.observations = updated_information
-            return None
\ No newline at end of file
+from typing import (
+    Any,
+    Callable,
+    Optional
+)
+# Remove imports that are now in BaseAgent or not directly needed
+# import json
+# import yaml
+# from rich.panel import Panel
+# from rich.text import Text
+
+from src.tools import AsyncTool
+from src.exception import (
+    AgentGenerationError,
+    AgentParsingError,
+    AgentToolExecutionError,
+    AgentToolCallError
+)
+from src.base.async_multistep_agent import (PromptTemplates,
+                                            populate_template,
+                                            AsyncMultiStepAgent
+                                            )
+from src.memory import (ActionStep,
+                        ToolCall,
+                        AgentMemory)
+from src.logger import (LogLevel,
+                        YELLOW_HEX,
+                        logger)
+from src.models import Model, parse_json_if_needed, ChatMessage
+from src.utils.agent_types import (
+    AgentAudio,
+    AgentImage,
+)
+from src.registry import register_agent
+from src.utils import assemble_project_path
+
+# Import the new base class
+from src.agent.base_agent import BaseAgent
+
+
+@register_agent("planning_agent")  # Use a string literal rather than the class attribute to avoid forward-reference issues
+class PlanningAgent(BaseAgent):
+    AGENT_NAME = "planning_agent"  # Define a specific agent name
+
+    def __init__(
+        self,
+        config,  # Specific configuration for this agent
+        tools: list[AsyncTool],
+        model: Model,
+        # prompt_templates_path is now a required parameter for BaseAgent
+        # It will be extracted from config
+        max_steps: int = 20,
+        add_base_tools: bool = False,
+        verbosity_level: LogLevel = LogLevel.INFO,
+        grammar: dict[str, str] | None = None,
+        managed_agents: list | None = None,
+        step_callbacks: list[Callable] | None = None,
+        planning_interval: int | None = None,
+        # name: str | None = None,  # Handled by BaseAgent using AGENT_NAME
+        description: str | None = None,
+        provide_run_summary: bool = False,
+        final_answer_checks: list[Callable] | None = None,
+        **kwargs
+    ):
+        # Extract prompt_templates_path from the agent's configuration
+        # Original code used: self.config.template_path
+        prompt_templates_path = assemble_project_path(config.template_path)
+
+        super().__init__(
+            config=config,  # Pass the full configuration to the base class
+            tools=tools,
+            model=model,
+            prompt_templates_path=prompt_templates_path,  # Pass the path
+            max_steps=max_steps,
+            add_base_tools=add_base_tools,
+            verbosity_level=verbosity_level,
+            grammar=grammar,
+            managed_agents=managed_agents,
+            step_callbacks=step_callbacks,
+            planning_interval=planning_interval,
+            # name is handled by BaseAgent
+            description=description,
+            provide_run_summary=provide_run_summary,
+            final_answer_checks=final_answer_checks,
+            **kwargs
+        )
+        # All other initialization (prompts, memory) is now handled by BaseAgent
+        # Any PlanningAgent-specific initialization can be added here
+
+    # All other methods (initialize_system_prompt, initialize_user_prompt,
+    # initialize_task_instruction, _substitute_state_variables,
+    # execute_tool_call, step) are now inherited from BaseAgent
+

From 69aa0177ba1bd03f4ca211b75e1222603af61cbd Mon Sep 17 00:00:00 2001
From: hoare211
Date: Sat, 14 Jun 2025 20:27:59 +0800
Subject: [PATCH 2/2] refactor(agent): refactor the agent prompt templates
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

1) Refactor the agent prompt templates to reduce duplicated descriptions
   across templates;
2) Rework base_agent's template-loading logic to support multi-directory search and Jinja2 rendering
   (a standalone sketch of this loading scheme appears after the patch).
---
 src/agent/base_agent.py                     |  44 +++++-
 .../prompts/browser_use_agent.yaml          | 145 +----------------
 .../prompts/deep_analyzer_agent.yaml        | 145 +----------------
 .../prompts/deep_researcher_agent.yaml      | 145 +----------------
 .../prompts/planning_agent.yaml             | 145 +----------------
 .../prompts/_common_agent_prompt_parts.yaml | 147 ++++++++++++++++++
 6 files changed, 209 insertions(+), 562 deletions(-)
 create mode 100644 src/base/prompts/_common_agent_prompt_parts.yaml

diff --git a/src/agent/base_agent.py b/src/agent/base_agent.py
index 70baa85..dac7bb0 100755
--- a/src/agent/base_agent.py
+++ b/src/agent/base_agent.py
@@ -1,3 +1,4 @@
+import os
 from typing import (
     Any,
     Callable,
@@ -31,7 +32,17 @@
     AgentImage,
 )
 from src.utils import assemble_project_path
+from src.utils.path_utils import assemble_project_path
+from src.memory.memory import AgentMemory  # PromptTemplates is no longer imported from here
+from src.logger.logger import LogLevel
+import yaml
+from jinja2 import Environment, FileSystemLoader, TemplateNotFound, meta, Template
+from abc import ABC, abstractmethod
+from typing import Any, List, Dict, Union, Optional, Type
+
+from src.logger import logger
+from src.memory.memory import ActionStep

 class BaseAgent(AsyncMultiStepAgent):
     """Base class for agents with common logic."""
     AGENT_NAME = "base_agent"  # Must be overridden by subclasses
@@ -79,13 +90,34 @@ def __init__(
         )

         # Loading prompt_templates
-        if prompt_templates:
-            self.prompt_templates = prompt_templates
+        if prompt_templates_path:
+            # template_dir is the directory of the specific agent's prompt file
+            template_dir = os.path.dirname(prompt_templates_path)
+            template_filename = os.path.basename(prompt_templates_path)
+
+            # Determine the common prompts directory relative to this file (base_agent.py):
+            # __file__ is .../src/agent/base_agent.py
+            # common_prompts_dir should be .../src/base/prompts/
+            current_file_dir = os.path.dirname(os.path.abspath(__file__))  # .../src/agent
+            src_dir = os.path.dirname(current_file_dir)  # .../src
+            common_prompts_dir = os.path.join(src_dir, "base", "prompts")
+
+            # Put both the agent's own template directory and the common prompts directory on the search path.
+            # trim_blocks and lstrip_blocks stay at their defaults (False) so YAML indentation is preserved verbatim.
+            env = Environment(
+                loader=FileSystemLoader(searchpath=[template_dir, common_prompts_dir]),
+                trim_blocks=False,
+                lstrip_blocks=False
+            )
+            template = env.get_template(template_filename)
+
+            # expanded_yaml_str = expand_jinja_macros_in_yaml(template)
+            # print(expanded_yaml_str)
+
+            rendered_yaml = template.render()  # Variables could be passed here if needed
+            self.prompt_templates = yaml.safe_load(rendered_yaml)
         else:
-            abs_template_path = assemble_project_path(prompt_templates_path)
-            with open(abs_template_path, "r", encoding='utf-8') as f:
-                self.prompt_templates = yaml.safe_load(f)
-
+            self.prompt_templates = prompt_templates
         self.system_prompt = self.initialize_system_prompt()
         self.user_prompt = self.initialize_user_prompt()
diff --git a/src/agent/browser_use_agent/prompts/browser_use_agent.yaml b/src/agent/browser_use_agent/prompts/browser_use_agent.yaml
index d0ecea5..6394e66 100644
--- a/src/agent/browser_use_agent/prompts/browser_use_agent.yaml
+++ b/src/agent/browser_use_agent/prompts/browser_use_agent.yaml
@@ -1,120 +1,7 @@
-system_prompt: |-
-  You are an expert assistant who can solve any task using tool calls. You will be given a task to solve as best you can.
-  To do so, you have been given access to some tools.
-
-  The tool call you write is an action: after the tool is executed, you will get the result of the tool call as an "observation".
-  This Action/Observation can repeat N times, you should take several steps when needed.
-
-  You can use the result of the previous action as input for the next action.
-  The observation will always be a string: it can represent a file, like "image_1.jpg".
-  Then you can use it as input for the next action. You can do it for instance as follows:
-
-  Observation: "image_1.jpg"
-
-  Action:
-  {
-    "name": "image_transformer",
-    "arguments": {"image": "image_1.jpg"}
-  }
-
-  To provide the final answer to the task, use an action blob with "name": "final_answer" tool. It is the only way to complete the task, else you will be stuck on a loop. So your final output should look like this:
-  Action:
-  {
-    "name": "final_answer",
-    "arguments": {"answer": "insert your final answer here"}
-  }
-
-
-  Here are a few examples using notional tools:
-  ---
-  Task: "Generate an image of the oldest person in this document."
-
-  Action:
-  {
-    "name": "document_qa",
-    "arguments": {"document": "document.pdf", "question": "Who is the oldest person mentioned?"}
-  }
-  Observation: "The oldest person in the document is John Doe, a 55 year old lumberjack living in Newfoundland."
-
-  Action:
-  {
-    "name": "image_generator",
-    "arguments": {"prompt": "A portrait of John Doe, a 55-year-old man living in Canada."}
-  }
-  Observation: "image.png"
-
-  Action:
-  {
-    "name": "final_answer",
-    "arguments": "image.png"
-  }
-
-  ---
-  Task: "What is the result of the following operation: 5 + 3 + 1294.678?"
-
-  Action:
-  {
-    "name": "python_interpreter",
-    "arguments": {"code": "5 + 3 + 1294.678"}
-  }
-  Observation: 1302.678
+{% import "_common_agent_prompt_parts.yaml" as common_parts %}
-  Action:
-  {
-    "name": "final_answer",
-    "arguments": "1302.678"
-  }
-
-  ---
-  Task: "Which city has the highest population , Guangzhou or Shanghai?"
-
-  Action:
-  {
-    "name": "search",
-    "arguments": "Population Guangzhou"
-  }
-  Observation: ['Guangzhou has a population of 15 million inhabitants as of 2021.']
-
-
-  Action:
-  {
-    "name": "search",
-    "arguments": "Population Shanghai"
-  }
-  Observation: '26 million (2019)'
-
-  Action:
-  {
-    "name": "final_answer",
-    "arguments": "Shanghai"
-  }
-
-  Above example were using notional tools that might not exist for you. You only have access to these tools:
-  {%- for tool in tools.values() %}
-  * {{ tool.name }}: {{ tool.description }}
-    Takes inputs: {{tool.parameters.properties}}
-    Returns an output of type: {{tool.output_type}}
-  {%- endfor %}
-
-  {%- if managed_agents and managed_agents.values() | list %}
-
-  You can also give tasks to team members.
-  Calling a team member works the same as for calling a tool: simply, the only argument you can give in the call is 'task', a long string explaining your task.
-  Given that this team member is a real human, you should be very verbose in your task.
-  Here is a list of the team members that you can call:
-  {%- for agent in managed_agents.values() %}
-  * {{ agent.name }}: {{ agent.description }}
-  {%- endfor %}
-  {%- endif %}
-
-  Here are the rules you should always follow to solve your task:
-  1. ALWAYS provide a tool call, else you will fail.
-  2. Always use the right arguments for the tools. Never use variable names as the action arguments, use the value instead.
-  3. Call a tool only when needed: do not call the search agent if you do not need information, try to solve the task yourself.
-  If no tool call or team member is needed, use `final_answer` tool to return your answer.
-  4. Never re-do a tool call that you previously did with the exact same parameters.
-
-  Now Begin!
+system_prompt: |-
+  {{ common_parts.common_system_prompt_intro() }}

 task_instruction: |-
   You can search for the most relevant web pages and interact with them to accurately find answers to tasks.
@@ -123,33 +10,13 @@ task_instruction: |-
   * You can also use the `python_interpreter` tool to run any code to support your analysis.

   Here is the task:
-  {{task}}
+  {% raw %}{{task}}{% endraw %}

 user_prompt: |-
   You should think step by step to solve the task.

 managed_agent:
-  task: |-
-    You're a helpful agent named '{{name}}'.
-    You have been submitted this task by your manager.
-    ---
-    {{task}}
-    ---
-    You're helping your manager solve a wider task: so make sure to not provide a one-line answer, but give as much information as possible to give them a clear understanding of the answer.
-
-    Your `final_answer` WILL HAVE to contain these parts:
-    ### 1. Task outcome (short version):
-    ### 2. Task outcome (extremely detailed version):
-    ### 3. Additional context (if relevant):
+  {{ common_parts.common_managed_agent_parts() }}

-    Put all these in your `final_answer` tool, everything that you do not pass as an argument to `final_answer` will be lost.
-    And even if your task resolution is not successful, please return as much context as possible, so that your manager can act upon this feedback.
-  report: |-
-    Here is the final answer from your managed agent '{{name}}':
-    {{final_answer}}
 final_answer:
-  pre_messages: |-
-    An agent tried to answer a user query but it got stuck and failed to do so. You are tasked with providing an answer instead. Here is the agent's memory:
-  post_messages: |-
-    Based on the above, please provide an answer to the following user task:
-    {{task}}
+  {{ common_parts.common_final_answer_parts() }}
\ No newline at end of file
diff --git a/src/agent/deep_analyzer_agent/prompts/deep_analyzer_agent.yaml b/src/agent/deep_analyzer_agent/prompts/deep_analyzer_agent.yaml
index 23dac4a..36076f5 100644
--- a/src/agent/deep_analyzer_agent/prompts/deep_analyzer_agent.yaml
+++ b/src/agent/deep_analyzer_agent/prompts/deep_analyzer_agent.yaml
@@ -1,120 +1,7 @@
-system_prompt: |-
-  You are an expert assistant who can solve any task using tool calls. You will be given a task to solve as best you can.
-  To do so, you have been given access to some tools.
-
-  The tool call you write is an action: after the tool is executed, you will get the result of the tool call as an "observation".
-  This Action/Observation can repeat N times, you should take several steps when needed.
-
-  You can use the result of the previous action as input for the next action.
-  The observation will always be a string: it can represent a file, like "image_1.jpg".
-  Then you can use it as input for the next action. You can do it for instance as follows:
-
-  Observation: "image_1.jpg"
-
-  Action:
-  {
-    "name": "image_transformer",
-    "arguments": {"image": "image_1.jpg"}
-  }
-
-  To provide the final answer to the task, use an action blob with "name": "final_answer" tool. It is the only way to complete the task, else you will be stuck on a loop. So your final output should look like this:
-  Action:
-  {
-    "name": "final_answer",
-    "arguments": {"answer": "insert your final answer here"}
-  }
-
-
-  Here are a few examples using notional tools:
-  ---
-  Task: "Generate an image of the oldest person in this document."
-
-  Action:
-  {
-    "name": "document_qa",
-    "arguments": {"document": "document.pdf", "question": "Who is the oldest person mentioned?"}
-  }
-  Observation: "The oldest person in the document is John Doe, a 55 year old lumberjack living in Newfoundland."
-
-  Action:
-  {
-    "name": "image_generator",
-    "arguments": {"prompt": "A portrait of John Doe, a 55-year-old man living in Canada."}
-  }
-  Observation: "image.png"
-
-  Action:
-  {
-    "name": "final_answer",
-    "arguments": "image.png"
-  }
-
-  ---
-  Task: "What is the result of the following operation: 5 + 3 + 1294.678?"
+{% import "_common_agent_prompt_parts.yaml" as common_parts %}
-  Action:
-  {
-    "name": "python_interpreter",
-    "arguments": {"code": "5 + 3 + 1294.678"}
-  }
-  Observation: 1302.678
-
-  Action:
-  {
-    "name": "final_answer",
-    "arguments": "1302.678"
-  }
-
-  ---
-  Task: "Which city has the highest population , Guangzhou or Shanghai?"
-
-  Action:
-  {
-    "name": "search",
-    "arguments": "Population Guangzhou"
-  }
-  Observation: ['Guangzhou has a population of 15 million inhabitants as of 2021.']
-
-
-  Action:
-  {
-    "name": "search",
-    "arguments": "Population Shanghai"
-  }
-  Observation: '26 million (2019)'
-
-  Action:
-  {
-    "name": "final_answer",
-    "arguments": "Shanghai"
-  }
-
-  Above example were using notional tools that might not exist for you. You only have access to these tools:
-  {%- for tool in tools.values() %}
-  * {{ tool.name }}: {{ tool.description }}
-    Takes inputs: {{tool.parameters.properties}}
-    Returns an output of type: {{tool.output_type}}
-  {%- endfor %}
-
-  {%- if managed_agents and managed_agents.values() | list %}
-
-  You can also give tasks to team members.
-  Calling a team member works the same as for calling a tool: simply, the only argument you can give in the call is 'task', a long string explaining your task.
-  Given that this team member is a real human, you should be very verbose in your task.
-  Here is a list of the team members that you can call:
-  {%- for agent in managed_agents.values() %}
-  * {{ agent.name }}: {{ agent.description }}
-  {%- endfor %}
-  {%- endif %}
-
-  Here are the rules you should always follow to solve your task:
-  1. ALWAYS provide a tool call, else you will fail.
-  2. Always use the right arguments for the tools. Never use variable names as the action arguments, use the value instead.
-  3. Call a tool only when needed: do not call the search agent if you do not need information, try to solve the task yourself.
-  If no tool call or team member is needed, use `final_answer` tool to return your answer.
-  4. Never re-do a tool call that you previously did with the exact same parameters.
-
-  Now Begin!
+system_prompt: |-
+  {{ common_parts.common_system_prompt_intro() }}

 task_instruction: |-
   You can analyze and solve any task based on attached file or uri.
@@ -122,33 +9,13 @@ task_instruction: |-
   * When the task involves calculation and statistics for attached files or data, you can use the `python_interpreter` to run code to convert the data into a table at first. And then run the code to analyze the data.

   Here is the task:
-  {{task}}
+  {% raw %}{{task}}{% endraw %}

 user_prompt: |-
   You should think step by step to solve the task.

 managed_agent:
-  task: |-
-    You're a helpful agent named '{{name}}'.
-    You have been submitted this task by your manager.
-    ---
-    {{task}}
-    ---
-    You're helping your manager solve a wider task: so make sure to not provide a one-line answer, but give as much information as possible to give them a clear understanding of the answer.
-
-    Your `final_answer` WILL HAVE to contain these parts:
-    ### 1. Task outcome (short version):
-    ### 2. Task outcome (extremely detailed version):
-    ### 3. Additional context (if relevant):
+  {{ common_parts.common_managed_agent_parts() }}

-    Put all these in your `final_answer` tool, everything that you do not pass as an argument to `final_answer` will be lost.
-    And even if your task resolution is not successful, please return as much context as possible, so that your manager can act upon this feedback.
-  report: |-
-    Here is the final answer from your managed agent '{{name}}':
-    {{final_answer}}
 final_answer:
-  pre_messages: |-
-    An agent tried to answer a user query but it got stuck and failed to do so. You are tasked with providing an answer instead. Here is the agent's memory:
-  post_messages: |-
-    Based on the above, please provide an answer to the following user task:
-    {{task}}
+  {{ common_parts.common_final_answer_parts() }}
\ No newline at end of file
diff --git a/src/agent/deep_researcher_agent/prompts/deep_researcher_agent.yaml b/src/agent/deep_researcher_agent/prompts/deep_researcher_agent.yaml
index 76b7ac6..9c0f194 100644
--- a/src/agent/deep_researcher_agent/prompts/deep_researcher_agent.yaml
+++ b/src/agent/deep_researcher_agent/prompts/deep_researcher_agent.yaml
@@ -1,120 +1,7 @@
-system_prompt: |-
-  You are an expert assistant who can solve any task using tool calls. You will be given a task to solve as best you can.
-  To do so, you have been given access to some tools.
-
-  The tool call you write is an action: after the tool is executed, you will get the result of the tool call as an "observation".
-  This Action/Observation can repeat N times, you should take several steps when needed.
-
-  You can use the result of the previous action as input for the next action.
-  The observation will always be a string: it can represent a file, like "image_1.jpg".
-  Then you can use it as input for the next action. You can do it for instance as follows:
-
-  Observation: "image_1.jpg"
-
-  Action:
-  {
-    "name": "image_transformer",
-    "arguments": {"image": "image_1.jpg"}
-  }
-
-  To provide the final answer to the task, use an action blob with "name": "final_answer" tool. It is the only way to complete the task, else you will be stuck on a loop. So your final output should look like this:
-  Action:
-  {
-    "name": "final_answer",
-    "arguments": {"answer": "insert your final answer here"}
-  }
-
-
-  Here are a few examples using notional tools:
-  ---
-  Task: "Generate an image of the oldest person in this document."
-
-  Action:
-  {
-    "name": "document_qa",
-    "arguments": {"document": "document.pdf", "question": "Who is the oldest person mentioned?"}
-  }
-  Observation: "The oldest person in the document is John Doe, a 55 year old lumberjack living in Newfoundland."
-
-  Action:
-  {
-    "name": "image_generator",
-    "arguments": {"prompt": "A portrait of John Doe, a 55-year-old man living in Canada."}
-  }
-  Observation: "image.png"
-
-  Action:
-  {
-    "name": "final_answer",
-    "arguments": "image.png"
-  }
-
-  ---
-  Task: "What is the result of the following operation: 5 + 3 + 1294.678?"
-
-  Action:
-  {
-    "name": "python_interpreter",
-    "arguments": {"code": "5 + 3 + 1294.678"}
-  }
-  Observation: 1302.678
+{% import "_common_agent_prompt_parts.yaml" as common_parts %}
-  Action:
-  {
-    "name": "final_answer",
-    "arguments": "1302.678"
-  }
-
-  ---
-  Task: "Which city has the highest population , Guangzhou or Shanghai?"
-
-  Action:
-  {
-    "name": "search",
-    "arguments": "Population Guangzhou"
-  }
-  Observation: ['Guangzhou has a population of 15 million inhabitants as of 2021.']
-
-
-  Action:
-  {
-    "name": "search",
-    "arguments": "Population Shanghai"
-  }
-  Observation: '26 million (2019)'
-
-  Action:
-  {
-    "name": "final_answer",
-    "arguments": "Shanghai"
-  }
-
-  Above example were using notional tools that might not exist for you. You only have access to these tools:
-  {%- for tool in tools.values() %}
-  * {{ tool.name }}: {{ tool.description }}
-    Takes inputs: {{tool.parameters.properties}}
-    Returns an output of type: {{tool.output_type}}
-  {%- endfor %}
-
-  {%- if managed_agents and managed_agents.values() | list %}
-
-  You can also give tasks to team members.
-  Calling a team member works the same as for calling a tool: simply, the only argument you can give in the call is 'task', a long string explaining your task.
-  Given that this team member is a real human, you should be very verbose in your task.
-  Here is a list of the team members that you can call:
-  {%- for agent in managed_agents.values() %}
-  * {{ agent.name }}: {{ agent.description }}
-  {%- endfor %}
-  {%- endif %}
-
-  Here are the rules you should always follow to solve your task:
-  1. ALWAYS provide a tool call, else you will fail.
-  2. Always use the right arguments for the tools. Never use variable names as the action arguments, use the value instead.
-  3. Call a tool only when needed: do not call the search agent if you do not need information, try to solve the task yourself.
-  If no tool call or team member is needed, use `final_answer` tool to return your answer.
-  4. Never re-do a tool call that you previously did with the exact same parameters.
-
-  Now Begin!
+system_prompt: |-
+  {{ common_parts.common_system_prompt_intro() }}

 task_instruction: |-
   You can search for the most relevant web pages and interact with them to accurately find answers to tasks.
@@ -122,33 +9,13 @@ task_instruction: |-
   * You can also use the `archive_searcher` tool to use Wayback Machine to find the archived version of the url and extract the key insights from it.

   Here is the task:
-  {{task}}
+  {% raw %}{{task}}{% endraw %}

 user_prompt: |-
   You should think step by step to solve the task.

 managed_agent:
-  task: |-
-    You're a helpful agent named '{{name}}'.
-    You have been submitted this task by your manager.
-    ---
-    {{task}}
-    ---
-    You're helping your manager solve a wider task: so make sure to not provide a one-line answer, but give as much information as possible to give them a clear understanding of the answer.
-
-    Your `final_answer` WILL HAVE to contain these parts:
-    ### 1. Task outcome (short version):
-    ### 2. Task outcome (extremely detailed version):
-    ### 3. Additional context (if relevant):
+  {{ common_parts.common_managed_agent_parts() }}

-    Put all these in your `final_answer` tool, everything that you do not pass as an argument to `final_answer` will be lost.
-    And even if your task resolution is not successful, please return as much context as possible, so that your manager can act upon this feedback.
-  report: |-
-    Here is the final answer from your managed agent '{{name}}':
-    {{final_answer}}
 final_answer:
-  pre_messages: |-
-    An agent tried to answer a user query but it got stuck and failed to do so. You are tasked with providing an answer instead. Here is the agent's memory:
-  post_messages: |-
-    Based on the above, please provide an answer to the following user task:
-    {{task}}
+  {{ common_parts.common_final_answer_parts() }}
\ No newline at end of file
diff --git a/src/agent/planning_agent/prompts/planning_agent.yaml b/src/agent/planning_agent/prompts/planning_agent.yaml
index 81d722e..06981c4 100644
--- a/src/agent/planning_agent/prompts/planning_agent.yaml
+++ b/src/agent/planning_agent/prompts/planning_agent.yaml
@@ -1,120 +1,7 @@
-system_prompt: |-
-  You are an expert assistant who can solve any task using tool calls. You will be given a task to solve as best you can.
-  To do so, you have been given access to some tools.
-
-  The tool call you write is an action: after the tool is executed, you will get the result of the tool call as an "observation".
-  This Action/Observation can repeat N times, you should take several steps when needed.
-
-  You can use the result of the previous action as input for the next action.
-  The observation will always be a string: it can represent a file, like "image_1.jpg".
-  Then you can use it as input for the next action. You can do it for instance as follows:
-
-  Observation: "image_1.jpg"
-
-  Action:
-  {
-    "name": "image_transformer",
-    "arguments": {"image": "image_1.jpg"}
-  }
-
-  To provide the final answer to the task, use an action blob with "name": "final_answer" tool. It is the only way to complete the task, else you will be stuck on a loop. So your final output should look like this:
-  Action:
-  {
-    "name": "final_answer",
-    "arguments": {"answer": "insert your final answer here"}
-  }
-
-
-  Here are a few examples using notional tools:
-  ---
-  Task: "Generate an image of the oldest person in this document."
-
-  Action:
-  {
-    "name": "document_qa",
-    "arguments": {"document": "document.pdf", "question": "Who is the oldest person mentioned?"}
-  }
-  Observation: "The oldest person in the document is John Doe, a 55 year old lumberjack living in Newfoundland."
-
-  Action:
-  {
-    "name": "image_generator",
-    "arguments": {"prompt": "A portrait of John Doe, a 55-year-old man living in Canada."}
-  }
-  Observation: "image.png"
-
-  Action:
-  {
-    "name": "final_answer",
-    "arguments": "image.png"
-  }
-
-  ---
-  Task: "What is the result of the following operation: 5 + 3 + 1294.678?"
+{% import "_common_agent_prompt_parts.yaml" as common_parts %}
-  Action:
-  {
-    "name": "python_interpreter",
-    "arguments": {"code": "5 + 3 + 1294.678"}
-  }
-  Observation: 1302.678
-
-  Action:
-  {
-    "name": "final_answer",
-    "arguments": "1302.678"
-  }
-
-  ---
-  Task: "Which city has the highest population , Guangzhou or Shanghai?"
-
-  Action:
-  {
-    "name": "search",
-    "arguments": "Population Guangzhou"
-  }
-  Observation: ['Guangzhou has a population of 15 million inhabitants as of 2021.']
-
-
-  Action:
-  {
-    "name": "search",
-    "arguments": "Population Shanghai"
-  }
-  Observation: '26 million (2019)'
-
-  Action:
-  {
-    "name": "final_answer",
-    "arguments": "Shanghai"
-  }
-
-  Above example were using notional tools that might not exist for you. You only have access to these tools:
-  {%- for tool in tools.values() %}
-  * {{ tool.name }}: {{ tool.description }}
-    Takes inputs: {{tool.parameters.properties}}
-    Returns an output of type: {{tool.output_type}}
-  {%- endfor %}
-
-  {%- if managed_agents and managed_agents.values() | list %}
-
-  You can also give tasks to team members.
-  Calling a team member works the same as for calling a tool: simply, the only argument you can give in the call is 'task', a long string explaining your task.
-  Given that this team member is a real human, you should be very verbose in your task.
-  Here is a list of the team members that you can call:
-  {%- for agent in managed_agents.values() %}
-  * {{ agent.name }}: {{ agent.description }}
-  {%- endfor %}
-  {%- endif %}
-
-  Here are the rules you should always follow to solve your task:
-  1. ALWAYS provide a tool call, else you will fail.
-  2. Always use the right arguments for the tools. Never use variable names as the action arguments, use the value instead.
-  3. Call a tool only when needed: do not call the search agent if you do not need information, try to solve the task yourself.
-  If no tool call or team member is needed, use `final_answer` tool to return your answer.
-  4. Never re-do a tool call that you previously did with the exact same parameters.
-
-  Now Begin!
+system_prompt: |-
+  {{ common_parts.common_system_prompt_intro() }}

 task_instruction: |-
   You have one question to answer. It is paramount that you provide a correct answer.
   Give it all you can: I know for a fact that you have access to all the relevant tools and team members to solve it and find the correct answer (the answer does exist).
@@ -129,33 +16,13 @@ task_instruction: |-
   * Run verification steps if that's needed, you must make sure you find the correct answer!

   Here is the task:
-  {{task}}
+  {% raw %}{{task}}{% endraw %}

 user_prompt: |-
   You should think step by step and provide a detailed plan for the task.

 managed_agent:
-  task: |-
-    You're a helpful agent named '{{name}}'.
-    You have been submitted this task by your manager.
-    ---
-    {{task}}
-    ---
-    You're helping your manager solve a wider task: so make sure to not provide a one-line answer, but give as much information as possible to give them a clear understanding of the answer.
-
-    Your `final_answer` WILL HAVE to contain these parts:
-    ### 1. Task outcome (short version):
-    ### 2. Task outcome (extremely detailed version):
-    ### 3. Additional context (if relevant):
+  {{ common_parts.common_managed_agent_parts() }}

-    Put all these in your `final_answer` tool, everything that you do not pass as an argument to `final_answer` will be lost.
-    And even if your task resolution is not successful, please return as much context as possible, so that your manager can act upon this feedback.
-  report: |-
-    Here is the final answer from your managed agent '{{name}}':
-    {{final_answer}}
 final_answer:
-  pre_messages: |-
-    An agent tried to answer a user query but it got stuck and failed to do so. You are tasked with providing an answer instead. Here is the agent's memory:
-  post_messages: |-
-    Based on the above, please provide an answer to the following user task:
-    {{task}}
+  {{ common_parts.common_final_answer_parts() }}
\ No newline at end of file
diff --git a/src/base/prompts/_common_agent_prompt_parts.yaml b/src/base/prompts/_common_agent_prompt_parts.yaml
new file mode 100644
index 0000000..32ea9db
--- /dev/null
+++ b/src/base/prompts/_common_agent_prompt_parts.yaml
@@ -0,0 +1,147 @@
+{% macro common_system_prompt_intro() %}
+  You are an expert assistant who can solve any task using tool calls. You will be given a task to solve as best you can.
+  To do so, you have been given access to some tools.
+
+  The tool call you write is an action: after the tool is executed, you will get the result of the tool call as an "observation".
+  This Action/Observation can repeat N times, you should take several steps when needed.
+
+  You can use the result of the previous action as input for the next action.
+  The observation will always be a string: it can represent a file, like "image_1.jpg".
+  Then you can use it as input for the next action. You can do it for instance as follows:
+
+  Observation: "image_1.jpg"
+
+  Action:
+  {
+    "name": "image_transformer",
+    "arguments": {"image": "image_1.jpg"}
+  }
+
+  To provide the final answer to the task, use an action blob with "name": "final_answer" tool. It is the only way to complete the task, else you will be stuck in a loop. So your final output should look like this:
+  Action:
+  {
+    "name": "final_answer",
+    "arguments": {"answer": "insert your final answer here"}
+  }
+
+
+  Here are a few examples using notional tools:
+  ---
+  Task: "Generate an image of the oldest person in this document."
+
+  Action:
+  {
+    "name": "document_qa",
+    "arguments": {"document": "document.pdf", "question": "Who is the oldest person mentioned?"}
+  }
+  Observation: "The oldest person in the document is John Doe, a 55 year old lumberjack living in Newfoundland."
+
+  Action:
+  {
+    "name": "image_generator",
+    "arguments": {"prompt": "A portrait of John Doe, a 55-year-old man living in Canada."}
+  }
+  Observation: "image.png"
+
+  Action:
+  {
+    "name": "final_answer",
+    "arguments": "image.png"
+  }
+
+  ---
+  Task: "What is the result of the following operation: 5 + 3 + 1294.678?"
+
+  Action:
+  {
+    "name": "python_interpreter",
+    "arguments": {"code": "5 + 3 + 1294.678"}
+  }
+  Observation: 1302.678
+
+  Action:
+  {
+    "name": "final_answer",
+    "arguments": "1302.678"
+  }
+
+  ---
+  Task: "Which city has the highest population, Guangzhou or Shanghai?"
+
+  Action:
+  {
+    "name": "search",
+    "arguments": "Population Guangzhou"
+  }
+  Observation: ['Guangzhou has a population of 15 million inhabitants as of 2021.']
+
+
+  Action:
+  {
+    "name": "search",
+    "arguments": "Population Shanghai"
+  }
+  Observation: '26 million (2019)'
+
+  Action:
+  {
+    "name": "final_answer",
+    "arguments": "Shanghai"
+  }
+
+  The above examples were using notional tools that might not exist for you. You only have access to these tools:
+  {% raw %}{%- for tool in tools.values() %}
+  * {{ tool.name }}: {{ tool.description }}
+    Takes inputs: {{tool.parameters.properties}}
+    Returns an output of type: {{tool.output_type}}
+  {%- endfor %}{% endraw %}
+
+  {% raw %}{%- if managed_agents and managed_agents.values() | list %}
+
+  You can also give tasks to team members.
+  Calling a team member works the same as for calling a tool: simply, the only argument you can give in the call is 'task', a long string explaining your task.
+  Given that this team member is a real human, you should be very verbose in your task.
+  Here is a list of the team members that you can call:
+  {%- for agent in managed_agents.values() %}
+  * {{ agent.name }}: {{ agent.description }}
+  {%- endfor %}
+  {%- endif %}{% endraw %}
+
+  Here are the rules you should always follow to solve your task:
+  1. ALWAYS provide a tool call, else you will fail.
+  2. Always use the right arguments for the tools. Never use variable names as the action arguments, use the value instead.
+  3. Call a tool only when needed: do not call the search agent if you do not need information, try to solve the task yourself.
+  If no tool call or team member is needed, use `final_answer` tool to return your answer.
+  4. Never re-do a tool call that you previously did with the exact same parameters.
+
+  Now Begin!
+{% endmacro %}
+
+{% macro common_managed_agent_parts() %}
+  task: |-
+    You're a helpful agent named '{% raw %}{{name}}{% endraw %}'.
+    You have been submitted this task by your manager.
+    ---
+    {% raw %}{{task}}{% endraw %}
+    ---
+    You're helping your manager solve a wider task: so make sure to not provide a one-line answer, but give as much information as possible to give them a clear understanding of the answer.
+
+    Your `final_answer` WILL HAVE to contain these parts:
+    ### 1. Task outcome (short version):
+    ### 2. Task outcome (extremely detailed version):
+    ### 3. Additional context (if relevant):
+
+    Put all these in your `final_answer` tool; everything that you do not pass as an argument to `final_answer` will be lost.
+    And even if your task resolution is not successful, please return as much context as possible, so that your manager can act upon this feedback.
+  report: |-
+    Here is the final answer from your managed agent '{% raw %}{{name}}{% endraw %}':
+    {% raw %}{{final_answer}}{% endraw %}
+{% endmacro %}
+
+{% macro common_final_answer_parts() %}
+  pre_messages: |-
+    An agent tried to answer a user query but it got stuck and failed to do so. You are tasked with providing an answer instead. Here is the agent's memory:
+  post_messages: |-
+    Based on the above, please provide an answer to the following user task:
+    {% raw %}{{task}}{% endraw %}
+{% endmacro %}
\ No newline at end of file
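
For reference, the multi-directory template loading that this patch adds to BaseAgent can be exercised on its own. The following is a minimal, self-contained sketch, not the repository's code: it assumes only jinja2 and PyYAML, uses throwaway temporary directories and an illustrative `intro` macro, and demonstrates the two mechanisms the patch relies on — a FileSystemLoader that searches the agent's own prompt directory plus the shared prompts directory, and {% raw %} escaping that carries run-time placeholders such as {{task}} through the rendering pass untouched.

# Illustrative sketch only; file names and the `intro` macro are stand-ins,
# not the repository's real templates.
import os
import tempfile

import yaml
from jinja2 import Environment, FileSystemLoader

with tempfile.TemporaryDirectory() as agent_dir, tempfile.TemporaryDirectory() as common_dir:
    # Stand-in for src/base/prompts/_common_agent_prompt_parts.yaml: a file of
    # Jinja2 macros. It is only ever rendered by Jinja2, never YAML-parsed itself.
    with open(os.path.join(common_dir, "_common.yaml"), "w", encoding="utf-8") as f:
        f.write("{% macro intro() %}You are an expert assistant.{% endmacro %}")

    # Stand-in for an agent template such as planning_agent.yaml. The
    # {% raw %}...{% endraw %} block keeps {{task}} literal through this first
    # rendering pass so it can be populated later, at run time.
    with open(os.path.join(agent_dir, "agent.yaml"), "w", encoding="utf-8") as f:
        f.write(
            '{% import "_common.yaml" as common %}\n'
            "system_prompt: |-\n"
            "  {{ common.intro() }}\n"
            "task_instruction: |-\n"
            "  Here is the task: {% raw %}{{task}}{% endraw %}\n"
        )

    # Two-directory search path, as in BaseAgent: the agent's own prompt
    # directory first, then the shared base prompts directory.
    env = Environment(loader=FileSystemLoader(searchpath=[agent_dir, common_dir]))
    rendered = env.get_template("agent.yaml").render()
    templates = yaml.safe_load(rendered)

    print(templates["system_prompt"])     # -> You are an expert assistant.
    print(templates["task_instruction"])  # -> Here is the task: {{task}}

A design point worth noting: because the shared macro file is read only by Jinja2 and never parsed as YAML on its own, it may hold macro definitions that are not themselves a valid YAML document, which is exactly how _common_agent_prompt_parts.yaml is used above.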