Skip to content

Commit f941819

Browse files
committed
opt deep research
1 parent eba5788 commit f941819

File tree

7 files changed

+92
-93
lines changed

7 files changed

+92
-93
lines changed

src/agent/deep_research/deep_research_agent.py

Lines changed: 35 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
SEARCH_INFO_FILENAME = "search_info.json"
4141

4242
_AGENT_STOP_FLAGS = {}
43+
_BROWSER_AGENT_INSTANCES = {}
4344

4445

4546
async def run_single_browser_task(
@@ -129,6 +130,7 @@ async def run_single_browser_task(
129130

130131
# Store instance for potential stop() call
131132
task_key = f"{task_id}_{uuid.uuid4()}"
133+
_BROWSER_AGENT_INSTANCES[task_key] = bu_agent_instance
132134

133135
# --- Run with Stop Check ---
134136
# BrowserUseAgent needs to internally check a stop signal or have a stop method.
@@ -173,6 +175,9 @@ async def run_single_browser_task(
173175
except Exception as e:
174176
logger.error(f"Error closing browser: {e}")
175177

178+
if task_key in _BROWSER_AGENT_INSTANCES:
179+
del _BROWSER_AGENT_INSTANCES[task_key]
180+
176181

177182
class BrowserSearchInput(BaseModel):
178183
queries: List[str] = Field(
@@ -257,7 +262,7 @@ def create_browser_search_tool(
257262
name="parallel_browser_search",
258263
description=f"""Use this tool to actively search the web for information related to a specific research task or question.
259264
It runs up to {max_parallel_browsers} searches in parallel using a browser agent for better results than simple scraping.
260-
Provide a list of distinct search queries that are likely to yield relevant information.""",
265+
Provide a list of distinct search queries(up to {max_parallel_browsers}) that are likely to yield relevant information.""",
261266
args_schema=BrowserSearchInput,
262267
)
263268

@@ -296,9 +301,8 @@ class DeepResearchState(TypedDict):
296301
def _load_previous_state(task_id: str, output_dir: str) -> Dict[str, Any]:
297302
"""Loads state from files if they exist."""
298303
state_updates = {}
299-
plan_file = os.path.join(output_dir, task_id, PLAN_FILENAME)
300-
search_file = os.path.join(output_dir, task_id, SEARCH_INFO_FILENAME)
301-
304+
plan_file = os.path.join(output_dir, PLAN_FILENAME)
305+
search_file = os.path.join(output_dir, SEARCH_INFO_FILENAME)
302306
if os.path.exists(plan_file):
303307
try:
304308
with open(plan_file, 'r', encoding='utf-8') as f:
@@ -307,9 +311,9 @@ def _load_previous_state(task_id: str, output_dir: str) -> Dict[str, Any]:
307311
step = 1
308312
for line in f:
309313
line = line.strip()
310-
if line.startswith(("[x]", "[ ]")):
311-
status = "completed" if line.startswith("[x]") else "pending"
312-
task = line[4:].strip()
314+
if line.startswith(("- [x]", "- [ ]")):
315+
status = "completed" if line.startswith("- [x]") else "pending"
316+
task = line[5:].strip()
313317
plan.append(
314318
ResearchPlanItem(step=step, task=task, status=status, queries=None, result_summary=None))
315319
step += 1
@@ -321,7 +325,6 @@ def _load_previous_state(task_id: str, output_dir: str) -> Dict[str, Any]:
321325
except Exception as e:
322326
logger.error(f"Failed to load or parse research plan {plan_file}: {e}")
323327
state_updates['error_message'] = f"Failed to load research plan: {e}"
324-
325328
if os.path.exists(search_file):
326329
try:
327330
with open(search_file, 'r', encoding='utf-8') as f:
@@ -342,7 +345,7 @@ def _save_plan_to_md(plan: List[ResearchPlanItem], output_dir: str):
342345
with open(plan_file, 'w', encoding='utf-8') as f:
343346
f.write("# Research Plan\n\n")
344347
for item in plan:
345-
marker = "[x]" if item['status'] == 'completed' else "[ ]"
348+
marker = "- [x]" if item['status'] == 'completed' else "- [ ]"
346349
f.write(f"{marker} {item['task']}\n")
347350
logger.info(f"Research plan saved to {plan_file}")
348351
except Exception as e:
@@ -545,8 +548,6 @@ async def research_execution_node(state: DeepResearchState) -> Dict[str, Any]:
545548
stop_event = _AGENT_STOP_FLAGS.get(task_id)
546549
if stop_event and stop_event.is_set():
547550
logger.info(f"Stop requested before executing tool: {tool_name}")
548-
# How to report this back? Maybe skip execution, return special state?
549-
# Let's update state and return stop_requested = True
550551
current_step['status'] = 'pending' # Not completed due to stop
551552
_save_plan_to_md(plan, output_dir)
552553
return {"stop_requested": True, "research_plan": plan}
@@ -668,7 +669,8 @@ async def synthesis_node(state: DeepResearchState) -> Dict[str, Any]:
668669
# Prepare the research plan context
669670
plan_summary = "\nResearch Plan Followed:\n"
670671
for item in plan:
671-
marker = "[x]" if item['status'] == 'completed' else "[?]" if item['status'] == 'failed' else "[ ]"
672+
marker = "- [x]" if item['status'] == 'completed' else "- [ ] (Failed)" if item[
673+
'status'] == 'failed' else "- [ ]"
672674
plan_summary += f"{marker} {item['task']}\n"
673675

674676
synthesis_prompt = ChatPromptTemplate.from_messages([
@@ -745,7 +747,7 @@ def should_continue(state: DeepResearchState) -> str:
745747
return "end_run" # Should not happen if planning node ran correctly
746748

747749
# Check if there are pending steps in the plan
748-
if current_index < 2:
750+
if current_index < len(plan):
749751
logger.info(
750752
f"Plan has pending steps (current index {current_index}/{len(plan)}). Routing to Research Execution.")
751753
return "execute_research"
@@ -956,7 +958,25 @@ async def run(self, topic: str, task_id: Optional[str] = None, save_dir: str = "
956958
"final_state": final_state if final_state else {} # Return the final state dict
957959
}
958960

959-
def stop(self):
961+
async def _stop_lingering_browsers(self, task_id):
    """Ask every browser agent still registered for ``task_id`` to stop.

    Scans the module-level ``_BROWSER_AGENT_INSTANCES`` registry for keys
    that start with ``f"{task_id}_"`` and calls ``stop()`` on each
    registered instance. This is best effort: a failure on one agent is
    logged and does not prevent the remaining agents from being stopped.

    Args:
        task_id: Identifier of the research task whose browser agents
            should be stopped.
    """
    import inspect  # local import; only this method needs it

    key_prefix = f"{task_id}_"
    # Snapshot the matching keys first: entries can be removed from the
    # registry while this coroutine is suspended in an ``await`` below.
    keys_to_stop = [key for key in _BROWSER_AGENT_INSTANCES if key.startswith(key_prefix)]
    if not keys_to_stop:
        return

    logger.warning(
        f"Found {len(keys_to_stop)} potentially lingering browser agents for task {task_id}. Attempting stop...")
    for key in keys_to_stop:
        # Re-read at use time so an entry removed since the snapshot is skipped.
        agent_instance = _BROWSER_AGENT_INSTANCES.get(key)
        if not agent_instance:
            continue
        try:
            # NOTE(review): stop() may be a plain method or a coroutine
            # function depending on the browser agent implementation --
            # confirm against its API. Awaiting only when the result is
            # awaitable supports both, and avoids reporting a successful
            # synchronous stop() as an error.
            stop_result = agent_instance.stop()
            if inspect.isawaitable(stop_result):
                await stop_result
            logger.info(f"Called stop() on browser agent instance {key}")
        except Exception as e:
            logger.error(f"Error calling stop() on browser agent instance {key}: {e}")
978+
979+
async def stop(self):
960980
"""Signals the currently running agent task to stop."""
961981
if not self.current_task_id or not self.stop_event:
962982
logger.info("No agent task is currently running.")
@@ -965,6 +985,7 @@ def stop(self):
965985
logger.info(f"Stop requested for task ID: {self.current_task_id}")
966986
self.stop_event.set() # Signal the stop event
967987
self.stopped = True
988+
await self._stop_lingering_browsers(self.current_task_id)
968989

969990
def close(self):
970991
self.stopped = False

src/utils/config.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,13 @@
1616
"openai": ["gpt-4o", "gpt-4", "gpt-3.5-turbo", "o3-mini"],
1717
"deepseek": ["deepseek-chat", "deepseek-reasoner"],
1818
"google": ["gemini-2.0-flash", "gemini-2.0-flash-thinking-exp", "gemini-1.5-flash-latest",
19-
"gemini-1.5-flash-8b-latest", "gemini-2.0-flash-thinking-exp-01-21", "gemini-2.0-pro-exp-02-05"],
19+
"gemini-1.5-flash-8b-latest", "gemini-2.0-flash-thinking-exp-01-21", "gemini-2.0-pro-exp-02-05",
20+
"gemini-2.5-pro-preview-03-25", "gemini-2.5-flash-preview-04-17"],
2021
"ollama": ["qwen2.5:7b", "qwen2.5:14b", "qwen2.5:32b", "qwen2.5-coder:14b", "qwen2.5-coder:32b", "llama2:7b",
2122
"deepseek-r1:14b", "deepseek-r1:32b"],
2223
"azure_openai": ["gpt-4o", "gpt-4", "gpt-3.5-turbo"],
2324
"mistral": ["pixtral-large-latest", "mistral-large-latest", "mistral-small-latest", "ministral-8b-latest"],
24-
"alibaba": ["qwen-plus", "qwen-max", "qwen-turbo", "qwen-long"],
25+
"alibaba": ["qwen-plus", "qwen-max", "qwen-vl-max", "qwen-vl-plus", "qwen-turbo", "qwen-long"],
2526
"moonshot": ["moonshot-v1-32k-vision-preview", "moonshot-v1-8k-vision-preview"],
2627
"unbound": ["gemini-2.0-flash", "gpt-4o-mini", "gpt-4o", "gpt-4.5-preview"],
2728
"siliconflow": [

src/utils/llm_provider.py

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -265,23 +265,6 @@ def get_llm_model(provider: str, **kwargs):
265265
azure_endpoint=base_url,
266266
api_key=api_key,
267267
)
268-
elif provider == "bedrock":
269-
if not kwargs.get("base_url", ""):
270-
access_key_id = os.getenv('AWS_ACCESS_KEY_ID', '')
271-
else:
272-
access_key_id = kwargs.get("base_url")
273-
274-
if not kwargs.get("api_key", ""):
275-
api_key = os.getenv('AWS_SECRET_ACCESS_KEY', '')
276-
else:
277-
api_key = kwargs.get("api_key")
278-
return ChatBedrock(
279-
model=kwargs.get("model_name", 'anthropic.claude-3-5-sonnet-20241022-v2:0'),
280-
region=kwargs.get("bedrock_region", 'us-west-2'), # with higher quota
281-
aws_access_key_id=SecretStr(access_key_id),
282-
aws_secret_access_key=SecretStr(api_key),
283-
temperature=kwargs.get("temperature", 0.0),
284-
)
285268
elif provider == "alibaba":
286269
if not kwargs.get("base_url", ""):
287270
base_url = os.getenv("ALIBABA_ENDPOINT", "https://dashscope.aliyuncs.com/compatible-mode/v1")

src/webui/components/deep_research_agent_tab.py

Lines changed: 33 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ async def run_deep_research(webui_manager: WebuiManager, components: Dict[Compon
8484
return
8585

8686
# Store base save dir for stop handler
87-
webui_manager._dr_save_dir = base_save_dir
87+
webui_manager.dr_save_dir = base_save_dir
8888
os.makedirs(base_save_dir, exist_ok=True)
8989

9090
# --- 2. Initial UI Update ---
@@ -141,29 +141,29 @@ def get_setting(tab: str, key: str, default: Any = None):
141141
}
142142

143143
# --- 4. Initialize or Get Agent ---
144-
if not webui_manager._dr_agent:
145-
webui_manager._dr_agent = DeepResearchAgent(
144+
if not webui_manager.dr_agent:
145+
webui_manager.dr_agent = DeepResearchAgent(
146146
llm=llm,
147147
browser_config=browser_config_dict,
148148
mcp_server_config=mcp_config
149149
)
150150
logger.info("DeepResearchAgent initialized.")
151151

152152
# --- 5. Start Agent Run ---
153-
agent_run_coro = await webui_manager._dr_agent.run(
153+
agent_run_coro = webui_manager.dr_agent.run(
154154
topic=task_topic,
155155
task_id=task_id_to_resume,
156156
save_dir=base_save_dir,
157157
max_parallel_browsers=max_parallel_agents
158158
)
159159
agent_task = asyncio.create_task(agent_run_coro)
160-
webui_manager._dr_current_task = agent_task
160+
webui_manager.dr_current_task = agent_task
161161

162162
# Wait briefly for the agent to start and potentially create the task ID/folder
163163
await asyncio.sleep(1.0)
164164

165165
# Determine the actual task ID being used (agent sets this)
166-
running_task_id = webui_manager._dr_agent.current_task_id
166+
running_task_id = webui_manager.dr_agent.current_task_id
167167
if not running_task_id:
168168
# Agent might not have set it yet, try to get from result later? Risky.
169169
# Or derive from resume_task_id if provided?
@@ -176,7 +176,7 @@ def get_setting(tab: str, key: str, default: Any = None):
176176
else:
177177
logger.info(f"Agent started with Task ID: {running_task_id}")
178178

179-
webui_manager._dr_task_id = running_task_id # Store for stop handler
179+
webui_manager.dr_task_id = running_task_id # Store for stop handler
180180

181181
# --- 6. Monitor Progress via research_plan.md ---
182182
if running_task_id:
@@ -187,12 +187,11 @@ def get_setting(tab: str, key: str, default: Any = None):
187187
else:
188188
logger.warning("Cannot monitor plan file: Task ID unknown.")
189189
plan_file_path = None
190-
190+
last_plan_content = None
191191
while not agent_task.done():
192192
update_dict = {}
193-
194-
# Check for stop signal (agent sets self.stopped)
195-
agent_stopped = getattr(webui_manager._dr_agent, 'stopped', False)
193+
update_dict[resume_task_id_comp] = gr.update(value=running_task_id)
194+
agent_stopped = getattr(webui_manager.dr_agent, 'stopped', False)
196195
if agent_stopped:
197196
logger.info("Stop signal detected from agent state.")
198197
break # Exit monitoring loop
@@ -204,7 +203,8 @@ def get_setting(tab: str, key: str, default: Any = None):
204203
if current_mtime > last_plan_mtime:
205204
logger.info(f"Detected change in {plan_file_path}")
206205
plan_content = _read_file_safe(plan_file_path)
207-
if plan_content is not None and plan_content != last_plan_content:
206+
if last_plan_content is None or (
207+
plan_content is not None and plan_content != last_plan_content):
208208
update_dict[markdown_display_comp] = gr.update(value=plan_content)
209209
last_plan_content = plan_content
210210
last_plan_mtime = current_mtime
@@ -230,7 +230,7 @@ def get_setting(tab: str, key: str, default: Any = None):
230230
# Try to get task ID from result if not known before
231231
if not running_task_id and final_result_dict and 'task_id' in final_result_dict:
232232
running_task_id = final_result_dict['task_id']
233-
webui_manager._dr_task_id = running_task_id
233+
webui_manager.dr_task_id = running_task_id
234234
task_specific_dir = os.path.join(base_save_dir, str(running_task_id))
235235
report_file_path = os.path.join(task_specific_dir, "report.md")
236236
logger.info(f"Task ID confirmed from result: {running_task_id}")
@@ -268,22 +268,14 @@ def get_setting(tab: str, key: str, default: Any = None):
268268

269269
finally:
270270
# --- 8. Final UI Reset ---
271-
webui_manager._dr_current_task = None # Clear task reference
272-
webui_manager._dr_task_id = None # Clear running task ID
273-
# Optionally close agent resources if needed, e.g., browser pool
274-
if webui_manager._dr_agent and hasattr(webui_manager._dr_agent, 'close'):
275-
try:
276-
await webui_manager._dr_agent.close() # Assuming an async close method
277-
logger.info("Closed DeepResearchAgent resources.")
278-
webui_manager._dr_agent = None
279-
except Exception as e_close:
280-
logger.error(f"Error closing DeepResearchAgent: {e_close}")
271+
webui_manager.dr_current_task = None # Clear task reference
272+
webui_manager.dr_task_id = None # Clear running task ID
281273

282274
yield {
283275
start_button_comp: gr.update(value="▶️ Run", interactive=True),
284276
stop_button_comp: gr.update(interactive=False),
285277
research_task_comp: gr.update(interactive=True),
286-
resume_task_id_comp: gr.update(interactive=True),
278+
resume_task_id_comp: gr.update(value="", interactive=True),
287279
parallel_num_comp: gr.update(interactive=True),
288280
save_dir_comp: gr.update(interactive=True),
289281
# Keep download button enabled if file exists
@@ -295,10 +287,10 @@ def get_setting(tab: str, key: str, default: Any = None):
295287
async def stop_deep_research(webui_manager: WebuiManager) -> Dict[Component, Any]:
296288
"""Handles the Stop button click."""
297289
logger.info("Stop button clicked for Deep Research.")
298-
agent = webui_manager._dr_agent
299-
task = webui_manager._dr_current_task
300-
task_id = webui_manager._dr_task_id
301-
base_save_dir = webui_manager._dr_save_dir
290+
agent = webui_manager.dr_agent
291+
task = webui_manager.dr_current_task
292+
task_id = webui_manager.dr_task_id
293+
base_save_dir = webui_manager.dr_save_dir
302294

303295
stop_button_comp = webui_manager.get_component_by_id("deep_research_agent.stop_button")
304296
start_button_comp = webui_manager.get_component_by_id("deep_research_agent.start_button")
@@ -311,15 +303,11 @@ async def stop_deep_research(webui_manager: WebuiManager) -> Dict[Component, Any
311303

312304
if agent and task and not task.done():
313305
logger.info("Signalling DeepResearchAgent to stop.")
314-
if hasattr(agent, 'stop'):
315-
try:
316-
# Assuming stop is synchronous or sets a flag quickly
317-
agent.stop()
318-
except Exception as e:
319-
logger.error(f"Error calling agent.stop(): {e}")
320-
else:
321-
logger.warning("Agent has no 'stop' method. Task cancellation might not be graceful.")
322-
# Task cancellation is handled by the run_deep_research finally block if needed
306+
try:
307+
# agent.stop() is a coroutine: it sets the stop event and then stops any lingering browser agents
308+
await agent.stop()
309+
except Exception as e:
310+
logger.error(f"Error calling agent.stop(): {e}")
323311

324312
# The run_deep_research loop should detect the stop and exit.
325313
# We yield an intermediate "Stopping..." state. The final reset is done by run_deep_research.
@@ -393,7 +381,7 @@ def create_deep_research_agent_tab(webui_manager: WebuiManager):
393381

394382
with gr.Group():
395383
research_task = gr.Textbox(label="Research Task", lines=5,
396-
value="Give me a detailed plan for traveling to Switzerland on June 1st.",
384+
value="Give me a detailed travel plan to Switzerland from June 1st to 10th.",
397385
interactive=True)
398386
with gr.Row():
399387
resume_task_id = gr.Textbox(label="Resume Task ID", value="",
@@ -418,7 +406,9 @@ def create_deep_research_agent_tab(webui_manager: WebuiManager):
418406
stop_button=stop_button,
419407
markdown_display=markdown_display,
420408
markdown_download=markdown_download,
421-
resume_task_id=resume_task_id
409+
resume_task_id=resume_task_id,
410+
mcp_json_file=mcp_json_file,
411+
mcp_server_config=mcp_server_config,
422412
)
423413
)
424414
webui_manager.add_components("deep_research_agent", tab_components)
@@ -430,7 +420,7 @@ def create_deep_research_agent_tab(webui_manager: WebuiManager):
430420
)
431421

432422
dr_tab_outputs = list(tab_components.values())
433-
all_managed_inputs = webui_manager.get_components()
423+
all_managed_inputs = set(webui_manager.get_components())
434424

435425
# --- Define Event Handler Wrappers ---
436426
async def start_wrapper(comps: Dict[Component, Any]) -> AsyncGenerator[Dict[Component, Any], None]:
@@ -439,17 +429,17 @@ async def start_wrapper(comps: Dict[Component, Any]) -> AsyncGenerator[Dict[Comp
439429

440430
async def stop_wrapper() -> AsyncGenerator[Dict[Component, Any], None]:
441431
update_dict = await stop_deep_research(webui_manager)
442-
yield update_dict # Yield the single dict update
432+
yield update_dict
443433

444434
# --- Connect Handlers ---
445435
start_button.click(
446436
fn=start_wrapper,
447437
inputs=all_managed_inputs,
448-
outputs=dr_tab_outputs # Update only components in this tab
438+
outputs=dr_tab_outputs
449439
)
450440

451441
stop_button.click(
452442
fn=stop_wrapper,
453443
inputs=None,
454-
outputs=dr_tab_outputs # Update only components in this tab
444+
outputs=dr_tab_outputs
455445
)

src/webui/webui_manager.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,9 +45,9 @@ def init_deep_research_agent(self) -> None:
4545
init deep research agent
4646
"""
4747
self.dr_agent: Optional[DeepResearchAgent] = None
48-
self._dr_current_task = None
48+
self.dr_current_task = None
4949
self.dr_agent_task_id: Optional[str] = None
50-
self._dr_save_dir: Optional[str] = None
50+
self.dr_save_dir: Optional[str] = None
5151

5252
def add_components(self, tab_name: str, components_dict: dict[str, "Component"]) -> None:
5353
"""

0 commit comments

Comments
 (0)