Skip to content

Commit eb91cb6

Browse files
committed
Pin browser-use to 0.1.43 (down from 0.1.45) and fix deep research
1 parent fb65ca7 commit eb91cb6

File tree

10 files changed

+218
-267
lines changed

10 files changed

+218
-267
lines changed

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
browser-use==0.1.45
1+
browser-use==0.1.43
22
pyperclip==1.9.0
33
gradio==5.27.0
44
json-repair

src/agent/browser_use/browser_use_agent.py

Lines changed: 52 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,13 @@
88
from browser_use.agent.gif import create_history_gif
99
from browser_use.agent.service import Agent, AgentHookFunc
1010
from browser_use.agent.views import (
11+
ActionResult,
12+
AgentHistory,
1113
AgentHistoryList,
1214
AgentStepInfo,
15+
ToolCallingMethod,
1316
)
17+
from browser_use.browser.views import BrowserStateHistory
1418
from browser_use.telemetry.views import (
1519
AgentEndTelemetryEvent,
1620
)
@@ -21,17 +25,15 @@
2125
logger = logging.getLogger(__name__)
2226

2327
SKIP_LLM_API_KEY_VERIFICATION = (
24-
os.environ.get("SKIP_LLM_API_KEY_VERIFICATION", "false").lower()[0] in "ty1"
28+
os.environ.get("SKIP_LLM_API_KEY_VERIFICATION", "false").lower()[0] in "ty1"
2529
)
2630

2731

2832
class BrowserUseAgent(Agent):
2933
@time_execution_async("--run (agent)")
3034
async def run(
31-
self,
32-
max_steps: int = 100,
33-
on_step_start: AgentHookFunc | None = None,
34-
on_step_end: AgentHookFunc | None = None,
35+
self, max_steps: int = 100, on_step_start: AgentHookFunc | None = None,
36+
on_step_end: AgentHookFunc | None = None
3537
) -> AgentHistoryList:
3638
"""Execute the task with maximum number of steps"""
3739

@@ -49,41 +51,28 @@ async def run(
4951
)
5052
signal_handler.register()
5153

52-
# Wait for verification task to complete if it exists
53-
if hasattr(self, "_verification_task") and not self._verification_task.done():
54-
try:
55-
await self._verification_task
56-
except Exception:
57-
# Error already logged in the task
58-
pass
59-
6054
try:
6155
self._log_agent_run()
6256

6357
# Execute initial actions if provided
6458
if self.initial_actions:
65-
result = await self.multi_act(
66-
self.initial_actions, check_for_new_elements=False
67-
)
59+
result = await self.multi_act(self.initial_actions, check_for_new_elements=False)
6860
self.state.last_result = result
6961

7062
for step in range(max_steps):
7163
# Check if waiting for user input after Ctrl+C
72-
while self.state.paused:
73-
await asyncio.sleep(0.5)
74-
if self.state.stopped:
75-
break
64+
if self.state.paused:
65+
signal_handler.wait_for_resume()
66+
signal_handler.reset()
7667

7768
# Check if we should stop due to too many failures
7869
if self.state.consecutive_failures >= self.settings.max_failures:
79-
logger.error(
80-
f"❌ Stopping due to {self.settings.max_failures} consecutive failures"
81-
)
70+
logger.error(f'❌ Stopping due to {self.settings.max_failures} consecutive failures')
8271
break
8372

8473
# Check control flags before each step
8574
if self.state.stopped:
86-
logger.info("Agent stopped")
75+
logger.info('Agent stopped')
8776
break
8877

8978
while self.state.paused:
@@ -108,15 +97,30 @@ async def run(
10897
await self.log_completion()
10998
break
11099
else:
111-
logger.info("❌ Failed to complete task in maximum steps")
100+
error_message = 'Failed to complete task in maximum steps'
101+
102+
self.state.history.history.append(
103+
AgentHistory(
104+
model_output=None,
105+
result=[ActionResult(error=error_message, include_in_memory=True)],
106+
state=BrowserStateHistory(
107+
url='',
108+
title='',
109+
tabs=[],
110+
interacted_element=[],
111+
screenshot=None,
112+
),
113+
metadata=None,
114+
)
115+
)
116+
117+
logger.info(f'❌ {error_message}')
112118

113119
return self.state.history
114120

115121
except KeyboardInterrupt:
116122
# Already handled by our signal handler, but catch any direct KeyboardInterrupt as well
117-
logger.info(
118-
"Got KeyboardInterrupt during execution, returning current history"
119-
)
123+
logger.info('Got KeyboardInterrupt during execution, returning current history')
120124
return self.state.history
121125

122126
finally:
@@ -136,13 +140,29 @@ async def run(
136140
)
137141
)
138142

143+
if self.settings.save_playwright_script_path:
144+
logger.info(
145+
f'Agent run finished. Attempting to save Playwright script to: {self.settings.save_playwright_script_path}'
146+
)
147+
try:
148+
# Extract sensitive data keys if sensitive_data is provided
149+
keys = list(self.sensitive_data.keys()) if self.sensitive_data else None
150+
# Pass browser and context config to the saving method
151+
self.state.history.save_as_playwright_script(
152+
self.settings.save_playwright_script_path,
153+
sensitive_data_keys=keys,
154+
browser_config=self.browser.config,
155+
context_config=self.browser_context.config,
156+
)
157+
except Exception as script_gen_err:
158+
# Log any error during script generation/saving
159+
logger.error(f'Failed to save Playwright script: {script_gen_err}', exc_info=True)
160+
139161
await self.close()
140162

141163
if self.settings.generate_gif:
142-
output_path: str = "agent_history.gif"
164+
output_path: str = 'agent_history.gif'
143165
if isinstance(self.settings.generate_gif, str):
144166
output_path = self.settings.generate_gif
145167

146-
create_history_gif(
147-
task=self.task, history=self.state.history, output_path=output_path
148-
)
168+
create_history_gif(task=self.task, history=self.state.history, output_path=output_path)

src/agent/deep_research/deep_research_agent.py

Lines changed: 42 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,10 @@
2929
from langgraph.graph import StateGraph
3030
from pydantic import BaseModel, Field
3131

32+
from browser_use.browser.context import BrowserContextWindowSize, BrowserContextConfig
33+
3234
from src.agent.browser_use.browser_use_agent import BrowserUseAgent
3335
from src.browser.custom_browser import CustomBrowser
34-
from src.browser.custom_context import CustomBrowserContextConfig
3536
from src.controller.custom_controller import CustomController
3637
from src.utils.mcp_client import setup_mcp_client_and_tools
3738

@@ -47,12 +48,12 @@
4748

4849

4950
async def run_single_browser_task(
50-
task_query: str,
51-
task_id: str,
52-
llm: Any, # Pass the main LLM
53-
browser_config: Dict[str, Any],
54-
stop_event: threading.Event,
55-
use_vision: bool = False,
51+
task_query: str,
52+
task_id: str,
53+
llm: Any, # Pass the main LLM
54+
browser_config: Dict[str, Any],
55+
stop_event: threading.Event,
56+
use_vision: bool = False,
5657
) -> Dict[str, Any]:
5758
"""
5859
Runs a single BrowserUseAgent task.
@@ -104,10 +105,9 @@ async def run_single_browser_task(
104105
)
105106
)
106107

107-
context_config = CustomBrowserContextConfig(
108+
context_config = BrowserContextConfig(
108109
save_downloads_path="./tmp/downloads",
109-
window_width=window_w,
110-
window_height=window_h,
110+
browser_window_size=BrowserContextWindowSize(width=window_w, height=window_h),
111111
force_new_context=True,
112112
)
113113
bu_browser_context = await bu_browser.new_context(config=context_config)
@@ -198,12 +198,12 @@ class BrowserSearchInput(BaseModel):
198198

199199

200200
async def _run_browser_search_tool(
201-
queries: List[str],
202-
task_id: str, # Injected dependency
203-
llm: Any, # Injected dependency
204-
browser_config: Dict[str, Any],
205-
stop_event: threading.Event,
206-
max_parallel_browsers: int = 1,
201+
queries: List[str],
202+
task_id: str, # Injected dependency
203+
llm: Any, # Injected dependency
204+
browser_config: Dict[str, Any],
205+
stop_event: threading.Event,
206+
max_parallel_browsers: int = 1,
207207
) -> List[Dict[str, Any]]:
208208
"""
209209
Internal function to execute parallel browser searches based on LLM-provided queries.
@@ -267,11 +267,11 @@ async def task_wrapper(query):
267267

268268

269269
def create_browser_search_tool(
270-
llm: Any,
271-
browser_config: Dict[str, Any],
272-
task_id: str,
273-
stop_event: threading.Event,
274-
max_parallel_browsers: int = 1,
270+
llm: Any,
271+
browser_config: Dict[str, Any],
272+
task_id: str,
273+
stop_event: threading.Event,
274+
max_parallel_browsers: int = 1,
275275
) -> StructuredTool:
276276
"""Factory function to create the browser search tool with necessary dependencies."""
277277
# Use partial to bind the dependencies that aren't part of the LLM call arguments
@@ -553,7 +553,7 @@ async def research_execution_node(state: DeepResearchState) -> Dict[str, Any]:
553553
else:
554554
current_task_message = [
555555
SystemMessage(
556-
content="You are a research assistant executing one step of a research plan. Use the available tools, especially the 'parallel_browser_search' tool, to gather information needed for the current task. Be precise with your search queries if using the browser tool."
556+
content="You are a research assistant executing one step of a research plan. Use the available tools, especially the 'parallel_browser_search' tool, to gather information needed for the current task. Be precise with your search queries if using the browser tool. Please output at least one tool."
557557
),
558558
HumanMessage(
559559
content=f"Research Task (Step {current_step['step']}): {current_step['task']}"
@@ -582,8 +582,11 @@ async def research_execution_node(state: DeepResearchState) -> Dict[str, Any]:
582582
_save_plan_to_md(plan, output_dir)
583583
return {
584584
"research_plan": plan,
585-
"current_step_index": current_index + 1,
586-
"error_message": f"LLM failed to call a tool for step {current_step['step']}.",
585+
"status": "pending",
586+
"current_step_index": current_index,
587+
"messages": [
588+
f"LLM failed to call a tool for step {current_step['step']}. Response: {ai_response.content}"
589+
f". Please use tool to do research unless you are thinking or summary"],
587590
}
588591

589592
# Process tool calls
@@ -665,8 +668,8 @@ async def research_execution_node(state: DeepResearchState) -> Dict[str, Any]:
665668
browser_tool_called = "parallel_browser_search" in executed_tool_names
666669
# We might need a more nuanced status based on the *content* of tool_results
667670
step_failed = (
668-
any("Error:" in str(tr.content) for tr in tool_results)
669-
or not browser_tool_called
671+
any("Error:" in str(tr.content) for tr in tool_results)
672+
or not browser_tool_called
670673
)
671674

672675
if step_failed:
@@ -695,9 +698,9 @@ async def research_execution_node(state: DeepResearchState) -> Dict[str, Any]:
695698
"search_results": current_search_results, # Update with new results
696699
"current_step_index": current_index + 1,
697700
"messages": state["messages"]
698-
+ current_task_message
699-
+ [ai_response]
700-
+ tool_results,
701+
+ current_task_message
702+
+ [ai_response]
703+
+ tool_results,
701704
# Optionally return the tool_results messages if needed by downstream nodes
702705
}
703706

@@ -879,10 +882,10 @@ def should_continue(state: DeepResearchState) -> str:
879882

880883
class DeepResearchAgent:
881884
def __init__(
882-
self,
883-
llm: Any,
884-
browser_config: Dict[str, Any],
885-
mcp_server_config: Optional[Dict[str, Any]] = None,
885+
self,
886+
llm: Any,
887+
browser_config: Dict[str, Any],
888+
mcp_server_config: Optional[Dict[str, Any]] = None,
886889
):
887890
"""
888891
Initializes the DeepSearchAgent.
@@ -904,7 +907,7 @@ def __init__(
904907
self.runner: Optional[asyncio.Task] = None # To hold the asyncio task for run
905908

906909
async def _setup_tools(
907-
self, task_id: str, stop_event: threading.Event, max_parallel_browsers: int = 1
910+
self, task_id: str, stop_event: threading.Event, max_parallel_browsers: int = 1
908911
) -> List[Tool]:
909912
"""Sets up the basic tools (File I/O) and optional MCP tools."""
910913
tools = [
@@ -981,11 +984,11 @@ def _compile_graph(self) -> StateGraph:
981984
return app
982985

983986
async def run(
984-
self,
985-
topic: str,
986-
task_id: Optional[str] = None,
987-
save_dir: str = "./tmp/deep_research",
988-
max_parallel_browsers: int = 1,
987+
self,
988+
topic: str,
989+
task_id: Optional[str] = None,
990+
save_dir: str = "./tmp/deep_research",
991+
max_parallel_browsers: int = 1,
989992
) -> Dict[str, Any]:
990993
"""
991994
Starts the deep research process (Async Generator Version).

src/browser/custom_browser.py

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,32 +26,41 @@
2626
from browser_use.utils import time_execution_async
2727
import socket
2828

29-
from .custom_context import CustomBrowserContext, CustomBrowserContextConfig
29+
from .custom_context import CustomBrowserContext
3030

3131
logger = logging.getLogger(__name__)
3232

3333

3434
class CustomBrowser(Browser):
3535

36-
async def new_context(self, config: CustomBrowserContextConfig | None = None) -> CustomBrowserContext:
36+
async def new_context(self, config: BrowserContextConfig | None = None) -> CustomBrowserContext:
3737
"""Create a browser context"""
3838
browser_config = self.config.model_dump() if self.config else {}
3939
context_config = config.model_dump() if config else {}
4040
merged_config = {**browser_config, **context_config}
41-
return CustomBrowserContext(config=CustomBrowserContextConfig(**merged_config), browser=self)
41+
return CustomBrowserContext(config=BrowserContextConfig(**merged_config), browser=self)
4242

4343
async def _setup_builtin_browser(self, playwright: Playwright) -> PlaywrightBrowser:
4444
"""Sets up and returns a Playwright Browser instance with anti-detection measures."""
4545
assert self.config.browser_binary_path is None, 'browser_binary_path should be None if trying to use the builtin browsers'
4646

47-
if self.config.headless:
47+
# Use the configured window size from new_context_config if available
48+
if (
49+
not self.config.headless
50+
and hasattr(self.config, 'new_context_config')
51+
and hasattr(self.config.new_context_config, 'browser_window_size')
52+
):
53+
screen_size = self.config.new_context_config.browser_window_size.model_dump()
54+
offset_x, offset_y = get_window_adjustments()
55+
elif self.config.headless:
4856
screen_size = {'width': 1920, 'height': 1080}
4957
offset_x, offset_y = 0, 0
5058
else:
5159
screen_size = get_screen_resolution()
5260
offset_x, offset_y = get_window_adjustments()
5361

5462
chrome_args = {
63+
f'--remote-debugging-port={self.config.chrome_remote_debugging_port}',
5564
*CHROME_ARGS,
5665
*(CHROME_DOCKER_ARGS if IN_DOCKER else []),
5766
*(CHROME_HEADLESS_ARGS if self.config.headless else []),
@@ -70,8 +79,8 @@ async def _setup_builtin_browser(self, playwright: Playwright) -> PlaywrightBrow
7079

7180
# check if the configured remote debugging port is already taken; if so, remove the remote-debugging-port arg to prevent conflicts
7281
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
73-
if s.connect_ex(('localhost', 9222)) == 0:
74-
chrome_args.remove('--remote-debugging-port=9222')
82+
if s.connect_ex(('localhost', self.config.chrome_remote_debugging_port)) == 0:
83+
chrome_args.remove(f'--remote-debugging-port={self.config.chrome_remote_debugging_port}')
7584

7685
browser_class = getattr(playwright, self.config.browser_class)
7786
args = {

0 commit comments

Comments
 (0)