browser-use
diff --git a/requirements.txt b/requirements.txt
Lines changed: 1 addition & 1 deletion
diff --git a/src/agent/browser_use/browser_use_agent.py b/src/agent/browser_use/browser_use_agent.py
Lines changed: 52 additions & 32 deletions
diff --git a/src/agent/deep_research/deep_research_agent.py b/src/agent/deep_research/deep_research_agent.py
Lines changed: 42 additions & 39 deletions
diff --git a/src/browser/custom_browser.py b/src/browser/custom_browser.py
Lines changed: 15 additions & 6 deletions
@@ -1,4 +1,4 @@
-browser-use==0.1.45
+browser-use==0.1.43
 pyperclip==1.9.0
 gradio==5.27.0
 json-repair
 
@@ -8,9 +8,13 @@
 from browser_use.agent.gif import create_history_gif
 from browser_use.agent.service import Agent, AgentHookFunc
 from browser_use.agent.views import (
+    ActionResult,
+    AgentHistory,
     AgentHistoryList,
     AgentStepInfo,
+    ToolCallingMethod,
 )
+from browser_use.browser.views import BrowserStateHistory
 from browser_use.telemetry.views import (
     AgentEndTelemetryEvent,
 )
@@ -21,17 +25,15 @@
 logger = logging.getLogger(__name__)
 
 SKIP_LLM_API_KEY_VERIFICATION = (
-    os.environ.get("SKIP_LLM_API_KEY_VERIFICATION", "false").lower()[0] in "ty1"
+        os.environ.get("SKIP_LLM_API_KEY_VERIFICATION", "false").lower()[0] in "ty1"
 )
 
 
 class BrowserUseAgent(Agent):
     @time_execution_async("--run (agent)")
     async def run(
-        self,
-        max_steps: int = 100,
-        on_step_start: AgentHookFunc | None = None,
-        on_step_end: AgentHookFunc | None = None,
+            self, max_steps: int = 100, on_step_start: AgentHookFunc | None = None,
+            on_step_end: AgentHookFunc | None = None
     ) -> AgentHistoryList:
         """Execute the task with maximum number of steps"""
 
@@ -49,41 +51,28 @@ async def run(
         )
         signal_handler.register()
 
-        # Wait for verification task to complete if it exists
-        if hasattr(self, "_verification_task") and not self._verification_task.done():
-            try:
-                await self._verification_task
-            except Exception:
-                # Error already logged in the task
-                pass
-
         try:
             self._log_agent_run()
 
             # Execute initial actions if provided
             if self.initial_actions:
-                result = await self.multi_act(
-                    self.initial_actions, check_for_new_elements=False
-                )
+                result = await self.multi_act(self.initial_actions, check_for_new_elements=False)
                 self.state.last_result = result
 
             for step in range(max_steps):
                 # Check if waiting for user input after Ctrl+C
-                while self.state.paused:
-                    await asyncio.sleep(0.5)
-                    if self.state.stopped:
-                        break
+                if self.state.paused:
+                    signal_handler.wait_for_resume()
+                    signal_handler.reset()
 
                 # Check if we should stop due to too many failures
                 if self.state.consecutive_failures >= self.settings.max_failures:
-                    logger.error(
-                        f"❌ Stopping due to {self.settings.max_failures} consecutive failures"
-                    )
+                    logger.error(f'❌ Stopping due to {self.settings.max_failures} consecutive failures')
                     break
 
                 # Check control flags before each step
                 if self.state.stopped:
-                    logger.info("Agent stopped")
+                    logger.info('Agent stopped')
                     break
 
                 while self.state.paused:
@@ -108,15 +97,30 @@ async def run(
                     await self.log_completion()
                     break
             else:
-                logger.info("❌ Failed to complete task in maximum steps")
+                error_message = 'Failed to complete task in maximum steps'
+
+                self.state.history.history.append(
+                    AgentHistory(
+                        model_output=None,
+                        result=[ActionResult(error=error_message, include_in_memory=True)],
+                        state=BrowserStateHistory(
+                            url='',
+                            title='',
+                            tabs=[],
+                            interacted_element=[],
+                            screenshot=None,
+                        ),
+                        metadata=None,
+                    )
+                )
+
+                logger.info(f'❌ {error_message}')
 
             return self.state.history
 
         except KeyboardInterrupt:
             # Already handled by our signal handler, but catch any direct KeyboardInterrupt as well
-            logger.info(
-                "Got KeyboardInterrupt during execution, returning current history"
-            )
+            logger.info('Got KeyboardInterrupt during execution, returning current history')
             return self.state.history
 
         finally:
@@ -136,13 +140,29 @@ async def run(
                 )
             )
 
+            if self.settings.save_playwright_script_path:
+                logger.info(
+                    f'Agent run finished. Attempting to save Playwright script to: {self.settings.save_playwright_script_path}'
+                )
+                try:
+                    # Extract sensitive data keys if sensitive_data is provided
+                    keys = list(self.sensitive_data.keys()) if self.sensitive_data else None
+                    # Pass browser and context config to the saving method
+                    self.state.history.save_as_playwright_script(
+                        self.settings.save_playwright_script_path,
+                        sensitive_data_keys=keys,
+                        browser_config=self.browser.config,
+                        context_config=self.browser_context.config,
+                    )
+                except Exception as script_gen_err:
+                    # Log any error during script generation/saving
+                    logger.error(f'Failed to save Playwright script: {script_gen_err}', exc_info=True)
+
             await self.close()
 
             if self.settings.generate_gif:
-                output_path: str = "agent_history.gif"
+                output_path: str = 'agent_history.gif'
                 if isinstance(self.settings.generate_gif, str):
                     output_path = self.settings.generate_gif
 
-                create_history_gif(
-                    task=self.task, history=self.state.history, output_path=output_path
-                )
+                create_history_gif(task=self.task, history=self.state.history, output_path=output_path)
@@ -29,9 +29,10 @@
 from langgraph.graph import StateGraph
 from pydantic import BaseModel, Field
 
+from browser_use.browser.context import BrowserContextWindowSize, BrowserContextConfig
+
 from src.agent.browser_use.browser_use_agent import BrowserUseAgent
 from src.browser.custom_browser import CustomBrowser
-from src.browser.custom_context import CustomBrowserContextConfig
 from src.controller.custom_controller import CustomController
 from src.utils.mcp_client import setup_mcp_client_and_tools
 
@@ -47,12 +48,12 @@
 
 
 async def run_single_browser_task(
-    task_query: str,
-    task_id: str,
-    llm: Any,  # Pass the main LLM
-    browser_config: Dict[str, Any],
-    stop_event: threading.Event,
-    use_vision: bool = False,
+        task_query: str,
+        task_id: str,
+        llm: Any,  # Pass the main LLM
+        browser_config: Dict[str, Any],
+        stop_event: threading.Event,
+        use_vision: bool = False,
 ) -> Dict[str, Any]:
     """
     Runs a single BrowserUseAgent task.
@@ -104,10 +105,9 @@ async def run_single_browser_task(
             )
         )
 
-        context_config = CustomBrowserContextConfig(
+        context_config = BrowserContextConfig(
             save_downloads_path="./tmp/downloads",
-            window_width=window_w,
-            window_height=window_h,
+            browser_window_size=BrowserContextWindowSize(width=window_w, height=window_h),
             force_new_context=True,
         )
         bu_browser_context = await bu_browser.new_context(config=context_config)
@@ -198,12 +198,12 @@ class BrowserSearchInput(BaseModel):
 
 
 async def _run_browser_search_tool(
-    queries: List[str],
-    task_id: str,  # Injected dependency
-    llm: Any,  # Injected dependency
-    browser_config: Dict[str, Any],
-    stop_event: threading.Event,
-    max_parallel_browsers: int = 1,
+        queries: List[str],
+        task_id: str,  # Injected dependency
+        llm: Any,  # Injected dependency
+        browser_config: Dict[str, Any],
+        stop_event: threading.Event,
+        max_parallel_browsers: int = 1,
 ) -> List[Dict[str, Any]]:
     """
     Internal function to execute parallel browser searches based on LLM-provided queries.
@@ -267,11 +267,11 @@ async def task_wrapper(query):
 
 
 def create_browser_search_tool(
-    llm: Any,
-    browser_config: Dict[str, Any],
-    task_id: str,
-    stop_event: threading.Event,
-    max_parallel_browsers: int = 1,
+        llm: Any,
+        browser_config: Dict[str, Any],
+        task_id: str,
+        stop_event: threading.Event,
+        max_parallel_browsers: int = 1,
 ) -> StructuredTool:
     """Factory function to create the browser search tool with necessary dependencies."""
     # Use partial to bind the dependencies that aren't part of the LLM call arguments
@@ -553,7 +553,7 @@ async def research_execution_node(state: DeepResearchState) -> Dict[str, Any]:
     else:
         current_task_message = [
             SystemMessage(
-                content="You are a research assistant executing one step of a research plan. Use the available tools, especially the 'parallel_browser_search' tool, to gather information needed for the current task. Be precise with your search queries if using the browser tool."
+                content="You are a research assistant executing one step of a research plan. Use the available tools, especially the 'parallel_browser_search' tool, to gather information needed for the current task. Be precise with your search queries if using the browser tool. Please output at least one tool."
             ),
             HumanMessage(
                 content=f"Research Task (Step {current_step['step']}): {current_step['task']}"
@@ -582,8 +582,11 @@ async def research_execution_node(state: DeepResearchState) -> Dict[str, Any]:
             _save_plan_to_md(plan, output_dir)
             return {
                 "research_plan": plan,
-                "current_step_index": current_index + 1,
-                "error_message": f"LLM failed to call a tool for step {current_step['step']}.",
+                "status": "pending",
+                "current_step_index": current_index,
+                "messages": [
+                    f"LLM failed to call a tool for step {current_step['step']}. Response: {ai_response.content}"
+                    f". Please use tool to do research unless you are thinking or summary"],
             }
 
         # Process tool calls
@@ -665,8 +668,8 @@ async def research_execution_node(state: DeepResearchState) -> Dict[str, Any]:
         browser_tool_called = "parallel_browser_search" in executed_tool_names
         # We might need a more nuanced status based on the *content* of tool_results
         step_failed = (
-            any("Error:" in str(tr.content) for tr in tool_results)
-            or not browser_tool_called
+                any("Error:" in str(tr.content) for tr in tool_results)
+                or not browser_tool_called
         )
 
         if step_failed:
@@ -695,9 +698,9 @@ async def research_execution_node(state: DeepResearchState) -> Dict[str, Any]:
             "search_results": current_search_results,  # Update with new results
             "current_step_index": current_index + 1,
             "messages": state["messages"]
-            + current_task_message
-            + [ai_response]
-            + tool_results,
+                        + current_task_message
+                        + [ai_response]
+                        + tool_results,
             # Optionally return the tool_results messages if needed by downstream nodes
         }
 
@@ -879,10 +882,10 @@ def should_continue(state: DeepResearchState) -> str:
 
 class DeepResearchAgent:
     def __init__(
-        self,
-        llm: Any,
-        browser_config: Dict[str, Any],
-        mcp_server_config: Optional[Dict[str, Any]] = None,
+            self,
+            llm: Any,
+            browser_config: Dict[str, Any],
+            mcp_server_config: Optional[Dict[str, Any]] = None,
     ):
         """
         Initializes the DeepSearchAgent.
@@ -904,7 +907,7 @@ def __init__(
         self.runner: Optional[asyncio.Task] = None  # To hold the asyncio task for run
 
     async def _setup_tools(
-        self, task_id: str, stop_event: threading.Event, max_parallel_browsers: int = 1
+            self, task_id: str, stop_event: threading.Event, max_parallel_browsers: int = 1
     ) -> List[Tool]:
         """Sets up the basic tools (File I/O) and optional MCP tools."""
         tools = [
@@ -981,11 +984,11 @@ def _compile_graph(self) -> StateGraph:
         return app
 
     async def run(
-        self,
-        topic: str,
-        task_id: Optional[str] = None,
-        save_dir: str = "./tmp/deep_research",
-        max_parallel_browsers: int = 1,
+            self,
+            topic: str,
+            task_id: Optional[str] = None,
+            save_dir: str = "./tmp/deep_research",
+            max_parallel_browsers: int = 1,
     ) -> Dict[str, Any]:
         """
         Starts the deep research process (Async Generator Version).
 
@@ -26,32 +26,41 @@
 from browser_use.utils import time_execution_async
 import socket
 
-from .custom_context import CustomBrowserContext, CustomBrowserContextConfig
+from .custom_context import CustomBrowserContext
 
 logger = logging.getLogger(__name__)
 
 
 class CustomBrowser(Browser):
 
-    async def new_context(self, config: CustomBrowserContextConfig | None = None) -> CustomBrowserContext:
+    async def new_context(self, config: BrowserContextConfig | None = None) -> CustomBrowserContext:
         """Create a browser context"""
         browser_config = self.config.model_dump() if self.config else {}
         context_config = config.model_dump() if config else {}
         merged_config = {**browser_config, **context_config}
-        return CustomBrowserContext(config=CustomBrowserContextConfig(**merged_config), browser=self)
+        return CustomBrowserContext(config=BrowserContextConfig(**merged_config), browser=self)
 
     async def _setup_builtin_browser(self, playwright: Playwright) -> PlaywrightBrowser:
         """Sets up and returns a Playwright Browser instance with anti-detection measures."""
         assert self.config.browser_binary_path is None, 'browser_binary_path should be None if trying to use the builtin browsers'
 
-        if self.config.headless:
+        # Use the configured window size from new_context_config if available
+        if (
+                not self.config.headless
+                and hasattr(self.config, 'new_context_config')
+                and hasattr(self.config.new_context_config, 'browser_window_size')
+        ):
+            screen_size = self.config.new_context_config.browser_window_size.model_dump()
+            offset_x, offset_y = get_window_adjustments()
+        elif self.config.headless:
             screen_size = {'width': 1920, 'height': 1080}
             offset_x, offset_y = 0, 0
         else:
             screen_size = get_screen_resolution()
             offset_x, offset_y = get_window_adjustments()
 
         chrome_args = {
+            f'--remote-debugging-port={self.config.chrome_remote_debugging_port}',
             *CHROME_ARGS,
             *(CHROME_DOCKER_ARGS if IN_DOCKER else []),
             *(CHROME_HEADLESS_ARGS if self.config.headless else []),
@@ -70,8 +79,8 @@ async def _setup_builtin_browser(self, playwright: Playwright) -> PlaywrightBrow
 
         # check if port 9222 is already taken, if so remove the remote-debugging-port arg to prevent conflicts
         with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
-            if s.connect_ex(('localhost', 9222)) == 0:
-                chrome_args.remove('--remote-debugging-port=9222')
+            if s.connect_ex(('localhost', self.config.chrome_remote_debugging_port)) == 0:
+                chrome_args.remove(f'--remote-debugging-port={self.config.chrome_remote_debugging_port}')
 
         browser_class = getattr(playwright, self.config.browser_class)
         args = {
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-browser-use==0.1.45`
	`1`	`+browser-use==0.1.43`
`2`	`2`	`pyperclip==1.9.0`
`3`	`3`	`gradio==5.27.0`
`4`	`4`	`json-repair`