1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import gc
5
+ import inspect
6
+ import json
7
+ import logging
8
+ import os
9
+ import re
10
+ import time
11
+ from pathlib import Path
12
+ from typing import Any , Awaitable , Callable , Dict , Generic , List , Optional , TypeVar , Union
13
+
14
+ from dotenv import load_dotenv
15
+ from langchain_core .language_models .chat_models import BaseChatModel
16
+ from langchain_core .messages import (
17
+ BaseMessage ,
18
+ HumanMessage ,
19
+ SystemMessage ,
20
+ )
21
+
22
+ # from lmnr.sdk.decorators import observe
23
+ from pydantic import BaseModel , ValidationError
24
+
25
+ from browser_use .agent .gif import create_history_gif
26
+ from browser_use .agent .memory .service import Memory , MemorySettings
27
+ from browser_use .agent .message_manager .service import MessageManager , MessageManagerSettings
28
+ from browser_use .agent .message_manager .utils import convert_input_messages , extract_json_from_model_output , save_conversation
29
+ from browser_use .agent .prompts import AgentMessagePrompt , PlannerPrompt , SystemPrompt
30
+ from browser_use .agent .views import (
31
+ REQUIRED_LLM_API_ENV_VARS ,
32
+ ActionResult ,
33
+ AgentError ,
34
+ AgentHistory ,
35
+ AgentHistoryList ,
36
+ AgentOutput ,
37
+ AgentSettings ,
38
+ AgentState ,
39
+ AgentStepInfo ,
40
+ StepMetadata ,
41
+ ToolCallingMethod ,
42
+ )
43
+ from browser_use .browser .browser import Browser
44
+ from browser_use .browser .context import BrowserContext
45
+ from browser_use .browser .views import BrowserState , BrowserStateHistory
46
+ from browser_use .controller .registry .views import ActionModel
47
+ from browser_use .controller .service import Controller
48
+ from browser_use .dom .history_tree_processor .service import (
49
+ DOMHistoryElement ,
50
+ HistoryTreeProcessor ,
51
+ )
52
+ from browser_use .exceptions import LLMException
53
+ from browser_use .telemetry .service import ProductTelemetry
54
+ from browser_use .telemetry .views import (
55
+ AgentEndTelemetryEvent ,
56
+ AgentRunTelemetryEvent ,
57
+ AgentStepTelemetryEvent ,
58
+ )
59
+ from browser_use .utils import check_env_variables , time_execution_async , time_execution_sync
60
+ from browser_use .agent .service import Agent , AgentHookFunc
61
+
62
+ load_dotenv ()
63
+ logger = logging .getLogger (__name__ )
64
+
65
# Opt-out switch read once at import time. Only the first character of the
# lower-cased value is inspected: 't', 'y' or '1' enable the skip.
# Slicing with [:1] (instead of indexing [0]) keeps an empty value such as
# `SKIP_LLM_API_KEY_VERIFICATION=` from raising IndexError on import, and the
# tuple membership test makes that empty value evaluate to False (a substring
# test against 'ty1' would treat '' as a match).
SKIP_LLM_API_KEY_VERIFICATION = os.environ.get('SKIP_LLM_API_KEY_VERIFICATION', 'false').lower()[:1] in ('t', 'y', '1')
66
+
67
+
68
class BrowserUseAgent(Agent):
    """``Agent`` subclass that overrides ``run`` with its own step loop and cleanup."""

    @time_execution_async('--run (agent)')
    async def run(
        self,
        max_steps: int = 100,
        on_step_start: AgentHookFunc | None = None,
        on_step_end: AgentHookFunc | None = None,
    ) -> AgentHistoryList:
        """Execute the task with maximum number of steps.

        Args:
            max_steps: Upper bound on the number of loop iterations (steps).
            on_step_start: Optional hook awaited with this agent before each step.
            on_step_end: Optional hook awaited with this agent after each step.

        Returns:
            ``self.state.history``, both on normal completion and when a
            ``KeyboardInterrupt`` is caught during execution.
        """
        # Imported locally, as in the original implementation.
        from browser_use.utils import SignalHandler

        # We are inside a coroutine, so a loop is guaranteed to be running.
        loop = asyncio.get_running_loop()

        # Set up the Ctrl+C signal handler with callbacks specific to this agent
        signal_handler = SignalHandler(
            loop=loop,
            pause_callback=self.pause,
            resume_callback=self.resume,
            custom_exit_callback=None,  # No special cleanup needed on forced exit
            exit_on_second_int=True,
        )
        signal_handler.register()

        # Everything after register() lives inside the try so that the handlers
        # are always unregistered, even if this coroutine is cancelled while it
        # is still waiting for the verification task.
        try:
            # Wait for verification task to complete if it exists
            if hasattr(self, '_verification_task') and not self._verification_task.done():
                try:
                    await self._verification_task
                except Exception:
                    # Error already logged in the task
                    pass

            self._log_agent_run()

            # Execute initial actions if provided
            if self.initial_actions:
                result = await self.multi_act(self.initial_actions, check_for_new_elements=False)
                self.state.last_result = result

            for step in range(max_steps):
                # Check if waiting for user input after Ctrl+C
                while self.state.paused:
                    await asyncio.sleep(0.5)
                    if self.state.stopped:
                        break

                # Check if we should stop due to too many failures
                if self.state.consecutive_failures >= self.settings.max_failures:
                    logger.error(f'❌ Stopping due to {self.settings.max_failures} consecutive failures')
                    break

                # Check control flags before each step
                if self.state.stopped:
                    logger.info('Agent stopped')
                    break

                # NOTE(review): the break below only leaves this inner wait loop;
                # the step that follows still runs even if the agent was stopped
                # here - confirm that `step()` handles the stopped state itself.
                while self.state.paused:
                    await asyncio.sleep(0.2)  # Small delay to prevent CPU spinning
                    if self.state.stopped:  # Allow stopping while paused
                        break

                if on_step_start is not None:
                    await on_step_start(self)

                step_info = AgentStepInfo(step_number=step, max_steps=max_steps)
                await self.step(step_info)

                if on_step_end is not None:
                    await on_step_end(self)

                if self.state.history.is_done():
                    # Output validation is skipped on the final allowed step,
                    # since there would be no further step to retry in.
                    if self.settings.validate_output and step < max_steps - 1:
                        if not await self._validate_output():
                            continue

                    await self.log_completion()
                    break
            else:
                # Reached only when the loop ran out of steps without a `break`.
                logger.info('❌ Failed to complete task in maximum steps')

            return self.state.history

        except KeyboardInterrupt:
            # Already handled by our signal handler, but catch any direct KeyboardInterrupt as well
            logger.info('Got KeyboardInterrupt during execution, returning current history')
            return self.state.history

        finally:
            try:
                # Unregister signal handlers before cleanup
                signal_handler.unregister()

                self.telemetry.capture(
                    AgentEndTelemetryEvent(
                        agent_id=self.state.agent_id,
                        is_done=self.state.history.is_done(),
                        success=self.state.history.is_successful(),
                        steps=self.state.n_steps,
                        max_steps_reached=self.state.n_steps >= max_steps,
                        errors=self.state.history.errors(),
                        total_input_tokens=self.state.history.total_input_tokens(),
                        total_duration_seconds=self.state.history.total_duration_seconds(),
                    )
                )
            finally:
                # Always release the agent's resources, even if unregistering
                # the handlers or reporting telemetry raised.
                await self.close()

            if self.settings.generate_gif:
                # `generate_gif` may be a custom output path (str) instead of a
                # plain truthy flag; otherwise fall back to the default name.
                output_path: str = 'agent_history.gif'
                if isinstance(self.settings.generate_gif, str):
                    output_path = self.settings.generate_gif

                create_history_gif(task=self.task, history=self.state.history, output_path=output_path)
0 commit comments