@@ -123,6 +123,7 @@ def __init__(
123123        cache_dir : Optional [str ] =  None ,
124124        channel : Literal ["chrome" , "msedge" , "chromium" ] =  "chromium" ,
125125        cookie_json_path : Optional [str ] =  None ,
126+         user_data_dir : Optional [str ] =  None ,
126127    ):
127128        r""" 
128129        Initialize the asynchronous browser core. 
@@ -136,7 +137,11 @@ def __init__(
136137            cookie_json_path (Optional[str]): Path to a JSON file containing 
137138                authentication cookies and browser storage state. If provided 
138139                and the file exists, the browser will load this state to 
139-                 maintain authenticated sessions without requiring manual login. 
140+                 maintain authenticated sessions. This is primarily used when 
141+                 `user_data_dir` is not set. 
142+             user_data_dir (Optional[str]): The directory to store user data 
143+                 for persistent context. If None, a fresh browser instance 
144+                 is used without saving data. (default: :obj:`None`) 
140145
141146        Returns: 
142147            None 
@@ -151,6 +156,7 @@ def __init__(
151156        self .playwright  =  async_playwright ()
152157        self .page_history : list [Any ] =  []
153158        self .cookie_json_path  =  cookie_json_path 
159+         self .user_data_dir  =  user_data_dir 
154160        self .playwright_server : Any  =  None 
155161        self .playwright_started : bool  =  False 
156162        self .browser : Any  =  None 
@@ -163,6 +169,10 @@ def __init__(
163169        self .cache_dir  =  "tmp/"  if  cache_dir  is  None  else  cache_dir 
164170        os .makedirs (self .cache_dir , exist_ok = True )
165171
172+         # Create user data directory only if specified 
173+         if  self .user_data_dir :
174+             os .makedirs (self .user_data_dir , exist_ok = True )
175+ 
166176        # Load the page script 
167177        abs_dir_path  =  os .path .dirname (os .path .abspath (__file__ ))
168178        page_script_path  =  os .path .join (abs_dir_path , "page_script.js" )
@@ -183,23 +193,56 @@ async def async_init(self) -> None:
183193            await  self ._ensure_browser_installed ()
184194            self .playwright_server  =  await  self .playwright .start ()
185195            self .playwright_started  =  True 
186-         # Launch the browser asynchronously. 
187-         self .browser  =  await  self .playwright_server .chromium .launch (
188-             headless = self .headless , channel = self .channel 
196+ 
197+         browser_launch_args  =  [
198+             "--disable-blink-features=AutomationControlled" ,  # Basic stealth 
199+         ]
200+ 
201+         user_agent_string  =  (
202+             "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " 
203+             "AppleWebKit/537.36 (KHTML, like Gecko) " 
204+             "Chrome/91.0.4472.124 Safari/537.36" 
189205        )
190-         # Check if cookie file exists before using it to maintain 
191-         # authenticated sessions. This prevents errors when the cookie file 
192-         # doesn't exist 
193-         if  self .cookie_json_path  and  os .path .exists (self .cookie_json_path ):
194-             self .context  =  await  self .browser .new_context (
195-                 accept_downloads = True , storage_state = self .cookie_json_path 
206+ 
207+         if  self .user_data_dir :
208+             self .context  =  await  (
209+                 self .playwright_server .chromium .launch_persistent_context (
210+                     user_data_dir = self .user_data_dir ,
211+                     headless = self .headless ,
212+                     channel = self .channel ,
213+                     accept_downloads = True ,
214+                     user_agent = user_agent_string ,
215+                     java_script_enabled = True ,
216+                     args = browser_launch_args ,
217+                 )
196218            )
219+             self .browser  =  None   # Not using a separate browser instance 
220+             if  len (self .context .pages ) >  0 :  # Persistent context might 
221+                 # reopen pages 
222+                 self .page  =  self .context .pages [0 ]
223+             else :
224+                 self .page  =  await  self .context .new_page ()
197225        else :
198-             self .context  =  await  self .browser .new_context (
199-                 accept_downloads = True ,
226+             # Launch a fresh browser instance 
227+             self .browser  =  await  self .playwright_server .chromium .launch (
228+                 headless = self .headless ,
229+                 channel = self .channel ,
230+                 args = browser_launch_args ,
200231            )
201-         # Create a new page asynchronously. 
202-         self .page  =  await  self .context .new_page ()
232+ 
233+             new_context_kwargs : Dict [str , Any ] =  {
234+                 "accept_downloads" : True ,
235+                 "user_agent" : user_agent_string ,
236+                 "java_script_enabled" : True ,
237+             }
238+             if  self .cookie_json_path  and  os .path .exists (self .cookie_json_path ):
239+                 new_context_kwargs ["storage_state" ] =  self .cookie_json_path 
240+ 
241+             self .context  =  await  self .browser .new_context (** new_context_kwargs )
242+             self .page  =  await  self .context .new_page ()
243+ 
244+         assert  self .context  is  not   None 
245+         assert  self .page  is  not   None 
203246
204247    def  init (self ) ->  Coroutine [Any , Any , None ]:
205248        r"""Initialize the browser asynchronously.""" 
@@ -827,7 +870,14 @@ def back(self) -> Coroutine[Any, Any, None]:
827870
async def async_close(self) -> None:
    r"""Asynchronously close the browser and release Playwright resources.

    Resources are torn down in order: the browser context, then the
    browser (only when one was launched separately; ``self.browser`` is
    ``None`` in persistent-context mode), then the Playwright server.

    Each later step runs even if an earlier one raises, so a failure
    while closing the context does not leave the browser process or the
    Playwright server running. The exception still propagates to the
    caller once the remaining cleanup has been attempted.

    Returns:
        None
    """
    try:
        if self.context is not None:
            await self.context.close()
    finally:
        try:
            # Only close the browser if it was launched separately.
            if self.browser is not None:
                await self.browser.close()
        finally:
            if self.playwright_server and self.playwright_started:
                await self.playwright_server.stop()
                self.playwright_started = False
831881
832882    def  close (self ) ->  Coroutine [Any , Any , None ]:
833883        r"""Close the browser.""" 
@@ -943,6 +993,7 @@ def __init__(
943993        planning_agent_model : Optional [BaseModelBackend ] =  None ,
944994        output_language : str  =  "en" ,
945995        cookie_json_path : Optional [str ] =  None ,
996+         user_data_dir : Optional [str ] =  None ,
946997    ):
947998        r"""Initialize the BrowserToolkit instance. 
948999
@@ -966,13 +1017,16 @@ def __init__(
9661017                maintain authenticated sessions without requiring manual 
9671018                login. 
9681019                (default: :obj:`None`) 
1020+             user_data_dir (Optional[str]): The directory to store user data 
1021+                 for persistent context. (default: :obj:`None`) 
9691022        """ 
9701023        super ().__init__ ()
9711024        self .browser  =  AsyncBaseBrowser (
9721025            headless = headless ,
9731026            cache_dir = cache_dir ,
9741027            channel = channel ,
9751028            cookie_json_path = cookie_json_path ,
1029+             user_data_dir = user_data_dir ,
9761030        )
9771031
9781032        self .history_window  =  history_window 
@@ -991,7 +1045,7 @@ def _reset(self):
9911045        os .makedirs (self .browser .cache_dir , exist_ok = True )
9921046
9931047    def  _initialize_agent (self ) ->  Tuple ["ChatAgent" , "ChatAgent" ]:
994-         r"""Initialize the agent .""" 
1048+         r"""Initialize the planning and web agents .""" 
9951049        from  camel .agents .chat_agent  import  ChatAgent 
9961050
9971051        if  self .web_agent_model  is  None :
@@ -1060,7 +1114,7 @@ async def async_observe(
10601114        )
10611115        # Reset the history message of web_agent. 
10621116        self .web_agent .reset ()
1063-         resp  =  self .web_agent .step (message )
1117+         resp  =  await   self .web_agent .astep (message )
10641118
10651119        resp_content  =  resp .msgs [0 ].content 
10661120
@@ -1196,43 +1250,29 @@ def _fix_action_code(action_code: str) -> str:
11961250                f"correct identifier." ,
11971251            )
11981252
1199-     def  _get_final_answer (self , task_prompt : str ) ->  str :
1200-         r"""Get the final answer based on the task prompt and current browser 
1201-         state. It is used when the agent thinks that the task can be completed 
1202-         without any further action, and answer can be directly found in the 
1203-         current viewport. 
1204-         """ 
1205- 
1206-         prompt  =  GET_FINAL_ANSWER_PROMPT_TEMPLATE .format (
1207-             history = self .history , task_prompt = task_prompt 
1208-         )
1209- 
1210-         message  =  BaseMessage .make_user_message (
1211-             role_name = 'user' ,
1212-             content = prompt ,
async def _async_get_final_answer(self, task_prompt: str) -> str:
    r"""Ask the planning agent for the final answer to the task.

    The prompt is rendered from ``GET_FINAL_ANSWER_PROMPT_TEMPLATE``
    with the task prompt and the recorded ``self.history``.

    Args:
        task_prompt (str): The task the final answer should address.

    Returns:
        str: The content of the first message in the agent's response.

    Raises:
        RuntimeError: If the response carries no messages.
    """
    prompt = GET_FINAL_ANSWER_PROMPT_TEMPLATE.format(
        history=self.history,
        task_prompt=task_prompt,
    )
    reply = await self.planning_agent.astep(prompt)

    messages = reply.msgs
    if messages is not None and len(messages) != 0:
        return messages[0].content
    raise RuntimeError("Got empty final answer from planning agent.")
12141262
1215-         resp  =  self .web_agent .step (message )
1216-         return  resp .msgs [0 ].content 
1217- 
1218-     def  _task_planning (self , task_prompt : str , start_url : str ) ->  str :
1219-         r"""Plan the task based on the given task prompt.""" 
1220- 
1221-         # Here are the available browser functions we can 
1222-         # use: {AVAILABLE_ACTIONS_PROMPT} 
1223- 
async def _async_task_planning(self, task_prompt: str, start_url: str) -> str:
    r"""Ask the planning agent for a detailed plan for the task.

    The prompt is rendered from ``TASK_PLANNING_PROMPT_TEMPLATE`` with
    the task prompt and the start URL.

    Args:
        task_prompt (str): The task to plan for.
        start_url (str): The URL inserted into the planning prompt.

    Returns:
        str: The content of the first message in the agent's response.

    Raises:
        RuntimeError: If the response carries no messages.
    """
    prompt = TASK_PLANNING_PROMPT_TEMPLATE.format(
        start_url=start_url,
        task_prompt=task_prompt,
    )
    reply = await self.planning_agent.astep(prompt)

    messages = reply.msgs
    if messages is not None and len(messages) != 0:
        return messages[0].content
    raise RuntimeError("Got empty plan from planning agent.")
12271274
1228-         message  =  BaseMessage .make_user_message (
1229-             role_name = 'user' , content = planning_prompt 
1230-         )
1231- 
1232-         resp  =  self .planning_agent .step (message )
1233-         return  resp .msgs [0 ].content 
1234- 
1235-     def  _task_replanning (
1275+     async  def  _async_task_replanning (
12361276        self , task_prompt : str , detailed_plan : str 
12371277    ) ->  Tuple [bool , str ]:
12381278        r"""Replan the task based on the given task prompt. 
@@ -1252,12 +1292,11 @@ def _task_replanning(
12521292        replanning_prompt  =  TASK_REPLANNING_PROMPT_TEMPLATE .format (
12531293            task_prompt = task_prompt ,
12541294            detailed_plan = detailed_plan ,
1255-             history_window = self .history_window ,
12561295            history = self .history [- self .history_window  :],
12571296        )
12581297        # Reset the history message of planning_agent. 
12591298        self .planning_agent .reset ()
1260-         resp  =  self .planning_agent .step (replanning_prompt )
1299+         resp  =  await   self .planning_agent .astep (replanning_prompt )
12611300        resp_dict  =  _parse_json_output (resp .msgs [0 ].content , logger )
12621301
12631302        if_need_replan  =  resp_dict .get ("if_need_replan" , False )
@@ -1287,7 +1326,7 @@ async def browse_url(
12871326
12881327        self ._reset ()
12891328        task_completed  =  False 
1290-         detailed_plan  =  self ._task_planning (task_prompt , start_url )
1329+         detailed_plan  =  await   self ._async_task_planning (task_prompt , start_url )
12911330        logger .debug (f"Detailed plan: { detailed_plan }  " )
12921331
12931332        await  self .browser .async_init ()
@@ -1331,7 +1370,11 @@ async def browse_url(
13311370                self .history .append (trajectory_info )
13321371
13331372                # replan the task if necessary 
1334-                 if_need_replan , replanned_schema  =  self ._task_replanning (
1373+                 (
1374+                     if_need_replan ,
1375+                     replanned_schema ,
1376+                     # ruff: noqa: E501 
1377+                 ) =  await  self ._async_task_replanning (
13351378                    task_prompt , detailed_plan 
13361379                )
13371380                if  if_need_replan :
@@ -1343,11 +1386,11 @@ async def browse_url(
13431386                The task is not completed within the round limit. Please check  
13441387                the last round { self .history_window }   information to see if  
13451388                there is any useful information: 
1346-                 <history>{ self .history [- self .history_window   :]}  </history> 
1389+                 <history>{ self .history [- self .history_window :]}  </history> 
13471390            """ 
13481391
13491392        else :
1350-             simulation_result  =  self ._get_final_answer (task_prompt )
1393+             simulation_result  =  await   self ._async_get_final_answer (task_prompt )
13511394
13521395        await  self .browser .close ()
13531396        return  simulation_result 
0 commit comments