Skip to content

Commit e5a7d0a

Browse files
nitpicker55555, actions-user, Wendong-Fan
authored
feat: Support persistent context and stealth mode (#2291)
Co-authored-by: GitHub Action <action@github.com>
Co-authored-by: Wendong-Fan <133094783+Wendong-Fan@users.noreply.github.com>
1 parent bc6545a commit e5a7d0a

File tree

7 files changed

+446
-137
lines changed

7 files changed

+446
-137
lines changed

camel/toolkits/async_browser_toolkit.py

Lines changed: 97 additions & 54 deletions
Original file line number | Diff line number | Diff line change
@@ -123,6 +123,7 @@ def __init__(
123123
cache_dir: Optional[str] = None,
124124
channel: Literal["chrome", "msedge", "chromium"] = "chromium",
125125
cookie_json_path: Optional[str] = None,
126+
user_data_dir: Optional[str] = None,
126127
):
127128
r"""
128129
Initialize the asynchronous browser core.
@@ -136,7 +137,11 @@ def __init__(
136137
cookie_json_path (Optional[str]): Path to a JSON file containing
137138
authentication cookies and browser storage state. If provided
138139
and the file exists, the browser will load this state to
139-
maintain authenticated sessions without requiring manual login.
140+
maintain authenticated sessions. This is primarily used when
141+
`user_data_dir` is not set.
142+
user_data_dir (Optional[str]): The directory to store user data
143+
for persistent context. If None, a fresh browser instance
144+
is used without saving data. (default: :obj:`None`)
140145
141146
Returns:
142147
None
@@ -151,6 +156,7 @@ def __init__(
151156
self.playwright = async_playwright()
152157
self.page_history: list[Any] = []
153158
self.cookie_json_path = cookie_json_path
159+
self.user_data_dir = user_data_dir
154160
self.playwright_server: Any = None
155161
self.playwright_started: bool = False
156162
self.browser: Any = None
@@ -163,6 +169,10 @@ def __init__(
163169
self.cache_dir = "tmp/" if cache_dir is None else cache_dir
164170
os.makedirs(self.cache_dir, exist_ok=True)
165171

172+
# Create user data directory only if specified
173+
if self.user_data_dir:
174+
os.makedirs(self.user_data_dir, exist_ok=True)
175+
166176
# Load the page script
167177
abs_dir_path = os.path.dirname(os.path.abspath(__file__))
168178
page_script_path = os.path.join(abs_dir_path, "page_script.js")
@@ -183,23 +193,56 @@ async def async_init(self) -> None:
183193
await self._ensure_browser_installed()
184194
self.playwright_server = await self.playwright.start()
185195
self.playwright_started = True
186-
# Launch the browser asynchronously.
187-
self.browser = await self.playwright_server.chromium.launch(
188-
headless=self.headless, channel=self.channel
196+
197+
browser_launch_args = [
198+
"--disable-blink-features=AutomationControlled", # Basic stealth
199+
]
200+
201+
user_agent_string = (
202+
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
203+
"AppleWebKit/537.36 (KHTML, like Gecko) "
204+
"Chrome/91.0.4472.124 Safari/537.36"
189205
)
190-
# Check if cookie file exists before using it to maintain
191-
# authenticated sessions. This prevents errors when the cookie file
192-
# doesn't exist
193-
if self.cookie_json_path and os.path.exists(self.cookie_json_path):
194-
self.context = await self.browser.new_context(
195-
accept_downloads=True, storage_state=self.cookie_json_path
206+
207+
if self.user_data_dir:
208+
self.context = await (
209+
self.playwright_server.chromium.launch_persistent_context(
210+
user_data_dir=self.user_data_dir,
211+
headless=self.headless,
212+
channel=self.channel,
213+
accept_downloads=True,
214+
user_agent=user_agent_string,
215+
java_script_enabled=True,
216+
args=browser_launch_args,
217+
)
196218
)
219+
self.browser = None # Not using a separate browser instance
220+
if len(self.context.pages) > 0: # Persistent context might
221+
# reopen pages
222+
self.page = self.context.pages[0]
223+
else:
224+
self.page = await self.context.new_page()
197225
else:
198-
self.context = await self.browser.new_context(
199-
accept_downloads=True,
226+
# Launch a fresh browser instance
227+
self.browser = await self.playwright_server.chromium.launch(
228+
headless=self.headless,
229+
channel=self.channel,
230+
args=browser_launch_args,
200231
)
201-
# Create a new page asynchronously.
202-
self.page = await self.context.new_page()
232+
233+
new_context_kwargs: Dict[str, Any] = {
234+
"accept_downloads": True,
235+
"user_agent": user_agent_string,
236+
"java_script_enabled": True,
237+
}
238+
if self.cookie_json_path and os.path.exists(self.cookie_json_path):
239+
new_context_kwargs["storage_state"] = self.cookie_json_path
240+
241+
self.context = await self.browser.new_context(**new_context_kwargs)
242+
self.page = await self.context.new_page()
243+
244+
assert self.context is not None
245+
assert self.page is not None
203246

204247
def init(self) -> Coroutine[Any, Any, None]:
205248
r"""Initialize the browser asynchronously."""
@@ -827,7 +870,14 @@ def back(self) -> Coroutine[Any, Any, None]:
827870

828871
async def async_close(self) -> None:
829872
r"""Asynchronously close the browser."""
830-
await self.browser.close()
873+
if self.context is not None:
874+
await self.context.close()
875+
if self.browser is not None: # Only close browser if it was
876+
# launched separately
877+
await self.browser.close()
878+
if self.playwright_server and self.playwright_started:
879+
await self.playwright_server.stop()
880+
self.playwright_started = False
831881

832882
def close(self) -> Coroutine[Any, Any, None]:
833883
r"""Close the browser."""
@@ -943,6 +993,7 @@ def __init__(
943993
planning_agent_model: Optional[BaseModelBackend] = None,
944994
output_language: str = "en",
945995
cookie_json_path: Optional[str] = None,
996+
user_data_dir: Optional[str] = None,
946997
):
947998
r"""Initialize the BrowserToolkit instance.
948999
@@ -966,13 +1017,16 @@ def __init__(
9661017
maintain authenticated sessions without requiring manual
9671018
login.
9681019
(default: :obj:`None`)
1020+
user_data_dir (Optional[str]): The directory to store user data
1021+
for persistent context. (default: :obj:`None`)
9691022
"""
9701023
super().__init__()
9711024
self.browser = AsyncBaseBrowser(
9721025
headless=headless,
9731026
cache_dir=cache_dir,
9741027
channel=channel,
9751028
cookie_json_path=cookie_json_path,
1029+
user_data_dir=user_data_dir,
9761030
)
9771031

9781032
self.history_window = history_window
@@ -991,7 +1045,7 @@ def _reset(self):
9911045
os.makedirs(self.browser.cache_dir, exist_ok=True)
9921046

9931047
def _initialize_agent(self) -> Tuple["ChatAgent", "ChatAgent"]:
994-
r"""Initialize the agent."""
1048+
r"""Initialize the planning and web agents."""
9951049
from camel.agents.chat_agent import ChatAgent
9961050

9971051
if self.web_agent_model is None:
@@ -1060,7 +1114,7 @@ async def async_observe(
10601114
)
10611115
# Reset the history message of web_agent.
10621116
self.web_agent.reset()
1063-
resp = self.web_agent.step(message)
1117+
resp = await self.web_agent.astep(message)
10641118

10651119
resp_content = resp.msgs[0].content
10661120

@@ -1196,43 +1250,29 @@ def _fix_action_code(action_code: str) -> str:
11961250
f"correct identifier.",
11971251
)
11981252

1199-
def _get_final_answer(self, task_prompt: str) -> str:
1200-
r"""Get the final answer based on the task prompt and current browser
1201-
state. It is used when the agent thinks that the task can be completed
1202-
without any further action, and answer can be directly found in the
1203-
current viewport.
1204-
"""
1205-
1206-
prompt = GET_FINAL_ANSWER_PROMPT_TEMPLATE.format(
1207-
history=self.history, task_prompt=task_prompt
1208-
)
1209-
1210-
message = BaseMessage.make_user_message(
1211-
role_name='user',
1212-
content=prompt,
1253+
async def _async_get_final_answer(self, task_prompt: str) -> str:
1254+
r"""Generate the final answer based on the task prompt."""
1255+
final_answer_prompt = GET_FINAL_ANSWER_PROMPT_TEMPLATE.format(
1256+
task_prompt=task_prompt, history=self.history
12131257
)
1258+
response = await self.planning_agent.astep(final_answer_prompt)
1259+
if response.msgs is None or len(response.msgs) == 0:
1260+
raise RuntimeError("Got empty final answer from planning agent.")
1261+
return response.msgs[0].content
12141262

1215-
resp = self.web_agent.step(message)
1216-
return resp.msgs[0].content
1217-
1218-
def _task_planning(self, task_prompt: str, start_url: str) -> str:
1219-
r"""Plan the task based on the given task prompt."""
1220-
1221-
# Here are the available browser functions we can
1222-
# use: {AVAILABLE_ACTIONS_PROMPT}
1223-
1263+
async def _async_task_planning(
1264+
self, task_prompt: str, start_url: str
1265+
) -> str:
1266+
r"""Generate a detailed plan for the given task."""
12241267
planning_prompt = TASK_PLANNING_PROMPT_TEMPLATE.format(
12251268
task_prompt=task_prompt, start_url=start_url
12261269
)
1270+
response = await self.planning_agent.astep(planning_prompt)
1271+
if response.msgs is None or len(response.msgs) == 0:
1272+
raise RuntimeError("Got empty plan from planning agent.")
1273+
return response.msgs[0].content
12271274

1228-
message = BaseMessage.make_user_message(
1229-
role_name='user', content=planning_prompt
1230-
)
1231-
1232-
resp = self.planning_agent.step(message)
1233-
return resp.msgs[0].content
1234-
1235-
def _task_replanning(
1275+
async def _async_task_replanning(
12361276
self, task_prompt: str, detailed_plan: str
12371277
) -> Tuple[bool, str]:
12381278
r"""Replan the task based on the given task prompt.
@@ -1252,12 +1292,11 @@ def _task_replanning(
12521292
replanning_prompt = TASK_REPLANNING_PROMPT_TEMPLATE.format(
12531293
task_prompt=task_prompt,
12541294
detailed_plan=detailed_plan,
1255-
history_window=self.history_window,
12561295
history=self.history[-self.history_window :],
12571296
)
12581297
# Reset the history message of planning_agent.
12591298
self.planning_agent.reset()
1260-
resp = self.planning_agent.step(replanning_prompt)
1299+
resp = await self.planning_agent.astep(replanning_prompt)
12611300
resp_dict = _parse_json_output(resp.msgs[0].content, logger)
12621301

12631302
if_need_replan = resp_dict.get("if_need_replan", False)
@@ -1287,7 +1326,7 @@ async def browse_url(
12871326

12881327
self._reset()
12891328
task_completed = False
1290-
detailed_plan = self._task_planning(task_prompt, start_url)
1329+
detailed_plan = await self._async_task_planning(task_prompt, start_url)
12911330
logger.debug(f"Detailed plan: {detailed_plan}")
12921331

12931332
await self.browser.async_init()
@@ -1331,7 +1370,11 @@ async def browse_url(
13311370
self.history.append(trajectory_info)
13321371

13331372
# replan the task if necessary
1334-
if_need_replan, replanned_schema = self._task_replanning(
1373+
(
1374+
if_need_replan,
1375+
replanned_schema,
1376+
# ruff: noqa: E501
1377+
) = await self._async_task_replanning(
13351378
task_prompt, detailed_plan
13361379
)
13371380
if if_need_replan:
@@ -1343,11 +1386,11 @@ async def browse_url(
13431386
The task is not completed within the round limit. Please check
13441387
the last round {self.history_window} information to see if
13451388
there is any useful information:
1346-
<history>{self.history[-self.history_window :]}</history>
1389+
<history>{self.history[-self.history_window:]}</history>
13471390
"""
13481391

13491392
else:
1350-
simulation_result = self._get_final_answer(task_prompt)
1393+
simulation_result = await self._async_get_final_answer(task_prompt)
13511394

13521395
await self.browser.close()
13531396
return simulation_result

0 commit comments

Comments
 (0)