Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions camel/models/base_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,7 @@ def __init__(
self._max_retries = max_retries
# Initialize logging configuration
self._log_enabled = (
os.environ.get("CAMEL_MODEL_LOG_ENABLED", "False").lower()
== "true"
os.environ.get("CAMEL_MODEL_LOG_ENABLED", "True").lower() == "true"
)
self._log_dir = os.environ.get("CAMEL_LOG_DIR", "camel_logs")

Expand Down
90 changes: 87 additions & 3 deletions camel/models/moonshot_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def __init__(
model_type: Union[ModelType, str],
model_config_dict: Optional[Dict[str, Any]] = None,
api_key: Optional[str] = None,
url: Optional[str] = "https://api.moonshot.ai/v1",
url: Optional[str] = None,
token_counter: Optional[BaseTokenCounter] = None,
timeout: Optional[float] = None,
max_retries: int = 3,
Expand All @@ -93,7 +93,12 @@ def __init__(
if model_config_dict is None:
model_config_dict = MoonshotConfig().as_dict()
api_key = api_key or os.environ.get("MOONSHOT_API_KEY")
url = url or os.environ.get("MOONSHOT_API_BASE_URL")
# Preserve default URL if not provided
if url is None:
url = (
os.environ.get("MOONSHOT_API_BASE_URL")
or "https://api.moonshot.ai/v1"
)
timeout = timeout or float(os.environ.get("MODEL_TIMEOUT", 180))
super().__init__(
model_type=model_type,
Expand Down Expand Up @@ -130,14 +135,93 @@ def _prepare_request(
request_config = copy.deepcopy(self.model_config_dict)

if tools:
request_config["tools"] = tools
# Clean tools to remove null types (Moonshot API incompatibility)
cleaned_tools = self._clean_tool_schemas(tools)
request_config["tools"] = cleaned_tools
elif response_format:
# Use the same approach as DeepSeek for structured output
try_modify_message_with_format(messages[-1], response_format)
request_config["response_format"] = {"type": "json_object"}

return request_config

def _clean_tool_schemas(
self, tools: List[Dict[str, Any]]
) -> List[Dict[str, Any]]:
r"""Clean tool schemas to remove null types for Moonshot compatibility.

Moonshot API doesn't accept {"type": "null"} in anyOf schemas.
This method removes null type definitions from parameters.

Args:
tools (List[Dict[str, Any]]): Original tool schemas.

Returns:
List[Dict[str, Any]]: Cleaned tool schemas.
"""
import copy

def remove_null_from_schema(schema: Any) -> Any:
"""Recursively remove null types from schema."""
if isinstance(schema, dict):
# Create a copy to avoid modifying the original
result = {}

for key, value in schema.items():
if key == 'type' and isinstance(value, list):
# Handle type arrays like ["string", "null"]
filtered_types = [t for t in value if t != 'null']
if len(filtered_types) == 1:
# Single type remains, convert to string
result[key] = filtered_types[0]
elif len(filtered_types) > 1:
# Multiple types remain, keep as array
result[key] = filtered_types
else:
# All were null, use string as fallback
result[key] = 'string'
elif key == 'anyOf':
# Handle anyOf with null types
filtered = [
item
for item in value
if not (
isinstance(item, dict)
and item.get('type') == 'null'
)
]
if len(filtered) == 1:
# If only one type remains, flatten it
return remove_null_from_schema(filtered[0])
elif len(filtered) > 1:
result[key] = [
remove_null_from_schema(item)
for item in filtered
]
else:
# All were null, return string type as fallback
return {"type": "string"}
else:
# Recursively process other values
result[key] = remove_null_from_schema(value)

return result
elif isinstance(schema, list):
return [remove_null_from_schema(item) for item in schema]
else:
return schema

cleaned_tools = copy.deepcopy(tools)
for tool in cleaned_tools:
if 'function' in tool and 'parameters' in tool['function']:
params = tool['function']['parameters']
if 'properties' in params:
params['properties'] = remove_null_from_schema(
params['properties']
)

return cleaned_tools

@observe()
async def _arun(
self,
Expand Down
54 changes: 18 additions & 36 deletions camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit_ts.py
Original file line number Diff line number Diff line change
Expand Up @@ -555,7 +555,7 @@ async def browser_get_page_snapshot(self) -> str:
async def browser_get_som_screenshot(
self,
read_image: bool = True,
instruction: Optional[str] = None,
instruction: str = "",
) -> str:
r"""Captures a screenshot with interactive elements highlighted.

Expand Down Expand Up @@ -645,10 +645,9 @@ async def browser_get_som_screenshot(
from PIL import Image

img = Image.open(file_path)
inst = instruction if instruction is not None else ""
message = BaseMessage.make_user_message(
role_name="User",
content=inst,
content=instruction,
image_list=[img],
)

Expand Down Expand Up @@ -722,50 +721,33 @@ async def browser_click(self, *, ref: str) -> Dict[str, Any]:
async def browser_type(
self,
*,
ref: Optional[str] = None,
text: Optional[str] = None,
inputs: Optional[List[Dict[str, str]]] = None,
ref: str,
text: str,
) -> Dict[str, Any]:
r"""Types text into one or more input elements on the page.

This method supports two modes:
1. Single input mode (backward compatible): Provide 'ref' and 'text'
2. Multiple inputs mode: Provide 'inputs' as a list of dictionaries
with 'ref' and 'text' keys
r"""Types text into an input element on the page.

Args:
ref (Optional[str]): The `ref` ID of the input element, from a
snapshot. Required when using single input mode.
text (Optional[str]): The text to type into the element. Required
when using single input mode.
inputs (Optional[List[Dict[str, str]]]): List of dictionaries,
each containing 'ref' and 'text' keys for typing into multiple
elements. Example: [{'ref': '1', 'text': 'username'},
{'ref': '2', 'text': 'password'}]
ref (str): The `ref` ID of the input element, from a snapshot.
text (str): The text to type into the element.

Returns:
Dict[str, Any]: A dictionary with the result of the action:
- "result" (str): Confirmation of the action.
- "result" (str): Confirmation message describing the action.
- "snapshot" (str): A textual snapshot of the page after
typing.
typing, showing the updated state of interactive elements.
- "tabs" (List[Dict]): Information about all open tabs.
- "current_tab" (int): Index of the active tab.
- "total_tabs" (int): Total number of open tabs.
- "details" (Dict[str, Any]): When using multiple inputs,
contains success/error status for each ref.
- "current_tab" (int): Index of the active tab (zero-based).

Example:
>>> browser_type(
... ref="3",
... text="hello@example.com"
... )
Typed text into element 3
"""
try:
ws_wrapper = await self._get_ws_wrapper()

if ref is not None and text is not None:
result = await ws_wrapper.type(ref, text)
elif inputs is not None:
result = await ws_wrapper.type_multiple(inputs)
else:
raise ValueError(
"Either provide 'ref' and 'text' for single input, "
"or 'inputs' for multiple inputs"
)
result = await ws_wrapper.type(ref, text)

tab_info = await ws_wrapper.get_tab_info()
result.update(
Expand Down
69 changes: 53 additions & 16 deletions camel/toolkits/hybrid_browser_toolkit/ts/src/browser-session.ts
Original file line number Diff line number Diff line change
Expand Up @@ -646,25 +646,43 @@ export class HybridBrowserSession {
// Handle multiple inputs if provided
if (inputs && inputs.length > 0) {
const results: Record<string, { success: boolean; error?: string }> = {};

for (const input of inputs) {
const singleResult = await this.performType(page, input.ref, input.text);
results[input.ref] = {
success: singleResult.success,
error: singleResult.error
};
}

// Check if all inputs were successful
const allSuccess = Object.values(results).every(r => r.success);
const errors = Object.entries(results)

// Check how many inputs were successful
const successfulRefs = Object.entries(results)
.filter(([_, r]) => r.success)
.map(([ref, _]) => ref);
const failedRefs = Object.entries(results)
.filter(([_, r]) => !r.success)
.map(([ref, r]) => `${ref}: ${r.error}`)
.join('; ');

.map(([ref, r]) => `${ref}: ${r.error}`);

const hasAnySuccess = successfulRefs.length > 0;
const allSuccess = failedRefs.length === 0;

// Build detailed error message
let errorMessage: string | undefined;
if (!allSuccess) {
const parts: string[] = [];
if (successfulRefs.length > 0) {
parts.push(`Successfully typed into: ${successfulRefs.join(', ')}`);
}
if (failedRefs.length > 0) {
parts.push(`Failed: ${failedRefs.join('; ')}`);
}
errorMessage = parts.join('. ');
}

// Return success if at least one input succeeded
return {
success: allSuccess,
error: allSuccess ? undefined : `Some inputs failed: ${errors}`,
success: hasAnySuccess,
error: errorMessage,
details: results
};
}
Expand Down Expand Up @@ -1137,27 +1155,46 @@ export class HybridBrowserSession {
const typeStart = Date.now();

const typeResult = await this.performType(page, action.ref, action.text, action.inputs);


// For single input mode: throw error if failed
// For multiple inputs mode: only throw if ALL inputs failed
if (!typeResult.success) {
throw new Error(`Type failed: ${typeResult.error}`);
// Check if this is multiple inputs mode
if (typeResult.details) {
const hasAnySuccess = Object.values(typeResult.details).some((r: any) => r.success);
if (!hasAnySuccess) {
// All inputs failed, throw error
throw new Error(`Type failed: ${typeResult.error}`);
}
// Some inputs succeeded, continue with partial success
} else {
// Single input mode failed, throw error
throw new Error(`Type failed: ${typeResult.error}`);
}
}

// Set custom message and details if multiple inputs were used
if (typeResult.details) {
const successCount = Object.values(typeResult.details).filter((r: any) => r.success).length;
const totalCount = Object.keys(typeResult.details).length;
customMessage = `Typed text into ${successCount}/${totalCount} elements`;
if (typeResult.error) {
// Partial success - include error in message
customMessage = `Typed text into ${successCount}/${totalCount} elements. ${typeResult.error}`;
} else {
// Full success
customMessage = `Typed text into ${successCount}/${totalCount} elements`;
}
actionDetails = typeResult.details;
}

// Capture diff snapshot if present
if (typeResult.diffSnapshot) {
if (!actionDetails) {
actionDetails = {};
}
actionDetails.diffSnapshot = typeResult.diffSnapshot;
}

actionExecutionTime = Date.now() - typeStart;
break;
}
Expand Down
3 changes: 2 additions & 1 deletion camel/toolkits/hybrid_browser_toolkit/ts/websocket-server.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ class WebSocketBrowserServer {

async start() {
return new Promise((resolve, reject) => {
this.server = new WebSocket.Server({
this.server = new WebSocket.Server({
host: '127.0.0.1',
port: this.port,
maxPayload: 50 * 1024 * 1024 // 50MB limit instead of default 1MB
}, () => {
Expand Down
2 changes: 1 addition & 1 deletion camel/types/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,7 @@ class ModelType(UnifiedModelType, Enum):
MOONSHOT_V1_8K = "moonshot-v1-8k"
MOONSHOT_V1_32K = "moonshot-v1-32k"
MOONSHOT_V1_128K = "moonshot-v1-128k"
MOONSHOT_KIMI_K2 = "kimi-k2-0711-preview"
MOONSHOT_KIMI_K2 = "kimi-latest"

# SiliconFlow models support tool calling
SILICONFLOW_DEEPSEEK_V2_5 = "deepseek-ai/DeepSeek-V2.5"
Expand Down
10 changes: 6 additions & 4 deletions examples/toolkits/hybrid_browser_toolkit_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,12 @@
logging.getLogger('camel.toolkits.hybrid_browser_toolkit').setLevel(
logging.DEBUG
)
USER_DATA_DIR = "User_Data"
USER_DATA_DIR = "/Users/puzhen/Desktop/pre/camel_project/camel/UserData"

model_backend = ModelFactory.create(
model_platform=ModelPlatformType.OPENAI,
model_type=ModelType.GPT_4O,
model_platform=ModelPlatformType.MOONSHOT,
model_type=ModelType.MOONSHOT_KIMI_K2,
url="https://api.moonshot.ai/v1", # Explicitly specify Moonshot API URL
model_config_dict={"temperature": 0.0, "top_p": 1},
)

Expand Down Expand Up @@ -67,7 +68,6 @@
"browser_type",
"browser_switch_tab",
"browser_enter",
# "browser_get_som_screenshot", # remove it to achieve faster operation
# "browser_press_key",
# "browser_console_view",
# "browser_console_exec",
Expand All @@ -86,6 +86,7 @@
print(f"Custom tools: {web_toolkit_custom.enabled_tools}")
# Use the custom toolkit for the actual task
agent = ChatAgent(
enable_tool_output_cache=True,
model=model_backend,
tools=[*web_toolkit_custom.get_tools()],
toolkits_to_register_agent=[web_toolkit_custom],
Expand All @@ -105,6 +106,7 @@


async def main() -> None:
# await web_toolkit_custom.browser_open()
try:
response = await agent.astep(TASK_PROMPT)
print("Task:", TASK_PROMPT)
Expand Down
Loading
Loading