diff --git a/.changeset/solid-emus-sin.md b/.changeset/solid-emus-sin.md
new file mode 100644
index 000000000..9b5343e32
--- /dev/null
+++ b/.changeset/solid-emus-sin.md
@@ -0,0 +1,5 @@
+---
+"@browserbasehq/stagehand": patch
+---
+
+Added support for new Anthropic computer-use models. Also added support for passing in new Anthropic models into Stagehand via the legacy (non AI SDK) format until we fully migrate.
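For reference, a rough sketch of how one of the newly supported computer-use models would be selected through Stagehand's agent interface (TypeScript; the exact agent option names and the task string are illustrative assumptions, not taken from this diff):

import { Stagehand } from "@browserbasehq/stagehand";

async function main() {
  const stagehand = new Stagehand({ env: "LOCAL" });
  await stagehand.init();
  await stagehand.page.goto("https://example.com"); // arbitrary starting page

  // Any key in the modelToAgentProviderMap below should resolve to the Anthropic CUA client.
  const agent = stagehand.agent({
    provider: "anthropic",
    model: "claude-sonnet-4-20250514",
    options: { apiKey: process.env.ANTHROPIC_API_KEY },
  });

  const result = await agent.execute("Find the pricing page and summarize the plans.");
  console.log(result);

  await stagehand.close();
}

main().catch(console.error);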
{ type: "enabled" as const, budget_tokens: this.thinkingBudget } : undefined; + // Determine the appropriate computer type and beta flag based on model + const { computerType, betaFlag } = this.getComputerUseConfigForModel( + this.modelName, + ); + // Create the request parameters const requestParams: Record = { model: this.modelName, @@ -413,14 +410,14 @@ export class AnthropicCUAClient extends AgentClient { messages: messages, tools: [ { - type: "computer_20250124", // Use the latest version for Claude 3.7 Sonnet + type: computerType, name: "computer", display_width_px: this.currentViewport.width, display_height_px: this.currentViewport.height, display_number: 1, }, ], - betas: ["computer-use-2025-01-24"], + betas: [betaFlag], }; // Add system parameter if provided @@ -474,6 +471,43 @@ export class AnthropicCUAClient extends AgentClient { } } + /** + * Get the appropriate computer type and beta flag for a given model + */ + private getComputerUseConfigForModel(modelName: string): { + computerType: string; + betaFlag: string; + } { + // Claude 3.5 models use computer_20241022 with computer-use-2024-10-22 + if ( + modelName.includes("claude-3-5-sonnet") || + modelName.includes("claude-3-5") + ) { + return { + computerType: "computer_20241022", + betaFlag: "computer-use-2024-10-22", + }; + } + + // Claude 4 models and Claude Sonnet 3.7 use computer_20250124 with computer-use-2025-01-24 + if ( + modelName.includes("claude-4") || + modelName.includes("claude-3-7") || + modelName.includes("claude-3-7-sonnet") + ) { + return { + computerType: "computer_20250124", + betaFlag: "computer-use-2025-01-24", + }; + } + + // Default fallback for other models + return { + computerType: "computer_20250124", + betaFlag: "computer-use-2025-01-24", + }; + } + async takeAction( toolUseItems: ToolUseItem[], logger: (message: LogLine) => void, diff --git a/lib/agent/OpenAICUAClient.ts b/lib/agent/OpenAICUAClient.ts index f578751b8..062746c9f 100644 --- a/lib/agent/OpenAICUAClient.ts +++ b/lib/agent/OpenAICUAClient.ts @@ -1,17 +1,17 @@ -import OpenAI from "openai"; -import { LogLine } from "../../types/log"; import { AgentAction, + AgentExecutionOptions, AgentResult, AgentType, - AgentExecutionOptions, - ResponseInputItem, - ResponseItem, ComputerCallItem, FunctionCallItem, + ResponseInputItem, + ResponseItem, } from "@/types/agent"; -import { AgentClient } from "./AgentClient"; import { AgentScreenshotProviderError } from "@/types/stagehandErrors"; +import OpenAI, { ClientOptions } from "openai"; +import { LogLine } from "../../types/log"; +import { AgentClient } from "./AgentClient"; /** * Client for OpenAI's Computer Use Assistant API @@ -34,7 +34,7 @@ export class OpenAICUAClient extends AgentClient { type: AgentType, modelName: string, userProvidedInstructions?: string, - clientOptions?: Record, + clientOptions?: ClientOptions, ) { super(type, modelName, userProvidedInstructions); @@ -43,15 +43,21 @@ export class OpenAICUAClient extends AgentClient { (clientOptions?.apiKey as string) || process.env.OPENAI_API_KEY || ""; this.organization = (clientOptions?.organization as string) || process.env.OPENAI_ORG; + this.baseURL = (clientOptions?.baseURL as string) || undefined; // Get environment if specified - if ( - clientOptions?.environment && - typeof clientOptions.environment === "string" - ) { - this.environment = clientOptions.environment; + // Store client options for reference + this.clientOptions = { + apiKey: this.apiKey, + }; + + if (this.baseURL) { + this.clientOptions.baseURL = this.baseURL; } + // 
diff --git a/lib/agent/OpenAICUAClient.ts b/lib/agent/OpenAICUAClient.ts
index f578751b8..062746c9f 100644
--- a/lib/agent/OpenAICUAClient.ts
+++ b/lib/agent/OpenAICUAClient.ts
@@ -1,17 +1,17 @@
-import OpenAI from "openai";
-import { LogLine } from "../../types/log";
 import {
   AgentAction,
+  AgentExecutionOptions,
   AgentResult,
   AgentType,
-  AgentExecutionOptions,
-  ResponseInputItem,
-  ResponseItem,
   ComputerCallItem,
   FunctionCallItem,
+  ResponseInputItem,
+  ResponseItem,
 } from "@/types/agent";
-import { AgentClient } from "./AgentClient";
 import { AgentScreenshotProviderError } from "@/types/stagehandErrors";
+import OpenAI, { ClientOptions } from "openai";
+import { LogLine } from "../../types/log";
+import { AgentClient } from "./AgentClient";
 
 /**
  * Client for OpenAI's Computer Use Assistant API
@@ -34,7 +34,7 @@ export class OpenAICUAClient extends AgentClient {
     type: AgentType,
     modelName: string,
     userProvidedInstructions?: string,
-    clientOptions?: Record<string, unknown>,
+    clientOptions?: ClientOptions,
   ) {
     super(type, modelName, userProvidedInstructions);
 
@@ -43,15 +43,21 @@ export class OpenAICUAClient extends AgentClient {
       (clientOptions?.apiKey as string) || process.env.OPENAI_API_KEY || "";
     this.organization =
       (clientOptions?.organization as string) || process.env.OPENAI_ORG;
+    this.baseURL = (clientOptions?.baseURL as string) || undefined;
 
     // Get environment if specified
-    if (
-      clientOptions?.environment &&
-      typeof clientOptions.environment === "string"
-    ) {
-      this.environment = clientOptions.environment;
+    // Store client options for reference
+    this.clientOptions = {
+      apiKey: this.apiKey,
+    };
+
+    if (this.baseURL) {
+      this.clientOptions.baseURL = this.baseURL;
     }
 
+    // Initialize the OpenAI client
+    this.client = new OpenAI(this.clientOptions);
+
     // Store client options for reference
     this.clientOptions = {
       apiKey: this.apiKey,
diff --git a/lib/llm/LLMProvider.ts b/lib/llm/LLMProvider.ts
index fc11c5753..a4529f4e9 100644
--- a/lib/llm/LLMProvider.ts
+++ b/lib/llm/LLMProvider.ts
@@ -1,8 +1,21 @@
+import { AISDKCustomProvider, AISDKProvider } from "@/types/llm";
 import {
   UnsupportedAISDKModelProviderError,
   UnsupportedModelError,
   UnsupportedModelProviderError,
 } from "@/types/stagehandErrors";
+import { anthropic, createAnthropic } from "@ai-sdk/anthropic";
+import { azure, createAzure } from "@ai-sdk/azure";
+import { cerebras, createCerebras } from "@ai-sdk/cerebras";
+import { createDeepSeek, deepseek } from "@ai-sdk/deepseek";
+import { createGoogleGenerativeAI, google } from "@ai-sdk/google";
+import { createGroq, groq } from "@ai-sdk/groq";
+import { createMistral, mistral } from "@ai-sdk/mistral";
+import { createOpenAI, openai } from "@ai-sdk/openai";
+import { createPerplexity, perplexity } from "@ai-sdk/perplexity";
+import { createTogetherAI, togetherai } from "@ai-sdk/togetherai";
+import { createXai, xai } from "@ai-sdk/xai";
+import { ollama } from "ollama-ai-provider";
 import { LogLine } from "../../types/log";
 import {
   AvailableModel,
@@ -17,19 +30,6 @@ import { GoogleClient } from "./GoogleClient";
 import { GroqClient } from "./GroqClient";
 import { LLMClient } from "./LLMClient";
 import { OpenAIClient } from "./OpenAIClient";
-import { openai, createOpenAI } from "@ai-sdk/openai";
-import { anthropic, createAnthropic } from "@ai-sdk/anthropic";
-import { google, createGoogleGenerativeAI } from "@ai-sdk/google";
-import { xai, createXai } from "@ai-sdk/xai";
-import { azure, createAzure } from "@ai-sdk/azure";
-import { groq, createGroq } from "@ai-sdk/groq";
-import { cerebras, createCerebras } from "@ai-sdk/cerebras";
-import { togetherai, createTogetherAI } from "@ai-sdk/togetherai";
-import { mistral, createMistral } from "@ai-sdk/mistral";
-import { deepseek, createDeepSeek } from "@ai-sdk/deepseek";
-import { perplexity, createPerplexity } from "@ai-sdk/perplexity";
-import { ollama } from "ollama-ai-provider";
-import { AISDKProvider, AISDKCustomProvider } from "@/types/llm";
 
 const AISDKProviders: Record<string, AISDKProvider> = {
   openai,
@@ -80,6 +80,10 @@ const modelToProviderMap: { [key in AvailableModel]: ModelProvider } = {
   "claude-3-5-sonnet-20241022": "anthropic",
   "claude-3-7-sonnet-20250219": "anthropic",
   "claude-3-7-sonnet-latest": "anthropic",
+  "claude-opus-4-0": "anthropic",
+  "claude-opus-4-20250514": "anthropic",
+  "claude-sonnet-4-0": "anthropic",
+  "claude-sonnet-4-20250514": "anthropic",
   "cerebras-llama-3.3-70b": "cerebras",
   "cerebras-llama-3.1-8b": "cerebras",
   "groq-llama-3.3-70b-versatile": "groq",
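Because the same Claude 4 model names now also resolve through modelToProviderMap, they can be passed via the legacy (non AI SDK) format mentioned in the changeset. A rough sketch, assuming the usual Stagehand constructor options (option names are not part of this diff):

import { Stagehand } from "@browserbasehq/stagehand";

async function run() {
  const stagehand = new Stagehand({
    env: "LOCAL",
    modelName: "claude-opus-4-20250514", // resolved by modelToProviderMap to the Anthropic client
    modelClientOptions: { apiKey: process.env.ANTHROPIC_API_KEY },
  });

  await stagehand.init();
  await stagehand.page.goto("https://example.com");
  await stagehand.page.act("click the first link on the page");
  await stagehand.close();
}

run().catch(console.error);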