fix chat completion not working for custom llm clients (#618)

kamath · web-flow · commit 1e49dee6fc81 · 2025-03-30T15:43:31.000-07:00
* fix chat completion not working for custom llm clients

* throw StagehandEvalError when llmClient and modelName are both set or both missing
diff --git a/evals/evals.config.json b/evals/evals.config.json
@@ -85,7 +85,14 @@
       "name": "wichita",
       "categories": ["combination", "regression_dom_extract"]
     },
-
+    {
+      "name": "hn_aisdk",
+      "categories": ["combination", "regression_dom_extract"]
+    },
+    {
+      "name": "hn_langchain",
+      "categories": ["combination", "regression_dom_extract"]
+    },
     {
       "name": "apple",
       "categories": ["experimental"]
diff --git a/evals/initStagehand.ts b/evals/initStagehand.ts
@@ -11,8 +11,15 @@
  */
 
 import { enableCaching, env } from "./env";
-import { AvailableModel, ConstructorParams, LogLine, Stagehand } from "@/dist";
+import {
+  AvailableModel,
+  ConstructorParams,
+  LLMClient,
+  LogLine,
+  Stagehand,
+} from "@/dist";
 import { EvalLogger } from "./logger";
+import { StagehandEvalError } from "@/types/stagehandErrors";
 
 /**
  * StagehandConfig:
@@ -51,26 +58,37 @@ const StagehandConfig = {
  * - initResponse: Any response data returned by Stagehand initialization
  */
 export const initStagehand = async ({
+  llmClient,
   modelName,
   domSettleTimeoutMs,
   logger,
   configOverrides,
   actTimeoutMs,
 }: {
-  modelName: AvailableModel;
+  llmClient?: LLMClient;
+  modelName?: AvailableModel;
   domSettleTimeoutMs?: number;
   logger: EvalLogger;
   configOverrides?: Partial<ConstructorParams>;
   actTimeoutMs?: number;
 }) => {
+  if (llmClient && modelName) {
+    throw new StagehandEvalError("Cannot provide both llmClient and modelName");
+  }
+
+  if (!llmClient && !modelName) {
+    throw new StagehandEvalError("Must provide either llmClient or modelName");
+  }
+
   let chosenApiKey: string | undefined = process.env.OPENAI_API_KEY;
-  if (modelName.startsWith("claude")) {
+  if (modelName?.startsWith("claude")) {
     chosenApiKey = process.env.ANTHROPIC_API_KEY;
   }
 
   const config = {
     ...StagehandConfig,
     modelName,
+    llmClient,
     ...(domSettleTimeoutMs && { domSettleTimeoutMs }),
     modelClientOptions: {
       apiKey: chosenApiKey,
diff --git a/evals/tasks/hn_aisdk.ts b/evals/tasks/hn_aisdk.ts
@@ -0,0 +1,113 @@
+import { EvalFunction } from "@/types/evals";
+import { initStagehand } from "@/evals/initStagehand";
+import { z } from "zod";
+import { openai } from "@ai-sdk/openai/dist";
+import { AISdkClient } from "@/examples/external_clients/aisdk";
+
+/**
+ * Eval: Hacker News extract + act, driven through a custom AI SDK LLM client.
+ * Extracts the top story title with `page.extract`, compares it with the title
+ * read from the DOM, then clicks the "new" tab and checks the resulting URL.
+ * The session is closed in a `finally` block so that failed checks and thrown
+ * errors release it too, not only the success path.
+ */
+export const hn_aisdk: EvalFunction = async ({ logger }) => {
+  const { stagehand, initResponse } = await initStagehand({
+    logger,
+    llmClient: new AISdkClient({
+      model: openai("gpt-4o-mini"),
+    }),
+  });
+
+  const { debugUrl, sessionUrl } = initResponse;
+  const expectedUrl = "https://news.ycombinator.com/newest";
+
+  try {
+    await stagehand.page.goto("https://news.ycombinator.com");
+
+    let { story } = await stagehand.page.extract({
+      schema: z.object({
+        story: z.string().describe("the title of the top story on the page"),
+      }),
+    });
+    // remove the (url) part of the story title
+    story = story.split(" (")[0];
+
+    const expectedStoryElement = await stagehand.page.$(
+      "xpath=/html/body/center/table/tbody/tr[3]/td/table/tbody/tr[1]/td[3]/span/a",
+    );
+    // remove the (url) part of the story title
+    const expectedStory = (await expectedStoryElement?.textContent())?.split(
+      " (",
+    )?.[0];
+
+    if (!expectedStory) {
+      logger.error({
+        message: "Could not find expected story element",
+        level: 0,
+      });
+      return {
+        _success: false,
+        error: "Could not find expected story element",
+        debugUrl,
+        sessionUrl,
+        logs: logger.getLogs(),
+      };
+    }
+
+    if (story !== expectedStory) {
+      logger.error({
+        message: "Extracted story does not match expected story",
+        level: 0,
+        auxiliary: {
+          expected: { value: expectedStory, type: "string" },
+          actual: { value: story, type: "string" },
+        },
+      });
+      return {
+        _success: false,
+        error: "Extracted story does not match expected story",
+        expectedStory,
+        actualStory: story,
+        debugUrl,
+        sessionUrl,
+        logs: logger.getLogs(),
+      };
+    }
+
+    await stagehand.page.act("Click on the 'new' tab");
+
+    // read the URL once so the check, the log entry and the result agree
+    const actualUrl = stagehand.page.url();
+    if (actualUrl !== expectedUrl) {
+      logger.error({
+        message: "Page did not navigate to the 'new' tab",
+        level: 0,
+        auxiliary: {
+          expected: { value: expectedUrl, type: "string" },
+          actual: { value: actualUrl, type: "string" },
+        },
+      });
+      return {
+        _success: false,
+        error: "Page did not navigate to the 'new' tab",
+        expectedUrl,
+        actualUrl,
+        debugUrl,
+        sessionUrl,
+        logs: logger.getLogs(),
+      };
+    }
+
+    return {
+      _success: true,
+      expectedStory,
+      actualStory: story,
+      debugUrl,
+      sessionUrl,
+      logs: logger.getLogs(),
+    };
+  } finally {
+    await stagehand.close();
+  }
+};
diff --git a/evals/tasks/hn_langchain.ts b/evals/tasks/hn_langchain.ts
@@ -0,0 +1,115 @@
+import { EvalFunction } from "@/types/evals";
+import { initStagehand } from "@/evals/initStagehand";
+import { z } from "zod";
+import { LangchainClient } from "@/examples/external_clients/langchain";
+import { ChatOpenAI } from "@langchain/openai";
+
+/**
+ * Eval: Hacker News extract + act, driven through a custom LangChain LLM client.
+ * Extracts the top story title with `page.extract`, compares it with the title
+ * read from the DOM, then clicks the "new" tab and checks the resulting URL.
+ * The session is closed in a `finally` block so that failed checks and thrown
+ * errors release it too, not only the success path.
+ */
+export const hn_langchain: EvalFunction = async ({ logger }) => {
+  const { stagehand, initResponse } = await initStagehand({
+    logger,
+    llmClient: new LangchainClient(
+      new ChatOpenAI({
+        model: "gpt-4o",
+      }),
+    ),
+  });
+
+  const { debugUrl, sessionUrl } = initResponse;
+  const expectedUrl = "https://news.ycombinator.com/newest";
+
+  try {
+    await stagehand.page.goto("https://news.ycombinator.com");
+
+    let { story } = await stagehand.page.extract({
+      schema: z.object({
+        story: z.string().describe("the title of the top story on the page"),
+      }),
+    });
+    // remove the (url) part of the story title
+    story = story.split(" (")[0];
+
+    const expectedStoryElement = await stagehand.page.$(
+      "xpath=/html/body/center/table/tbody/tr[3]/td/table/tbody/tr[1]/td[3]/span/a",
+    );
+    // remove the (url) part of the story title
+    const expectedStory = (await expectedStoryElement?.textContent())?.split(
+      " (",
+    )?.[0];
+
+    if (!expectedStory) {
+      logger.error({
+        message: "Could not find expected story element",
+        level: 0,
+      });
+      return {
+        _success: false,
+        error: "Could not find expected story element",
+        debugUrl,
+        sessionUrl,
+        logs: logger.getLogs(),
+      };
+    }
+
+    if (story !== expectedStory) {
+      logger.error({
+        message: "Extracted story does not match expected story",
+        level: 0,
+        auxiliary: {
+          expected: { value: expectedStory, type: "string" },
+          actual: { value: story, type: "string" },
+        },
+      });
+      return {
+        _success: false,
+        error: "Extracted story does not match expected story",
+        expectedStory,
+        actualStory: story,
+        debugUrl,
+        sessionUrl,
+        logs: logger.getLogs(),
+      };
+    }
+
+    await stagehand.page.act("Click on the 'new' tab");
+
+    // read the URL once so the check, the log entry and the result agree
+    const actualUrl = stagehand.page.url();
+    if (actualUrl !== expectedUrl) {
+      logger.error({
+        message: "Page did not navigate to the 'new' tab",
+        level: 0,
+        auxiliary: {
+          expected: { value: expectedUrl, type: "string" },
+          actual: { value: actualUrl, type: "string" },
+        },
+      });
+      return {
+        _success: false,
+        error: "Page did not navigate to the 'new' tab",
+        expectedUrl,
+        actualUrl,
+        debugUrl,
+        sessionUrl,
+        logs: logger.getLogs(),
+      };
+    }
+
+    return {
+      _success: true,
+      expectedStory,
+      actualStory: story,
+      debugUrl,
+      sessionUrl,
+      logs: logger.getLogs(),
+    };
+  } finally {
+    await stagehand.close();
+  }
+};
diff --git a/examples/ai_sdk_example.ts b/examples/ai_sdk_example.ts
@@ -1,36 +1,29 @@
-import { google } from "@ai-sdk/google";
-import { z } from "zod";
+import { openai } from "@ai-sdk/openai";
 import { Stagehand } from "@/dist";
 import { AISdkClient } from "./external_clients/aisdk";
 import StagehandConfig from "@/stagehand.config";
+import { z } from "zod";
 
 async function example() {
   const stagehand = new Stagehand({
     ...StagehandConfig,
     llmClient: new AISdkClient({
-      model: google("gemini-1.5-flash-latest"),
+      model: openai("gpt-4o"),
     }),
   });
 
   await stagehand.init();
   await stagehand.page.goto("https://news.ycombinator.com");
 
-  const headlines = await stagehand.page.extract({
-    instruction: "Extract only 3 stories from the Hacker News homepage.",
+  const { story } = await stagehand.page.extract({
     schema: z.object({
-      stories: z
-        .array(
-          z.object({
-            title: z.string(),
-            url: z.string(),
-            points: z.number(),
-          }),
-        )
-        .length(3),
+      story: z.string().describe("the top story on the page"),
     }),
   });
 
-  console.log(headlines);
+  console.log("The top story is:", story);
+
+  await stagehand.page.act("click the first story");
 
   await stagehand.close();
 }
diff --git a/examples/external_clients/aisdk.ts b/examples/external_clients/aisdk.ts
@@ -10,8 +10,8 @@ import {
   LanguageModel,
   TextPart,
 } from "ai";
-import { ChatCompletion } from "openai/resources/chat/completions";
 import { CreateChatCompletionOptions, LLMClient, AvailableModel } from "@/dist";
+import { ChatCompletion } from "openai/resources";
 
 export class AISdkClient extends LLMClient {
   public type = "aisdk" as const;
@@ -85,7 +85,14 @@ export class AISdkClient extends LLMClient {
         schema: options.response_model.schema,
       });
 
-      return response.object;
+      return {
+        data: response.object,
+        usage: {
+          prompt_tokens: response.usage.promptTokens ?? 0,
+          completion_tokens: response.usage.completionTokens ?? 0,
+          total_tokens: response.usage.totalTokens ?? 0,
+        },
+      } as T;
     }
 
     const tools: Record<string, CoreTool> = {};
@@ -103,6 +110,13 @@ export class AISdkClient extends LLMClient {
       tools,
     });
 
-    return response as T;
+    return {
+      data: response.text,
+      usage: {
+        prompt_tokens: response.usage.promptTokens ?? 0,
+        completion_tokens: response.usage.completionTokens ?? 0,
+        total_tokens: response.usage.totalTokens ?? 0,
+      },
+    } as T;
   }
 }
diff --git a/examples/external_clients/langchain.ts b/examples/external_clients/langchain.ts
diff --git a/examples/langchain.ts b/examples/langchain.ts