make stricter completion condition for extract (#151)

anishk23733 · web-flow · commit 3e3749529d22 · 2024-10-31T22:02:58.000-07:00
* make stricter completion condition for extract

* small prompt update

* add github stars eval

* small eval change

* remove unnecessary vars

* fix nits
diff --git a/evals/index.eval.ts b/evals/index.eval.ts
@@ -390,6 +390,63 @@ const homedepot = async () => {
   }
 };
 
+/**
+ * Eval: `extract` must report the same star count for facebook/react as the
+ * page's `#repo-stars-counter-star` element; returns `_success`, URLs and logs.
+ */
+const extract_github_stars = async () => {
+  const logger = new EvalLogger();
+  const stagehand = new Stagehand({
+    env,
+    verbose: 2,
+    headless: process.env.HEADLESS !== "false",
+    logger: (message: { category?: string; message: string }) => {
+      logger.log(message.message);
+    },
+  });
+  logger.init(stagehand);
+  const { debugUrl, sessionUrl } = await stagehand.init();
+
+  try {
+    await stagehand.page.goto("https://github.com/facebook/react");
+    const { stars } = await stagehand.extract({
+      instruction: "Extract the number of stars for the project",
+      schema: z.object({
+        stars: z.number().describe("the number of stars for the project"),
+      }),
+      modelName: "gpt-4o-2024-08-06",
+    });
+
+    const counter = stagehand.page.locator("#repo-stars-counter-star").first();
+    // Counts may be abbreviated (e.g. "1.1k"): trim so whitespace cannot hide
+    // the suffix, and round because 1.1 * 1000 is inexact and would fail ===.
+    const counterText = (await counter.innerHTML()).trim().toLowerCase();
+    const expectedStars = counterText.endsWith("k")
+      ? Math.round(parseFloat(counterText.slice(0, -1)) * 1000)
+      : parseFloat(counterText);
+
+    await stagehand.context.close().catch(() => {});
+    return {
+      _success: stars === expectedStars,
+      stars,
+      debugUrl,
+      sessionUrl,
+      logs: logger.getLogs(),
+    };
+  } catch (error) {
+    console.error("Error or timeout occurred:", error);
+    await stagehand.context.close().catch(() => {});
+    return {
+      _success: false,
+      // NOTE(review): JSON.stringify drops an Error's message and stack.
+      error: JSON.parse(JSON.stringify(error, null, 2)),
+      debugUrl,
+      sessionUrl,
+      logs: logger.getLogs(),
+    };
+  }
+};
+
 const extract_collaborators_from_github_repository = async () => {
   const logger = new EvalLogger();
 
@@ -1145,6 +1202,7 @@ const tasks = {
   peeler_complex,
   wikipedia,
   simple_google_search,
+  extract_github_stars,
   extract_collaborators_from_github_repository,
   extract_last_twenty_github_commits,
   costar,
@@ -1194,6 +1252,7 @@ const testcases = [
   },
   { input: { name: "peeler_complex" } },
   { input: { name: "simple_google_search" } },
+  { input: { name: "extract_github_stars" } },
   {
     input: {
       name: "extract_collaborators_from_github_repository",
diff --git a/lib/prompt.ts b/lib/prompt.ts
@@ -252,8 +252,10 @@ const metadataSystemPrompt = `You are an AI assistant tasked with evaluating the
 Analyze the extraction response and determine if the task is completed or if more information is needed.
 
 Strictly abide by the following criteria:
-1. If you are certain that the instruction is completed, set the completion status to true, even if there are still chunks left.
-2. If there could still be more information to extract and there are still chunks left, set the completion status to false.`;
+1. Once the instruction has been satisfied by the current extraction response, ALWAYS set completion status to true and stop processing, regardless of remaining chunks.
+2. Only set completion status to false if BOTH of these conditions are true:
+   - The instruction has not been satisfied yet
+   - There are still chunks left to process (chunksTotal > chunksSeen)`;
 
 export function buildMetadataSystemPrompt() {
   return {
@@ -272,8 +274,8 @@ export function buildMetadataPrompt(
     role: "user",
     content: `Instruction: ${instruction}
 Extracted content: ${JSON.stringify(extractionResponse, null, 2)}
-Chunks seen: ${chunksSeen}
-Chunks total: ${chunksTotal}`,
+chunksSeen: ${chunksSeen}
+chunksTotal: ${chunksTotal}`,
   };
 }