Skip to content

Commit d7d4f59

Browse files
authored
add caching to actions (by sub-action) + add variables to act (#171)
* add caching on the llm provider level
* add sub action caching
* action cache hit
* remove pnpm lock
* revert readme
* beep boop
* add caching on the llm provider level
* remove all of playground
* default back to env for browserbase args
* leftover
* leftover
* fix eval
* fix the bug in prev commit
* update how eval looks
* move act to handlers folder
* rename
* add new file
* update vision for less error (increase timeout + change error to warning)
* clean up the bundle script npm script
1 parent 703636b commit d7d4f59

21 files changed

+2156
-1145
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,4 @@ evals/**/public
1313
lib/dom/bundle.js
1414
evals/public
1515
*.tgz
16+
evals/playground.ts

evals/index.eval.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -269,7 +269,10 @@ const peeler_complex = async () => {
269269
await stagehand.page.goto(`https://chefstoys.com/`, { timeout: 60000 });
270270

271271
await stagehand.act({
272-
action: "search for peelers",
272+
action: "search for %search_query%",
273+
variables: {
274+
search_query: "peeler",
275+
},
273276
});
274277

275278
await stagehand.act({

evals/playground.ts

Lines changed: 0 additions & 126 deletions
Original file line numberDiff line numberDiff line change
@@ -1,126 +0,0 @@
1-
import { Stagehand } from "../lib";
2-
import { z } from "zod";
3-
import { EvalLogger } from "./utils";
4-
5-
/**
 * Playground scenario against homedepot.com: searches for gas grills, opens
 * the first result and its product details, then extracts the primary burner
 * BTU through `stagehand.extract`.
 *
 * NOTE: flagged by the original author as "eval failing".
 *
 * @returns `true` when the extraction yields at least one product spec;
 *   `false` when it yields none or when any step inside the `try` throws.
 */
const homedepot = async () => {
  const stagehand = new Stagehand({
    env: "LOCAL",
    verbose: 1,
    debugDom: true,
    headless: process.env.HEADLESS !== "false",
  });

  await stagehand.init();

  try {
    await stagehand.page.goto("https://www.homedepot.com/");

    await stagehand.act({ action: "search for gas grills" });

    await stagehand.act({ action: "click on the first gas grill" });

    await stagehand.act({ action: "click on the Product Details" });

    await stagehand.act({ action: "find the Primary Burner BTU" });

    const productSpecs = await stagehand.extract({
      instruction: "Extract the Primary Burner BTU of the product",
      schema: z.object({
        productSpecs: z
          .array(
            z.object({
              burnerBTU: z.string().describe("Primary Burner BTU"),
            }),
          )
          .describe("Gas grill Primary Burner BTU"),
      }),
      modelName: "gpt-4o-2024-08-06",
    });
    console.log("The gas grill primary burner BTU is:", productSpecs);

    if (
      !productSpecs ||
      !productSpecs.productSpecs ||
      productSpecs.productSpecs.length === 0
    ) {
      return false;
    }

    return true;
  } catch (error: unknown) {
    // A thrown value is not guaranteed to be an Error, so narrow before
    // reading `.message` instead of printing "undefined".
    const reason = error instanceof Error ? error.message : String(error);
    console.error(`Error in homedepot function: ${reason}`);
    return false;
  } finally {
    // Always release the browser context, whatever the outcome.
    await stagehand.context.close();
  }
};
58-
59-
/**
 * Playground scenario against vanta.com: asks Stagehand to observe the
 * "request demo" button, then compares the inner HTML of the first observed
 * selector with the inner HTML of a hard-coded expected selector.
 *
 * The browser context is closed before the result object is assembled, and is
 * also closed when a step throws (the error is then re-thrown to the caller).
 *
 * @returns An object with `_success`, `debugUrl`, `sessionUrl` and `logs`,
 *   plus either `observations` (when nothing was observed) or
 *   `expected`/`actual` (the two inner-HTML strings that were compared).
 */
const vanta = async () => {
  const logger = new EvalLogger();

  const stagehand = new Stagehand({
    env: "LOCAL",
    headless: process.env.HEADLESS !== "false",
    logger: (message: any) => {
      logger.log(message);
    },
    verbose: 2,
  });

  logger.init(stagehand);

  const { debugUrl, sessionUrl } = await stagehand.init();

  // Performs the page interaction and returns only the scenario-specific part
  // of the result, so that cleanup can be handled in one place below.
  const compareObservation = async () => {
    await stagehand.page.goto("https://www.vanta.com/");

    const observations = await stagehand.observe({
      instruction: "find the text for the request demo button",
    });

    console.log("Observations:", observations);

    if (observations.length === 0) {
      return { _success: false, observations };
    }

    const observationResult = await stagehand.page
      .locator(observations[0].selector)
      .first()
      .innerHTML();

    const expectedLocator = `body > div.page-wrapper > div.nav_component > div.nav_element.w-nav > div.padding-global > div > div > nav > div.nav_cta-wrapper.is-new > a.nav_cta-button-desktop.is-smaller.w-button`;

    const expectedResult = await stagehand.page
      .locator(expectedLocator)
      .first()
      .innerHTML();

    return {
      _success: observationResult === expectedResult,
      expected: expectedResult,
      actual: observationResult,
    };
  };

  let outcome: Awaited<ReturnType<typeof compareObservation>>;
  try {
    outcome = await compareObservation();
  } finally {
    // Close the context on success and on failure so the browser never leaks.
    await stagehand.context.close();
  }

  // Logs are collected after the context is closed, as in the original flow.
  return { ...outcome, debugUrl, sessionUrl, logs: logger.getLogs() };
};
117-
118-
/**
 * Playground entry point: runs the `vanta` scenario and prints its result.
 * The `homedepot` scenario is left commented out.
 */
async function main() {
  // const homedepotResult = await homedepot();
  const vantaResult = await vanta();

  // console.log("Result:", homedepotResult);
  console.log("Result:", vantaResult);
}

// Report any rejection from the run on stderr instead of leaving it unhandled.
main().catch(console.error);

examples/example.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ async function example() {
2121
});
2222
console.log(`Our favorite contributor is ${contributor.username}`);
2323
}
24+
2425
(async () => {
2526
await example();
2627
})();

lib/cache/ActionCache.ts

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
import { BaseCache, CacheEntry } from "./BaseCache";
2+
3+
/**
 * Serializable description of a single call: a method name plus its string
 * arguments.
 *
 * NOTE(review): presumably `method` names a Playwright locator method and
 * `args` are the arguments to replay it with — confirm against the code that
 * executes cached commands.
 */
export interface PlaywrightCommand {
  /** Name of the method to invoke. */
  method: string;
  /** Arguments for the call, in order. */
  args: string[];
}
7+
8+
/**
 * Cache entry holding the recorded outcome of one action step.
 *
 * `ActionCache.addActionStep` fills `data` from its arguments; `action` and
 * `previousSelectors` are stored here in addition to being part of the key.
 */
export interface ActionEntry extends CacheEntry {
  data: {
    /** Command recorded for this step. */
    playwrightCommand: PlaywrightCommand;
    // NOTE(review): presumably a textual rendering of the targeted element — confirm with the act handler.
    componentString: string;
    // NOTE(review): presumably candidate XPath selectors for the targeted element — confirm with the act handler.
    xpaths: string[];
    // NOTE(review): presumably the step description appended to the running step log — confirm with the act handler.
    newStepString: string;
    // NOTE(review): presumably whether the overall action was finished after this step — confirm with the act handler.
    completed: boolean;
    /** Selectors passed as `previousSelectors` when the step was stored. */
    previousSelectors: string[];
    /** Action text the step was stored under. */
    action: string;
  };
}
19+
20+
/**
21+
* ActionCache handles logging and retrieving actions along with their Playwright commands.
22+
*/
23+
export class ActionCache extends BaseCache<ActionEntry> {
24+
constructor(
25+
logger: (message: {
26+
category?: string;
27+
message: string;
28+
level?: number;
29+
}) => void,
30+
cacheDir?: string,
31+
cacheFile?: string,
32+
) {
33+
super(logger, cacheDir, cacheFile || "action_cache.json");
34+
}
35+
36+
public async addActionStep({
37+
url,
38+
action,
39+
previousSelectors,
40+
playwrightCommand,
41+
componentString,
42+
xpaths,
43+
newStepString,
44+
completed,
45+
requestId,
46+
}: {
47+
url: string;
48+
action: string;
49+
previousSelectors: string[];
50+
playwrightCommand: PlaywrightCommand;
51+
componentString: string;
52+
requestId: string;
53+
xpaths: string[];
54+
newStepString: string;
55+
completed: boolean;
56+
}): Promise<void> {
57+
this.logger({
58+
category: "action_cache",
59+
message: `Adding action step to cache: ${action}, requestId: ${requestId}, url: ${url}, previousSelectors: ${previousSelectors}`,
60+
level: 1,
61+
});
62+
63+
await this.set(
64+
{ url, action, previousSelectors },
65+
{
66+
playwrightCommand,
67+
componentString,
68+
xpaths,
69+
newStepString,
70+
completed,
71+
previousSelectors,
72+
action,
73+
},
74+
requestId,
75+
);
76+
}
77+
78+
/**
79+
* Retrieves all actions for a specific trajectory.
80+
* @param trajectoryId - Unique identifier for the trajectory.
81+
* @param requestId - The identifier for the current request.
82+
* @returns An array of TrajectoryEntry objects or null if not found.
83+
*/
84+
public async getActionStep({
85+
url,
86+
action,
87+
previousSelectors,
88+
requestId,
89+
}: {
90+
url: string;
91+
action: string;
92+
previousSelectors: string[];
93+
requestId: string;
94+
}): Promise<ActionEntry["data"] | null> {
95+
const data = await super.get({ url, action, previousSelectors }, requestId);
96+
if (!data) {
97+
return null;
98+
}
99+
100+
return data;
101+
}
102+
103+
public async removeActionStep(cacheHashObj: {
104+
url: string;
105+
action: string;
106+
previousSelectors: string[];
107+
requestId: string;
108+
}): Promise<void> {
109+
await super.delete(cacheHashObj);
110+
}
111+
112+
/**
113+
* Clears all actions for a specific trajectory.
114+
* @param trajectoryId - Unique identifier for the trajectory.
115+
* @param requestId - The identifier for the current request.
116+
*/
117+
public async clearAction(requestId: string): Promise<void> {
118+
await super.deleteCacheForRequestId(requestId);
119+
this.logger({
120+
category: "action_cache",
121+
message: `Cleared action for ID: ${requestId}`,
122+
level: 1,
123+
});
124+
}
125+
126+
/**
127+
* Resets the entire action cache.
128+
*/
129+
public async resetCache(): Promise<void> {
130+
await super.resetCache();
131+
this.logger({
132+
category: "action_cache",
133+
message: "Action cache has been reset.",
134+
level: 1,
135+
});
136+
}
137+
}

0 commit comments

Comments
 (0)