Skip to content

Commit e43c9fe

Browse files
authored
[feat]: adding kimi-k2 for evals (#885)
# why

To test the best models with Stagehand.

# what changed

Changed the way we handle Groq models for evals, as well as preview models in the Groq client. Added the model to the list.

# test plan
1 parent 87e09c6 commit e43c9fe

File tree

3 files changed

+10
-6
lines changed

3 files changed

+10
-6
lines changed

evals/index.eval.ts

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -335,13 +335,12 @@ const generateFilteredTestcases = (): Testcase[] => {
335335
llmClient = new AISdkClient({
336336
model: wrapAISDKModel(anthropic(input.modelName)),
337337
});
338-
} else if (input.modelName.includes("groq")) {
338+
} else if (
339+
input.modelName.includes("groq") ||
340+
input.modelName.includes("kimi")
341+
) {
339342
llmClient = new AISdkClient({
340-
model: wrapAISDKModel(
341-
groq(
342-
input.modelName.substring(input.modelName.indexOf("/") + 1),
343-
),
344-
),
343+
model: wrapAISDKModel(groq(input.modelName)),
345344
});
346345
} else if (input.modelName.includes("cerebras")) {
347346
llmClient = new AISdkClient({

evals/initStagehand.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,10 @@ export const initStagehand = async ({
9191
logger.init(stagehand);
9292

9393
const { debugUrl, sessionUrl } = await stagehand.init();
94+
95+
// Set navigation timeout to 60 seconds for evaluations
96+
stagehand.context.setDefaultNavigationTimeout(60_000);
97+
9498
return {
9599
stagehand,
96100
stagehandConfig: config,

lib/llm/LLMProvider.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ const modelToProviderMap: { [key in AvailableModel]: ModelProvider } = {
8484
"cerebras-llama-3.1-8b": "cerebras",
8585
"groq-llama-3.3-70b-versatile": "groq",
8686
"groq-llama-3.3-70b-specdec": "groq",
87+
"moonshotai/kimi-k2-instruct": "groq",
8788
"gemini-1.5-flash": "google",
8889
"gemini-1.5-pro": "google",
8990
"gemini-1.5-flash-8b": "google",

0 commit comments

Comments
 (0)