Skip to content

Commit 705d8d6

Browse files
Replace extract_research_reports with cloned static site (#415)
* replace extract_research_reports with extract_jstor_news
* prettier
1 parent 4aa4813 commit 705d8d6

File tree

2 files changed

+20
-17
lines changed

2 files changed

+20
-17
lines changed

evals/evals.config.json

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -201,7 +201,7 @@
201201
"categories": ["text_extract"]
202202
},
203203
{
204-
"name": "extract_research_reports",
204+
"name": "extract_jstor_news",
205205
"categories": ["text_extract"]
206206
}
207207
]

evals/tasks/extract_research_reports.ts renamed to evals/tasks/extract_jstor_news.ts

Lines changed: 19 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@ import { EvalFunction } from "../../types/evals";
22
import { initStagehand } from "../initStagehand";
33
import { z } from "zod";
44

5-
export const extract_research_reports: EvalFunction = async ({
5+
export const extract_jstor_news: EvalFunction = async ({
66
modelName,
77
logger,
88
useTextExtract,
@@ -15,22 +15,22 @@ export const extract_research_reports: EvalFunction = async ({
1515
const { debugUrl, sessionUrl } = initResponse;
1616

1717
await stagehand.init();
18-
await stagehand.page.goto(
19-
"http://www.dsbd.gov.za/index.php/research-reports",
20-
{ waitUntil: "load" },
21-
);
18+
await stagehand.page.goto("http://jstor-eval.surge.sh", {
19+
waitUntil: "load",
20+
});
21+
await stagehand.page.act({ action: "close the cookie" });
2222

2323
const result = await stagehand.page.extract({
24-
instruction:
25-
"Extract ALL the research report names. Do not extract the names of the PDF attachments.",
24+
instruction: "Extract ALL the news report titles and their dates.",
2625
schema: z.object({
2726
reports: z.array(
2827
z.object({
2928
report_name: z
3029
.string()
31-
.describe(
32-
"The name or title of the research report. NOT the name of the PDF attachment.",
33-
),
30+
.describe("The name or title of the news report."),
31+
publish_date: z
32+
.string()
33+
.describe("The date the news report was published."),
3434
}),
3535
),
3636
}),
@@ -41,15 +41,16 @@ export const extract_research_reports: EvalFunction = async ({
4141
await stagehand.close();
4242

4343
const reports = result.reports;
44-
const expectedLength = 9;
44+
const expectedLength = 10;
4545

4646
const expectedFirstItem = {
47-
report_name:
48-
"Longitudinal Study on SMMEs and Co-operatives in South Africa and the study on the Eastern SeaBoard",
47+
report_name: "JSTOR retires Publisher Sales Service",
48+
publish_date: "December 9, 2024",
4949
};
5050

5151
const expectedLastItem = {
52-
report_name: "Research Agenda",
52+
report_name: "Path to Open announces 2024 titles",
53+
publish_date: "May 10, 2024",
5354
};
5455

5556
if (reports.length !== expectedLength) {
@@ -76,7 +77,8 @@ export const extract_research_reports: EvalFunction = async ({
7677
};
7778
}
7879
const firstItemMatches =
79-
reports[0].report_name === expectedFirstItem.report_name;
80+
reports[0].report_name === expectedFirstItem.report_name &&
81+
reports[0].publish_date === expectedFirstItem.publish_date;
8082

8183
if (!firstItemMatches) {
8284
logger.error({
@@ -103,7 +105,8 @@ export const extract_research_reports: EvalFunction = async ({
103105
}
104106

105107
const lastItemMatches =
106-
reports[reports.length - 1].report_name === expectedLastItem.report_name;
108+
reports[reports.length - 1].report_name === expectedLastItem.report_name &&
109+
reports[reports.length - 1].publish_date === expectedLastItem.publish_date;
107110

108111
if (!lastItemMatches) {
109112
logger.error({

0 commit comments

Comments
 (0)