Skip to content

Commit f884f42

Browse files
Saghen and nsarrazin authored
Always close playwright page (#1171)
fix: always close playwright page

Co-authored-by: Nathan Sarrazin <sarrazin.nathan@gmail.com>
1 parent 0bb6c27 commit f884f42

File tree

1 file changed

+34
-30
lines changed

1 file changed

+34
-30
lines changed

src/lib/server/websearch/scrape/scrape.ts

Lines changed: 34 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -24,37 +24,41 @@ export const scrape = (maxCharsPerElem: number) =>
2424
export async function scrapeUrl(url: string, maxCharsPerElem: number) {
2525
const { res, page } = await loadPage(url);
2626

27-
if (!res) throw Error("Failed to load page");
27+
try {
28+
if (!res) throw Error("Failed to load page");
2829

29-
// Check if it's a non-html content type that we can handle directly
30-
// TODO: direct mappings to markdown can be added for markdown, csv and others
31-
const contentType = res.headers()["content-type"] ?? "";
32-
if (
33-
contentType.includes("text/plain") ||
34-
contentType.includes("text/markdown") ||
35-
contentType.includes("application/json") ||
36-
contentType.includes("application/xml") ||
37-
contentType.includes("text/csv")
38-
) {
39-
const title = await page.title();
40-
const content = await page.content();
41-
return {
42-
title,
43-
markdownTree: htmlToMarkdownTree(
30+
// Check if it's a non-html content type that we can handle directly
31+
// TODO: direct mappings to markdown can be added for markdown, csv and others
32+
const contentType = res.headers()["content-type"] ?? "";
33+
if (
34+
contentType.includes("text/plain") ||
35+
contentType.includes("text/markdown") ||
36+
contentType.includes("application/json") ||
37+
contentType.includes("application/xml") ||
38+
contentType.includes("text/csv")
39+
) {
40+
const title = await page.title();
41+
const content = await page.content();
42+
return {
4443
title,
45-
[{ tagName: "p", attributes: {}, content: [content] }],
46-
maxCharsPerElem
47-
),
48-
};
49-
}
44+
markdownTree: htmlToMarkdownTree(
45+
title,
46+
[{ tagName: "p", attributes: {}, content: [content] }],
47+
maxCharsPerElem
48+
),
49+
};
50+
}
5051

51-
return timeout(page.evaluate(spatialParser), 2000)
52-
.then(({ elements, ...parsed }) => ({
53-
...parsed,
54-
markdownTree: htmlToMarkdownTree(parsed.title, elements, maxCharsPerElem),
55-
}))
56-
.catch((cause) => {
57-
throw Error("Parsing failed", { cause });
58-
})
59-
.finally(() => page.close());
52+
const scrapedOutput = await timeout(page.evaluate(spatialParser), 2000)
53+
.then(({ elements, ...parsed }) => ({
54+
...parsed,
55+
markdownTree: htmlToMarkdownTree(parsed.title, elements, maxCharsPerElem),
56+
}))
57+
.catch((cause) => {
58+
throw Error("Parsing failed", { cause });
59+
});
60+
return scrapedOutput;
61+
} finally {
62+
page.close();
63+
}
6064
}

0 commit comments

Comments
 (0)