Skip to content

Commit 723982a

Browse files
Saghen and nsarrazin authored
Fix Playwright leak and use multiple contexts (#1187)
* fix: playwright leak and use multiple contexts
* lint
---------
Co-authored-by: Nathan Sarrazin <sarrazin.nathan@gmail.com>
1 parent e988c06 commit 723982a

File tree

2 files changed

+39
-36
lines changed

2 files changed

+39
-36
lines changed
Lines changed: 36 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,35 @@
11
import {
2-
type BrowserContext,
32
chromium,
43
devices,
54
type Page,
65
type BrowserContextOptions,
76
type Response,
7+
type Browser,
88
} from "playwright";
99
import { PlaywrightBlocker } from "@cliqz/adblocker-playwright";
1010
import { env } from "$env/dynamic/private";
11+
import { logger } from "$lib/server/logger";
1112

12-
// Singleton initialized by initPlaywrightService
13-
let playwrightService: Promise<{ ctx: BrowserContext; blocker: PlaywrightBlocker }> | undefined;
14-
15-
async function initPlaywrightService() {
16-
if (playwrightService) return playwrightService;
13+
const blocker = await PlaywrightBlocker.fromPrebuiltAdsAndTracking(fetch).then((blker) => {
14+
const mostBlocked = blker.blockFonts().blockMedias().blockFrames().blockImages();
15+
if (env.WEBSEARCH_JAVASCRIPT === "false") return mostBlocked.blockScripts();
16+
return mostBlocked;
17+
});
1718

19+
let browserSingleton: Promise<Browser> | undefined;
20+
async function getBrowser() {
1821
const browser = await chromium.launch({ headless: true });
19-
2022
process.on("SIGINT", () => browser.close());
23+
browser.on("disconnected", () => {
24+
logger.warn("Browser closed");
25+
browserSingleton = undefined;
26+
});
27+
return browser;
28+
}
29+
30+
async function getPlaywrightCtx() {
31+
if (!browserSingleton) browserSingleton = getBrowser();
32+
const browser = await browserSingleton;
2133

2234
const device = devices["Desktop Chrome"];
2335
const options: BrowserContextOptions = {
@@ -36,31 +48,26 @@ async function initPlaywrightService() {
3648
timezoneId: "America/New_York",
3749
locale: "en-US",
3850
};
39-
const ctx = await browser.newContext(options);
40-
const blocker = await PlaywrightBlocker.fromPrebuiltAdsAndTracking(fetch).then((blker) => {
41-
const mostBlocked = blker.blockFonts().blockMedias().blockFrames().blockImages();
42-
if (env.WEBSEARCH_JAVASCRIPT === "false") return mostBlocked.blockScripts();
43-
return mostBlocked;
44-
});
45-
46-
// Clear the singleton when the context closes
47-
ctx.on("close", () => {
48-
playwrightService = undefined;
49-
});
50-
51-
return Object.freeze({ ctx, blocker });
51+
return browser.newContext(options);
5252
}
5353

54-
export async function loadPage(url: string): Promise<{ res?: Response; page: Page }> {
55-
if (!playwrightService) playwrightService = initPlaywrightService();
56-
const { ctx, blocker } = await playwrightService;
54+
export async function withPage<T>(
55+
url: string,
56+
callback: (page: Page, response?: Response) => Promise<T>
57+
): Promise<T> {
58+
const ctx = await getPlaywrightCtx();
5759

58-
const page = await ctx.newPage();
59-
await blocker.enableBlockingInPage(page);
60+
try {
61+
const page = await ctx.newPage();
62+
await blocker.enableBlockingInPage(page);
6063

61-
const res = await page.goto(url, { waitUntil: "load", timeout: 3500 }).catch(() => {
62-
console.warn(`Failed to load page within 2s: ${url}`);
63-
});
64+
const res = await page.goto(url, { waitUntil: "load", timeout: 3500 }).catch(() => {
65+
console.warn(`Failed to load page within 2s: ${url}`);
66+
});
6467

65-
return { res: res ?? undefined, page };
68+
// await needed here so that we don't close the context before the callback is done
69+
return await callback(page, res ?? undefined);
70+
} finally {
71+
ctx.close();
72+
}
6673
}

src/lib/server/websearch/scrape/scrape.ts

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import type { WebSearchScrapedSource, WebSearchSource } from "$lib/types/WebSearch";
22
import type { MessageWebSearchUpdate } from "$lib/types/MessageUpdate";
3-
import { loadPage } from "./playwright";
3+
import { withPage } from "./playwright";
44

55
import { spatialParser } from "./parser";
66
import { htmlToMarkdownTree } from "../markdown/tree";
@@ -30,9 +30,7 @@ export const scrape = (maxCharsPerElem: number) =>
3030
};
3131

3232
export async function scrapeUrl(url: string, maxCharsPerElem: number) {
33-
const { res, page } = await loadPage(url);
34-
35-
try {
33+
return withPage(url, async (page, res) => {
3634
if (!res) throw Error("Failed to load page");
3735

3836
// Check if it's a non-html content type that we can handle directly
@@ -66,7 +64,5 @@ export async function scrapeUrl(url: string, maxCharsPerElem: number) {
6664
throw Error("Parsing failed", { cause });
6765
});
6866
return scrapedOutput;
69-
} finally {
70-
page.close();
71-
}
67+
});
7268
}

0 commit comments

Comments
 (0)