From 1a84158c5388fa0e63dff793ed0153b36fbd9052 Mon Sep 17 00:00:00 2001 From: James Ritchie Date: Tue, 24 Sep 2024 13:13:12 +0100 Subject: [PATCH 1/5] WIP puppeteer examples --- docs/examples/intro.mdx | 1 + docs/examples/puppeteer.mdx | 76 +++++++++++++++++++++++++++++++++++++ docs/mint.json | 1 + 3 files changed, 78 insertions(+) create mode 100644 docs/examples/puppeteer.mdx diff --git a/docs/examples/intro.mdx b/docs/examples/intro.mdx index 7de0967a27..fcaceeecd7 100644 --- a/docs/examples/intro.mdx +++ b/docs/examples/intro.mdx @@ -11,6 +11,7 @@ description: "Learn how to use Trigger.dev with these practical task examples." | [OpenAI with retrying](/examples/open-ai-with-retrying) | Create a reusable OpenAI task with custom retry options. | | [PDF to image](/examples/pdf-to-image) | Use `MuPDF` to turn a PDF into images and save them to Cloudflare R2. | | [React to PDF](/examples/react-pdf) | Use `react-pdf` to generate a PDF and save it to Cloudflare R2. | +| [Puppeteer](/examples/puppeteer) | Use Puppeteer to generate a PDF or scrape for data. | | [Resend email sequence](/examples/resend-email-sequence) | Send a sequence of emails over several days using Resend with Trigger.dev. | | [Sharp image processing](/examples/sharp-image-processing) | Use Sharp to process an image and save it to Cloudflare R2. | | [Vercel AI SDK](/examples/vercel-ai-sdk) | Use Vercel AI SDK to generate text using OpenAI. | diff --git a/docs/examples/puppeteer.mdx b/docs/examples/puppeteer.mdx new file mode 100644 index 0000000000..cb493f0c92 --- /dev/null +++ b/docs/examples/puppeteer.mdx @@ -0,0 +1,76 @@ +--- +title: "Puppeteer" +sidebarTitle: "Puppeteer" +description: "These examples demonstrate how to use Puppeteer with Trigger.dev." +--- + +import LocalDevelopment from "/snippets/local-development-extensions.mdx"; + +## Overview + +There are 2 example tasks to follow on this page: + +1. [Generate a PDF](/examples/puppeteer#generate-a-pdf) +2. 
[Scrape data from a website](/examples/puppeteer#scrape-data-from-a-website) + + + **WEB SCRAPING WARNING:** Direct scraping of third-party websites without explicit permission using Trigger.dev Cloud is strictly prohibited and will result in immediate account suspension. If web scraping is necessary for your project, you MUST use a proxy service to comply with our terms of service. + + +## Adding build configurations + +To use all examples on this page, you'll first need to add these build settings to your `trigger.config.ts` file: + +```ts trigger.config.ts +import { defineConfig } from "@trigger.dev/sdk/v3"; + +export default defineConfig({ + project: "", + // Your other config settings... + build: { + // This is required to use the Puppeteer library + external: ["puppeteer"], + }, +}); +``` + + + Any packages that install or build a native binary should be added to external, as native binaries + cannot be bundled. + + +## Generate a PDF from a web page + +```ts trigger/generate-pdf.ts +code here +``` + +### Testing your task + +To test this task in the dashboard, you can use the following payload: + +```json +{ + "imageUrl": "", // Replace with a URL to a JPEG image + "watermarkUrl": "" // Replace with a URL to a PNG watermark image +} +``` + +## Scrape data from a website + +```ts trigger/scrape-website.ts +code here +``` + +### Testing your task + +To test this task in the dashboard, you can use the following payload: + +```json +{ + "imageUrl": "", // Replace with a URL to a JPEG image + "watermarkUrl": "" // Replace with a URL to a PNG watermark image +} +``` + + \ No newline at end of file diff --git a/docs/mint.json b/docs/mint.json index be73ca4cdc..8a1f0fb9ce 100644 --- a/docs/mint.json +++ b/docs/mint.json @@ -282,6 +282,7 @@ "examples/ffmpeg-video-processing", "examples/open-ai-with-retrying", "examples/pdf-to-image", + "examples/puppeteer", "examples/sharp-image-processing", "examples/react-pdf", "examples/resend-email-sequence", From 
1d8502e8ad5442b73193c760b991f8ddfc1a91f9 Mon Sep 17 00:00:00 2001 From: James Ritchie Date: Tue, 24 Sep 2024 16:21:55 +0100 Subject: [PATCH 2/5] 2 puppeteer examples --- docs/examples/puppeteer.mdx | 134 ++++++++++++++++++++----- docs/snippets/web-scraping-warning.mdx | 3 + 2 files changed, 113 insertions(+), 24 deletions(-) create mode 100644 docs/snippets/web-scraping-warning.mdx diff --git a/docs/examples/puppeteer.mdx b/docs/examples/puppeteer.mdx index cb493f0c92..27f06d9723 100644 --- a/docs/examples/puppeteer.mdx +++ b/docs/examples/puppeteer.mdx @@ -5,17 +5,17 @@ description: "These examples demonstrate how to use Puppeteer with Trigger.dev." --- import LocalDevelopment from "/snippets/local-development-extensions.mdx"; +import ScrapingWarning from "/snippets/web-scraping-warning.mdx"; ## Overview There are 2 example tasks to follow on this page: -1. [Generate a PDF](/examples/puppeteer#generate-a-pdf) -2. [Scrape data from a website](/examples/puppeteer#scrape-data-from-a-website) +1. [Basic example](/examples/puppeteer#basic-example) +2. [Generate a PDF from a web page](/examples/puppeteer#generate-a-pdf-from-a-web-page) +3. [Scrape data from a website](/examples/puppeteer#scrape-data-from-a-website) - - **WEB SCRAPING WARNING:** Direct scraping of third-party websites without explicit permission using Trigger.dev Cloud is strictly prohibited and will result in immediate account suspension. If web scraping is necessary for your project, you MUST use a proxy service to comply with our terms of service. - + ## Adding build configurations @@ -35,42 +35,128 @@ export default defineConfig({ ``` - Any packages that install or build a native binary should be added to external, as native binaries - cannot be bundled. + Ensure you use `puppeteer` not `puppeteer-core` in your build configuration. 
-## Generate a PDF from a web page +## Set an environment variable -```ts trigger/generate-pdf.ts -code here +Add the following environment variable in your Trigger.dev dashboard on the Environment Variables page: + +```bash +PUPPETEER_EXECUTABLE_PATH: "/usr/bin/google-chrome-stable", +``` + +## Basic example + +### Overview + +In this example we use Puppeteer to log out the title of a web page, in this case Google. + +### Task code + +```ts trigger/puppeteer-basic-example.ts +import { logger, task } from "@trigger.dev/sdk/v3"; +import puppeteer from "puppeteer"; + +export const puppeteerTask = task({ + id: "puppeteer-log-title", + run: async () => { + const browser = await puppeteer.launch(); + const page = await browser.newPage(); + + await page.goto("https://google.com"); + + const content = await page.title(); + logger.info("Content", { content }); + + await browser.close(); + }, +}); ``` ### Testing your task -To test this task in the dashboard, you can use the following payload: +There's no payload required for this task so you can just click "Run test" from the Test page in the dashboard. + +## Generate a PDF from a web page + +### Overview + +In this example we use Puppeteer to generate a PDF from a web page and upload it to Cloudflare R2. + +### Task code + +```ts trigger/puppeteer-generate-pdf.ts +import { logger, task } from "@trigger.dev/sdk/v3"; +import puppeteer from "puppeteer"; +import { PutObjectCommand, S3Client } from "@aws-sdk/client-s3"; + +// Initialize S3 client +const s3Client = new S3Client({ + region: "auto", + endpoint: process.env.S3_ENDPOINT, + credentials: { + accessKeyId: process.env.R2_ACCESS_KEY_ID ?? "", + secretAccessKey: process.env.R2_SECRET_ACCESS_KEY ?? 
"", + }, +}); + +export const puppeteerWebpageToPDF = task({ + id: "puppeteer-webpage-to-pdf", + run: async () => { + const browser = await puppeteer.launch(); + const page = await browser.newPage(); + const response = await page.goto("https://google.com"); + const url = response?.url() ?? "No URL found"; + + // Generate PDF from the webpage + const generatePdf = await page.pdf(); + + logger.info("PDF generated from URL", { url }); + + await browser.close(); + + // Upload to R2 + const s3Key = `pdfs/test.pdf`; + const uploadParams = { + Bucket: process.env.S3_BUCKET, + Key: s3Key, + Body: generatePdf, + ContentType: "application/pdf", + }; + + logger.log("Uploading to R2 with params", uploadParams); + + // Upload the PDF to R2 and return the URL. + await s3Client.send(new PutObjectCommand(uploadParams)); + const s3Url = `https://${process.env.S3_BUCKET}.s3.amazonaws.com/${s3Key}`; + logger.log("PDF uploaded to R2", { url: s3Url }); + return { pdfUrl: s3Url }; + }, +}); -```json -{ - "imageUrl": "", // Replace with a URL to a JPEG image - "watermarkUrl": "" // Replace with a URL to a PNG watermark image -} ``` +### Testing your task + +There's no payload required for this task so you can just click "Run test" from the Test page in the dashboard. + ## Scrape data from a website +### Overview + +In this example we use Puppeteer with a proxy to scrape the content from a webpage and log it out. + + + +### Task code + ```ts trigger/scrape-website.ts code here ``` ### Testing your task -To test this task in the dashboard, you can use the following payload: - -```json -{ - "imageUrl": "", // Replace with a URL to a JPEG image - "watermarkUrl": "" // Replace with a URL to a PNG watermark image -} -``` +There's no payload required for this task so you can just click "Run test" from the Test page in the dashboard. 
\ No newline at end of file diff --git a/docs/snippets/web-scraping-warning.mdx b/docs/snippets/web-scraping-warning.mdx new file mode 100644 index 0000000000..0e979ede0d --- /dev/null +++ b/docs/snippets/web-scraping-warning.mdx @@ -0,0 +1,3 @@ + + **WEB SCRAPING WARNING:** Direct scraping of third-party websites without explicit permission using Trigger.dev Cloud is strictly prohibited and will result in immediate account suspension. If web scraping is necessary for your project, you MUST use a proxy service to comply with our terms of service. + \ No newline at end of file From c66f98dd0a9a6cb3ebe42a1f171047de134411ba Mon Sep 17 00:00:00 2001 From: James Ritchie Date: Wed, 25 Sep 2024 11:34:34 +0100 Subject: [PATCH 3/5] Added another puppeteer example --- docs/config/config-file.mdx | 31 ++++++++++++++- docs/examples/intro.mdx | 2 +- docs/examples/puppeteer.mdx | 77 ++++++++++++++++++++++++++++++------- 3 files changed, 95 insertions(+), 15 deletions(-) diff --git a/docs/config/config-file.mdx b/docs/config/config-file.mdx index 593e8fd9ab..565e159c62 100644 --- a/docs/config/config-file.mdx +++ b/docs/config/config-file.mdx @@ -4,6 +4,7 @@ sidebarTitle: "Configuration" description: "This file is used to configure your project and how it's built." --- +import ScrapingWarning from "/snippets/web-scraping-warning.mdx"; import BundlePackages from "/snippets/bundle-packages.mdx"; The `trigger.config.ts` file is used to configure your Trigger.dev project. It is a TypeScript file at the root of your project that exports a default configuration object. Here's an example: @@ -473,6 +474,34 @@ export default defineConfig({ }); ``` +#### puppeteer + + + +To use Puppeteer in your project, add these build settings to your `trigger.config.ts` file: + +```ts trigger.config.ts +import { defineConfig } from "@trigger.dev/sdk/v3"; + +export default defineConfig({ + project: "", + // Your other config settings... 
+ build: { + extensions: [puppeteer()], + }, +}); +``` + +And add the following environment variable in your Trigger.dev dashboard on the Environment Variables page: + +```bash +PUPPETEER_EXECUTABLE_PATH: "/usr/bin/google-chrome-stable", +``` + + + Ensure you use `puppeteer` not `puppeteer-core` in your build configuration. + + #### ffmpeg You can add the `ffmpeg` build extension to your build process: @@ -482,7 +511,7 @@ import { defineConfig } from "@trigger.dev/sdk/v3"; import { ffmpeg } from "@trigger.dev/build/extensions/core"; export default defineConfig({ - //..other stuff + // Your other config settings... build: { extensions: [ffmpeg()], }, diff --git a/docs/examples/intro.mdx b/docs/examples/intro.mdx index fcaceeecd7..2209865aaf 100644 --- a/docs/examples/intro.mdx +++ b/docs/examples/intro.mdx @@ -11,7 +11,7 @@ description: "Learn how to use Trigger.dev with these practical task examples." | [OpenAI with retrying](/examples/open-ai-with-retrying) | Create a reusable OpenAI task with custom retry options. | | [PDF to image](/examples/pdf-to-image) | Use `MuPDF` to turn a PDF into images and save them to Cloudflare R2. | | [React to PDF](/examples/react-pdf) | Use `react-pdf` to generate a PDF and save it to Cloudflare R2. | -| [Puppeteer](/examples/puppeteer) | Use Puppeteer to generate a PDF or scrape for data. | +| [Puppeteer](/examples/puppeteer) | Use Puppeteer to generate a PDF or scrape a webpage. | | [Resend email sequence](/examples/resend-email-sequence) | Send a sequence of emails over several days using Resend with Trigger.dev. | | [Sharp image processing](/examples/sharp-image-processing) | Use Sharp to process an image and save it to Cloudflare R2. | | [Vercel AI SDK](/examples/vercel-ai-sdk) | Use Vercel AI SDK to generate text using OpenAI. 
| diff --git a/docs/examples/puppeteer.mdx b/docs/examples/puppeteer.mdx index 27f06d9723..674d63704b 100644 --- a/docs/examples/puppeteer.mdx +++ b/docs/examples/puppeteer.mdx @@ -9,15 +9,15 @@ import ScrapingWarning from "/snippets/web-scraping-warning.mdx"; ## Overview -There are 2 example tasks to follow on this page: +There are 3 example tasks to follow on this page: 1. [Basic example](/examples/puppeteer#basic-example) 2. [Generate a PDF from a web page](/examples/puppeteer#generate-a-pdf-from-a-web-page) -3. [Scrape data from a website](/examples/puppeteer#scrape-data-from-a-website) +3. [Scrape content from a web page](/examples/puppeteer#scrape-data-from-a-website) -## Adding build configurations +## Build configurations To use all examples on this page, you'll first need to add these build settings to your `trigger.config.ts` file: @@ -29,15 +29,11 @@ export default defineConfig({ // Your other config settings... build: { // This is required to use the Puppeteer library - external: ["puppeteer"], + extensions: [puppeteer()], }, }); ``` - - Ensure you use `puppeteer` not `puppeteer-core` in your build configuration. - - ## Set an environment variable Add the following environment variable in your Trigger.dev dashboard on the Environment Variables page: @@ -109,7 +105,7 @@ export const puppeteerWebpageToPDF = task({ const response = await page.goto("https://google.com"); const url = response?.url() ?? "No URL found"; - // Generate PDF from the webpage + // Generate PDF from the web page const generatePdf = await page.pdf(); logger.info("PDF generated from URL", { url }); @@ -141,22 +137,77 @@ export const puppeteerWebpageToPDF = task({ There's no payload required for this task so you can just click "Run test" from the Test page in the dashboard. -## Scrape data from a website +## Scrape content from a web page ### Overview -In this example we use Puppeteer with a proxy to scrape the content from a webpage and log it out. 
+In this example we use Puppeteer with a BrowserBase proxy to scrape the GitHub stars count from the [Trigger.dev](https://trigger.dev) landing page and log it out. ### Task code ```ts trigger/scrape-website.ts -code here +import { logger, task } from "@trigger.dev/sdk/v3"; +import puppeteer from "puppeteer-core"; + +export const puppeteerScrapeWithProxy = task({ + id: "puppeteer-scrape-with-proxy", + run: async () => { + const browser = await puppeteer.connect({ + browserWSEndpoint: `wss://connect.browserbase.com?apiKey=${process.env.BROWSERBASE_API_KEY}`, + }); + + const page = await browser.newPage(); + + // Set up BrowserBase proxy authentication + await page.authenticate({ + username: "api", + password: process.env.BROWSERBASE_API_KEY || "", + }); + + try { + // Navigate to the target website + await page.goto("https://trigger.dev", { waitUntil: "networkidle0" }); + + // Scrape the GitHub stars count + const starCount = await page.evaluate(() => { + const starElement = document.querySelector(".github-star-count"); + const text = starElement?.textContent ?? "0"; + const numberText = text.replace(/[^0-9]/g, ""); + return parseInt(numberText); + }); + + logger.info("GitHub star count", { starCount }); + + return { starCount }; + } catch (error) { + logger.error("Error during scraping", { + error: error instanceof Error ? error.message : String(error), + }); + throw error; + } finally { + await browser.close(); + } + }, +}); ``` ### Testing your task There's no payload required for this task so you can just click "Run test" from the Test page in the dashboard. - \ No newline at end of file + + +## Proxying + +If you're using Trigger.dev Cloud and Puppeteer or any other tool to scrape content from websites you don't own, you'll need to proxy your requests. 
**If you don't, you'll risk getting our IP address blocked and we will ban you from our service.** + +Here is a list of proxy services we recommend: + +- [Browserbase](https://www.browserbase.com/) +- [Brightdata](https://brightdata.com/) +- [Browserless](https://browserless.io/) +- [Oxylabs](https://oxylabs.io/) +- [ScrapingBee](https://scrapingbee.com/) +- [Smartproxy](https://smartproxy.com/) \ No newline at end of file From 6a800d5466a0bfdb0c75120726bb02f37b4b2890 Mon Sep 17 00:00:00 2001 From: James Ritchie Date: Wed, 25 Sep 2024 13:40:26 +0100 Subject: [PATCH 4/5] Copy improvements --- docs/config/config-file.mdx | 6 +++--- docs/examples/puppeteer.mdx | 28 +++++++++++++++----------- docs/snippets/web-scraping-warning.mdx | 2 +- 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/docs/config/config-file.mdx b/docs/config/config-file.mdx index 565e159c62..3546a42b3d 100644 --- a/docs/config/config-file.mdx +++ b/docs/config/config-file.mdx @@ -498,9 +498,7 @@ And add the following environment variable in your Trigger.dev dashboard on the PUPPETEER_EXECUTABLE_PATH: "/usr/bin/google-chrome-stable", ``` - - Ensure you use `puppeteer` not `puppeteer-core` in your build configuration. - +Follow [this example](/examples/puppeteer) to get set up with Trigger.dev and Puppeteer in your project. #### ffmpeg @@ -534,6 +532,8 @@ export default defineConfig({ This extension will also add the `FFMPEG_PATH` and `FFPROBE_PATH` to your environment variables, making it easy to use popular ffmpeg libraries like `fluent-ffmpeg`. +Follow [this example](/examples/ffmpeg-video-processing) to get set up with Trigger.dev and FFmpeg in your project.
+ #### esbuild plugins You can easily add existing or custom esbuild plugins to your build process using the `esbuildPlugin` extension: diff --git a/docs/examples/puppeteer.mdx b/docs/examples/puppeteer.mdx index 674d63704b..8f003b0d95 100644 --- a/docs/examples/puppeteer.mdx +++ b/docs/examples/puppeteer.mdx @@ -13,9 +13,9 @@ There are 3 example tasks to follow on this page: 1. [Basic example](/examples/puppeteer#basic-example) 2. [Generate a PDF from a web page](/examples/puppeteer#generate-a-pdf-from-a-web-page) -3. [Scrape content from a web page](/examples/puppeteer#scrape-data-from-a-website) +3. [Scrape content from a web page](/examples/puppeteer#scrape-content-from-a-web-page) - + ## Build configurations @@ -34,9 +34,11 @@ export default defineConfig({ }); ``` +Learn more about [build configurations](/config/config-file#build-configuration) including setting default retry settings, customizing the build environment, and more. + ## Set an environment variable -Add the following environment variable in your Trigger.dev dashboard on the Environment Variables page: +Set the following environment variable in your [Trigger.dev dashboard](/deploy-environment-variables) or [using the SDK](/deploy-environment-variables#in-your-code): ```bash PUPPETEER_EXECUTABLE_PATH: "/usr/bin/google-chrome-stable", @@ -46,7 +48,7 @@ PUPPETEER_EXECUTABLE_PATH: "/usr/bin/google-chrome-stable", ### Overview -In this example we use Puppeteer to log out the title of a web page, in this case Google. +In this example we use [Puppeteer](https://pptr.dev/) to log out the title of a web page, in this case from the [Trigger.dev](https://trigger.dev) landing page. 
### Task code @@ -60,7 +62,7 @@ export const puppeteerTask = task({ const browser = await puppeteer.launch(); const page = await browser.newPage(); - await page.goto("https://google.com"); + await page.goto("https://trigger.dev"); const content = await page.title(); logger.info("Content", { content }); @@ -72,13 +74,13 @@ export const puppeteerTask = task({ ### Testing your task -There's no payload required for this task so you can just click "Run test" from the Test page in the dashboard. +There's no payload required for this task so you can just click "Run test" from the Test page in the dashboard. Learn more about testing tasks [here](/run-tests). ## Generate a PDF from a web page ### Overview -In this example we use Puppeteer to generate a PDF from a web page and upload it to Cloudflare R2. +In this example we use [Puppeteer](https://pptr.dev/) to generate a PDF from the [Trigger.dev](https://trigger.dev) landing page and upload it to [Cloudflare R2](https://developers.cloudflare.com/r2/). ### Task code @@ -102,7 +104,7 @@ export const puppeteerWebpageToPDF = task({ run: async () => { const browser = await puppeteer.launch(); const page = await browser.newPage(); - const response = await page.goto("https://google.com"); + const response = await page.goto("https://trigger.dev"); const url = response?.url() ?? "No URL found"; // Generate PDF from the web page @@ -135,15 +137,17 @@ export const puppeteerWebpageToPDF = task({ ### Testing your task -There's no payload required for this task so you can just click "Run test" from the Test page in the dashboard. +There's no payload required for this task so you can just click "Run test" from the Test page in the dashboard. Learn more about testing tasks [here](/run-tests). ## Scrape content from a web page ### Overview -In this example we use Puppeteer with a BrowserBase proxy to scrape the GitHub stars count from the [Trigger.dev](https://trigger.dev) landing page and log it out. 
+In this example we use [Puppeteer](https://pptr.dev/) with a [BrowserBase](https://www.browserbase.com/) proxy to scrape the GitHub stars count from the [Trigger.dev](https://trigger.dev) landing page and log it out. See [this list](/examples/puppeteer#proxying) for more proxying services we recommend. - + + **WEB SCRAPING:** When web scraping, you MUST use a proxy to comply with our terms of service. Direct scraping of third-party websites without the site owner's permission using Trigger.dev Cloud is prohibited and will result in account suspension. + ### Task code @@ -195,7 +199,7 @@ export const puppeteerScrapeWithProxy = task({ ### Testing your task -There's no payload required for this task so you can just click "Run test" from the Test page in the dashboard. +There's no payload required for this task so you can just click "Run test" from the Test page in the dashboard. Learn more about testing tasks [here](/run-tests). diff --git a/docs/snippets/web-scraping-warning.mdx b/docs/snippets/web-scraping-warning.mdx index 0e979ede0d..651d69da22 100644 --- a/docs/snippets/web-scraping-warning.mdx +++ b/docs/snippets/web-scraping-warning.mdx @@ -1,3 +1,3 @@ - **WEB SCRAPING WARNING:** Direct scraping of third-party websites without explicit permission using Trigger.dev Cloud is strictly prohibited and will result in immediate account suspension. If web scraping is necessary for your project, you MUST use a proxy service to comply with our terms of service. + **WEB SCRAPING:** When web scraping, you MUST use a proxy to comply with our terms of service. Direct scraping of third-party websites without the site owner's permission using Trigger.dev Cloud is prohibited and will result in account suspension. See [this example](/examples/puppeteer#scrape-content-from-a-web-page) using a proxy. 
\ No newline at end of file From 25255a141ce90370c2b381cace6352d442b99f27 Mon Sep 17 00:00:00 2001 From: James Ritchie Date: Wed, 25 Sep 2024 13:54:35 +0100 Subject: [PATCH 5/5] Improved the web scraping warning --- docs/examples/puppeteer.mdx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/examples/puppeteer.mdx b/docs/examples/puppeteer.mdx index 8f003b0d95..94bd20d2ff 100644 --- a/docs/examples/puppeteer.mdx +++ b/docs/examples/puppeteer.mdx @@ -145,9 +145,9 @@ There's no payload required for this task so you can just click "Run test" from In this example we use [Puppeteer](https://pptr.dev/) with a [BrowserBase](https://www.browserbase.com/) proxy to scrape the GitHub stars count from the [Trigger.dev](https://trigger.dev) landing page and log it out. See [this list](/examples/puppeteer#proxying) for more proxying services we recommend. - - **WEB SCRAPING:** When web scraping, you MUST use a proxy to comply with our terms of service. Direct scraping of third-party websites without the site owner's permission using Trigger.dev Cloud is prohibited and will result in account suspension. - + + When web scraping, you MUST use the technique below, which uses a proxy with Puppeteer. Direct scraping without using `browserWSEndpoint` is prohibited and will result in account suspension. + ### Task code