From 1a84158c5388fa0e63dff793ed0153b36fbd9052 Mon Sep 17 00:00:00 2001
From: James Ritchie <james@jamesritchie.co.uk>
Date: Tue, 24 Sep 2024 13:13:12 +0100
Subject: [PATCH 1/5] WIP puppeteer examples

---
 docs/examples/intro.mdx     |  1 +
 docs/examples/puppeteer.mdx | 76 +++++++++++++++++++++++++++++++++++++
 docs/mint.json              |  1 +
 3 files changed, 78 insertions(+)
 create mode 100644 docs/examples/puppeteer.mdx
diff --git a/docs/examples/intro.mdx b/docs/examples/intro.mdx
index 7de0967a27..fcaceeecd7 100644
--- a/docs/examples/intro.mdx
+++ b/docs/examples/intro.mdx
@@ -11,6 +11,7 @@ description: "Learn how to use Trigger.dev with these practical task examples."
 | [OpenAI with retrying](/examples/open-ai-with-retrying)       | Create a reusable OpenAI task with custom retry options.                    |
 | [PDF to image](/examples/pdf-to-image)                        | Use `MuPDF` to turn a PDF into images and save them to Cloudflare R2.       |
 | [React to PDF](/examples/react-pdf)                           | Use `react-pdf` to generate a PDF and save it to Cloudflare R2.             |
+| [Puppeteer](/examples/puppeteer)                              | Use Puppeteer to generate a PDF or scrape for data.                         |
 | [Resend email sequence](/examples/resend-email-sequence)      | Send a sequence of emails over several days using Resend with Trigger.dev.  |
 | [Sharp image processing](/examples/sharp-image-processing)    | Use Sharp to process an image and save it to Cloudflare R2.                 |
 | [Vercel AI SDK](/examples/vercel-ai-sdk)                      | Use Vercel AI SDK to generate text using OpenAI.                            |
diff --git a/docs/examples/puppeteer.mdx b/docs/examples/puppeteer.mdx
new file mode 100644
index 0000000000..cb493f0c92
--- /dev/null
+++ b/docs/examples/puppeteer.mdx
@@ -0,0 +1,76 @@
+---
+title: "Puppeteer"
+sidebarTitle: "Puppeteer"
+description: "These examples demonstrate how to use Puppeteer with Trigger.dev."
+---
+
+import LocalDevelopment from "/snippets/local-development-extensions.mdx";
+
+## Overview
+
+There are 2 example tasks to follow on this page:
+
+1. [Generate a PDF](/examples/puppeteer#generate-a-pdf)
+2. [Scrape data from a website](/examples/puppeteer#scrape-data-from-a-website)
+
+<Warning>
+  **WEB SCRAPING WARNING:** Direct scraping of third-party websites without explicit permission using Trigger.dev Cloud is strictly prohibited and will result in immediate account suspension. If web scraping is necessary for your project, you MUST use a proxy service to comply with our terms of service.
+</Warning>
+
+## Adding build configurations
+
+To use all examples on this page, you'll first need to add these build settings to your `trigger.config.ts` file:
+
+```ts trigger.config.ts
+import { defineConfig } from "@trigger.dev/sdk/v3";
+
+export default defineConfig({
+  project: "<project ref>",
+  // Your other config settings...
+  build: {
+    // This is required to use the Puppeteer library
+    external: ["puppeteer"],
+  },
+});
+```
+
+<Note>
+  Any packages that install or build a native binary should be added to external, as native binaries
+  cannot be bundled.
+</Note>
+
+## Generate a PDF from a web page
+
+```ts trigger/generate-pdf.ts
+code here
+```
+
+### Testing your task
+
+To test this task in the dashboard, you can use the following payload:
+
+```json
+{
+  "imageUrl": "<an-image-url.jpg>", // Replace with a URL to a JPEG image
+  "watermarkUrl": "<an-image-url.png>" // Replace with a URL to a PNG watermark image
+}
+```
+
+## Scrape data from a website
+
+```ts trigger/scrape-website.ts
+code here
+```
+
+### Testing your task
+
+To test this task in the dashboard, you can use the following payload:
+
+```json
+{
+  "imageUrl": "<an-image-url.jpg>", // Replace with a URL to a JPEG image
+  "watermarkUrl": "<an-image-url.png>" // Replace with a URL to a PNG watermark image
+}
+```
+
+<LocalDevelopment packages={"the Puppeteer library"} />
\ No newline at end of file
diff --git a/docs/mint.json b/docs/mint.json
index be73ca4cdc..8a1f0fb9ce 100644
--- a/docs/mint.json
+++ b/docs/mint.json
@@ -282,6 +282,7 @@
         "examples/ffmpeg-video-processing",
         "examples/open-ai-with-retrying",
         "examples/pdf-to-image",
+        "examples/puppeteer",
         "examples/sharp-image-processing",
         "examples/react-pdf",
         "examples/resend-email-sequence",

From 1d8502e8ad5442b73193c760b991f8ddfc1a91f9 Mon Sep 17 00:00:00 2001
From: James Ritchie <james@jamesritchie.co.uk>
Date: Tue, 24 Sep 2024 16:21:55 +0100
Subject: [PATCH 2/5] 2 puppeteer examples

---
 docs/examples/puppeteer.mdx            | 134 ++++++++++++++++++++-----
 docs/snippets/web-scraping-warning.mdx |   3 +
 2 files changed, 113 insertions(+), 24 deletions(-)
 create mode 100644 docs/snippets/web-scraping-warning.mdx

diff --git a/docs/examples/puppeteer.mdx b/docs/examples/puppeteer.mdx
index cb493f0c92..27f06d9723 100644
--- a/docs/examples/puppeteer.mdx
+++ b/docs/examples/puppeteer.mdx
@@ -5,17 +5,17 @@ description: "These examples demonstrate how to use Puppeteer with Trigger.dev."
 ---
 
 import LocalDevelopment from "/snippets/local-development-extensions.mdx";
+import ScrapingWarning from "/snippets/web-scraping-warning.mdx";
 
 ## Overview
 
 There are 2 example tasks to follow on this page:
 
-1. [Generate a PDF](/examples/puppeteer#generate-a-pdf)
-2. [Scrape data from a website](/examples/puppeteer#scrape-data-from-a-website)
+1. [Basic example](/examples/puppeteer#basic-example)
+2. [Generate a PDF from a web page](/examples/puppeteer#generate-a-pdf-from-a-web-page)
+3. [Scrape data from a website](/examples/puppeteer#scrape-data-from-a-website)
 
-<Warning>
-  **WEB SCRAPING WARNING:** Direct scraping of third-party websites without explicit permission using Trigger.dev Cloud is strictly prohibited and will result in immediate account suspension. If web scraping is necessary for your project, you MUST use a proxy service to comply with our terms of service.
-</Warning>
+<ScrapingWarning />
 
 ## Adding build configurations
 
@@ -35,42 +35,128 @@ export default defineConfig({
 ```
 
 <Note>
-  Any packages that install or build a native binary should be added to external, as native binaries
-  cannot be bundled.
+  Ensure you use `puppeteer` not `puppeteer-core` in your build configuration.
 </Note>
 
-## Generate a PDF from a web page
+## Set an environment variable
 
-```ts trigger/generate-pdf.ts
-code here
+Add the following environment variable in your Trigger.dev dashboard on the Environment Variables page:
+
+```bash
+PUPPETEER_EXECUTABLE_PATH: "/usr/bin/google-chrome-stable",
+```
+
+## Basic example
+
+### Overview
+
+In this example we use Puppeteer to log out the title of a web page, in this case Google.
+
+### Task code
+
+```ts trigger/puppeteer-basic-example.ts
+import { logger, task } from "@trigger.dev/sdk/v3";
+import puppeteer from "puppeteer";
+
+export const puppeteerTask = task({
+  id: "puppeteer-log-title",
+  run: async () => {
+    const browser = await puppeteer.launch();
+    const page = await browser.newPage();
+
+    await page.goto("https://google.com");
+
+    const content = await page.title();
+    logger.info("Content", { content });
+
+    await browser.close();
+  },
+});
 ```
 
 ### Testing your task
 
-To test this task in the dashboard, you can use the following payload:
+There's no payload required for this task so you can just click "Run test" from the Test page in the dashboard.
+
+## Generate a PDF from a web page
+
+### Overview
+
+In this example we use Puppeteer to generate a PDF from a web page and upload it to Cloudflare R2.
+
+### Task code
+
+```ts trigger/puppeteer-generate-pdf.ts
+import { logger, task } from "@trigger.dev/sdk/v3";
+import puppeteer from "puppeteer";
+import { PutObjectCommand, S3Client } from "@aws-sdk/client-s3";
+
+// Initialize S3 client
+const s3Client = new S3Client({
+  region: "auto",
+  endpoint: process.env.S3_ENDPOINT,
+  credentials: {
+    accessKeyId: process.env.R2_ACCESS_KEY_ID ?? "",
+    secretAccessKey: process.env.R2_SECRET_ACCESS_KEY ?? "",
+  },
+});
+
+export const puppeteerWebpageToPDF = task({
+  id: "puppeteer-webpage-to-pdf",
+  run: async () => {
+    const browser = await puppeteer.launch();
+    const page = await browser.newPage();
+    const response = await page.goto("https://google.com");
+    const url = response?.url() ?? "No URL found";
+
+    // Generate PDF from the webpage
+    const generatePdf = await page.pdf();
+
+    logger.info("PDF generated from URL", { url });
+
+    await browser.close();
+
+    // Upload to R2
+    const s3Key = `pdfs/test.pdf`;
+    const uploadParams = {
+      Bucket: process.env.S3_BUCKET,
+      Key: s3Key,
+      Body: generatePdf,
+      ContentType: "application/pdf",
+    };
+
+    logger.log("Uploading to R2 with params", uploadParams);
+
+    // Upload the PDF to R2 and return the URL.
+    await s3Client.send(new PutObjectCommand(uploadParams));
+    const s3Url = `https://${process.env.S3_BUCKET}.s3.amazonaws.com/${s3Key}`;
+    logger.log("PDF uploaded to R2", { url: s3Url });
+    return { pdfUrl: s3Url };
+  },
+});
 
-```json
-{
-  "imageUrl": "<an-image-url.jpg>", // Replace with a URL to a JPEG image
-  "watermarkUrl": "<an-image-url.png>" // Replace with a URL to a PNG watermark image
-}
 ```
 
+### Testing your task
+
+There's no payload required for this task so you can just click "Run test" from the Test page in the dashboard.
+
 ## Scrape data from a website
 
+### Overview
+
+In this example we use Puppeteer with a proxy to scrape the content from a webpage and log it out.
+
+<ScrapingWarning />
+
+### Task code
+
 ```ts trigger/scrape-website.ts
 code here
 ```
 
 ### Testing your task
 
-To test this task in the dashboard, you can use the following payload:
-
-```json
-{
-  "imageUrl": "<an-image-url.jpg>", // Replace with a URL to a JPEG image
-  "watermarkUrl": "<an-image-url.png>" // Replace with a URL to a PNG watermark image
-}
-```
+There's no payload required for this task so you can just click "Run test" from the Test page in the dashboard.
 
 <LocalDevelopment packages={"the Puppeteer library"} />
\ No newline at end of file
diff --git a/docs/snippets/web-scraping-warning.mdx b/docs/snippets/web-scraping-warning.mdx
new file mode 100644
index 0000000000..0e979ede0d
--- /dev/null
+++ b/docs/snippets/web-scraping-warning.mdx
@@ -0,0 +1,3 @@
+<Warning>
+  **WEB SCRAPING WARNING:** Direct scraping of third-party websites without explicit permission using Trigger.dev Cloud is strictly prohibited and will result in immediate account suspension. If web scraping is necessary for your project, you MUST use a proxy service to comply with our terms of service.
+</Warning>
\ No newline at end of file

From c66f98dd0a9a6cb3ebe42a1f171047de134411ba Mon Sep 17 00:00:00 2001
From: James Ritchie <james@jamesritchie.co.uk>
Date: Wed, 25 Sep 2024 11:34:34 +0100
Subject: [PATCH 3/5] Added another puppeteer example

---
 docs/config/config-file.mdx | 31 ++++++++++++++-
 docs/examples/intro.mdx     |  2 +-
 docs/examples/puppeteer.mdx | 77 ++++++++++++++++++++++++++++++-------
 3 files changed, 95 insertions(+), 15 deletions(-)

diff --git a/docs/config/config-file.mdx b/docs/config/config-file.mdx
index 593e8fd9ab..565e159c62 100644
--- a/docs/config/config-file.mdx
+++ b/docs/config/config-file.mdx
@@ -4,6 +4,7 @@ sidebarTitle: "Configuration"
 description: "This file is used to configure your project and how it's built."
 ---
 
+import ScrapingWarning from "/snippets/web-scraping-warning.mdx";
 import BundlePackages from "/snippets/bundle-packages.mdx";
 
 The `trigger.config.ts` file is used to configure your Trigger.dev project. It is a TypeScript file at the root of your project that exports a default configuration object. Here's an example:
@@ -473,6 +474,34 @@ export default defineConfig({
 });
 ```
 
+#### puppeteer
+
+<ScrapingWarning />
+
+To use Puppeteer in your project, add these build settings to your `trigger.config.ts` file:
+
+```ts trigger.config.ts
+import { defineConfig } from "@trigger.dev/sdk/v3";
+
+export default defineConfig({
+  project: "<project ref>",
+  // Your other config settings...
+  build: {
+    extensions: [puppeteer()],
+  },
+});
+```
+
+And add the following environment variable in your Trigger.dev dashboard on the Environment Variables page:
+
+```bash
+PUPPETEER_EXECUTABLE_PATH: "/usr/bin/google-chrome-stable",
+```
+
+<Note>
+  Ensure you use `puppeteer` not `puppeteer-core` in your build configuration.
+</Note>
+
 #### ffmpeg
 
 You can add the `ffmpeg` build extension to your build process:
@@ -482,7 +511,7 @@ import { defineConfig } from "@trigger.dev/sdk/v3";
 import { ffmpeg } from "@trigger.dev/build/extensions/core";
 
 export default defineConfig({
-  //..other stuff
+  // Your other config settings...
   build: {
     extensions: [ffmpeg()],
   },
diff --git a/docs/examples/intro.mdx b/docs/examples/intro.mdx
index fcaceeecd7..2209865aaf 100644
--- a/docs/examples/intro.mdx
+++ b/docs/examples/intro.mdx
@@ -11,7 +11,7 @@ description: "Learn how to use Trigger.dev with these practical task examples."
 | [OpenAI with retrying](/examples/open-ai-with-retrying)       | Create a reusable OpenAI task with custom retry options.                    |
 | [PDF to image](/examples/pdf-to-image)                        | Use `MuPDF` to turn a PDF into images and save them to Cloudflare R2.       |
 | [React to PDF](/examples/react-pdf)                           | Use `react-pdf` to generate a PDF and save it to Cloudflare R2.             |
-| [Puppeteer](/examples/puppeteer)                              | Use Puppeteer to generate a PDF or scrape for data.                         |
+| [Puppeteer](/examples/puppeteer)                              | Use Puppeteer to generate a PDF or scrape a web page.                       |
 | [Resend email sequence](/examples/resend-email-sequence)      | Send a sequence of emails over several days using Resend with Trigger.dev.  |
 | [Sharp image processing](/examples/sharp-image-processing)    | Use Sharp to process an image and save it to Cloudflare R2.                 |
 | [Vercel AI SDK](/examples/vercel-ai-sdk)                      | Use Vercel AI SDK to generate text using OpenAI.                            |
diff --git a/docs/examples/puppeteer.mdx b/docs/examples/puppeteer.mdx
index 27f06d9723..674d63704b 100644
--- a/docs/examples/puppeteer.mdx
+++ b/docs/examples/puppeteer.mdx
@@ -9,15 +9,15 @@ import ScrapingWarning from "/snippets/web-scraping-warning.mdx";
 
 ## Overview
 
-There are 2 example tasks to follow on this page:
+There are 3 example tasks to follow on this page:
 
 1. [Basic example](/examples/puppeteer#basic-example)
 2. [Generate a PDF from a web page](/examples/puppeteer#generate-a-pdf-from-a-web-page)
-3. [Scrape data from a website](/examples/puppeteer#scrape-data-from-a-website)
+3. [Scrape content from a web page](/examples/puppeteer#scrape-data-from-a-website)
 
 <ScrapingWarning />
 
-## Adding build configurations
+## Build configurations
 
 To use all examples on this page, you'll first need to add these build settings to your `trigger.config.ts` file:
 
@@ -29,15 +29,11 @@ export default defineConfig({
   // Your other config settings...
   build: {
     // This is required to use the Puppeteer library
-    external: ["puppeteer"],
+    extensions: [puppeteer()],
   },
 });
 ```
 
-<Note>
-  Ensure you use `puppeteer` not `puppeteer-core` in your build configuration.
-</Note>
-
 ## Set an environment variable
 
 Add the following environment variable in your Trigger.dev dashboard on the Environment Variables page:
@@ -109,7 +105,7 @@ export const puppeteerWebpageToPDF = task({
     const response = await page.goto("https://google.com");
     const url = response?.url() ?? "No URL found";
 
-    // Generate PDF from the webpage
+    // Generate PDF from the web page
     const generatePdf = await page.pdf();
 
     logger.info("PDF generated from URL", { url });
@@ -141,22 +137,77 @@ export const puppeteerWebpageToPDF = task({
 
 There's no payload required for this task so you can just click "Run test" from the Test page in the dashboard.
 
-## Scrape data from a website
+## Scrape content from a web page
 
 ### Overview
 
-In this example we use Puppeteer with a proxy to scrape the content from a webpage and log it out.
+In this example we use Puppeteer with a BrowserBase proxy to scrape the GitHub stars count from the [Trigger.dev](https://trigger.dev) landing page and log it out.
 
 <ScrapingWarning />
 
 ### Task code
 
 ```ts trigger/scrape-website.ts
-code here
+import { logger, task } from "@trigger.dev/sdk/v3";
+import puppeteer from "puppeteer-core";
+
+export const puppeteerScrapeWithProxy = task({
+  id: "puppeteer-scrape-with-proxy",
+  run: async () => {
+    const browser = await puppeteer.connect({
+      browserWSEndpoint: `wss://connect.browserbase.com?apiKey=${process.env.BROWSERBASE_API_KEY}`,
+    });
+
+    const page = await browser.newPage();
+
+    // Set up BrowserBase proxy authentication
+    await page.authenticate({
+      username: "api",
+      password: process.env.BROWSERBASE_API_KEY || "",
+    });
+
+    try {
+      // Navigate to the target website
+      await page.goto("https://trigger.dev", { waitUntil: "networkidle0" });
+
+      // Scrape the GitHub stars count
+      const starCount = await page.evaluate(() => {
+        const starElement = document.querySelector(".github-star-count");
+        const text = starElement?.textContent ?? "0";
+        const numberText = text.replace(/[^0-9]/g, "");
+        return parseInt(numberText);
+      });
+
+      logger.info("GitHub star count", { starCount });
+
+      return { starCount };
+    } catch (error) {
+      logger.error("Error during scraping", {
+        error: error instanceof Error ? error.message : String(error),
+      });
+      throw error;
+    } finally {
+      await browser.close();
+    }
+  },
+});
 ```
 
 ### Testing your task
 
 There's no payload required for this task so you can just click "Run test" from the Test page in the dashboard.
 
-<LocalDevelopment packages={"the Puppeteer library"} />
\ No newline at end of file
+<LocalDevelopment packages={"the Puppeteer library."} />
+
+## Proxying
+
+If you're using Trigger.dev Cloud and Puppeteer or any other tool to scrape content from websites you don't own, you'll need to proxy your requests. **If you don't, you'll risk getting our IP address blocked and we will ban you from our service.**
+
+Here is a list of proxy services we recommend:
+
+- [Browserbase](https://www.browserbase.com/)
+- [Brightdata](https://brightdata.com/)
+- [Browserless](https://browserless.io/)
+- [Oxylabs](https://oxylabs.io/)
+- [ScrapingBee](https://scrapingbee.com/)
+- [Smartproxy](https://smartproxy.com/)
\ No newline at end of file

From 6a800d5466a0bfdb0c75120726bb02f37b4b2890 Mon Sep 17 00:00:00 2001
From: James Ritchie <james@jamesritchie.co.uk>
Date: Wed, 25 Sep 2024 13:40:26 +0100
Subject: [PATCH 4/5] Copy improvements

---
 docs/config/config-file.mdx            |  6 +++---
 docs/examples/puppeteer.mdx            | 28 +++++++++++++++-----------
 docs/snippets/web-scraping-warning.mdx |  2 +-
 3 files changed, 20 insertions(+), 16 deletions(-)

diff --git a/docs/config/config-file.mdx b/docs/config/config-file.mdx
index 565e159c62..3546a42b3d 100644
--- a/docs/config/config-file.mdx
+++ b/docs/config/config-file.mdx
@@ -498,9 +498,7 @@ And add the following environment variable in your Trigger.dev dashboard on the
 PUPPETEER_EXECUTABLE_PATH: "/usr/bin/google-chrome-stable",
 ```
 
-<Note>
-  Ensure you use `puppeteer` not `puppeteer-core` in your build configuration.
-</Note>
+Follow [this example](/examples/puppeteer) to get set up with Trigger.dev and Puppeteer in your project.
 
 #### ffmpeg
 
@@ -534,6 +532,8 @@ export default defineConfig({
 
 This extension will also add the `FFMPEG_PATH` and `FFPROBE_PATH` to your environment variables, making it easy to use popular ffmpeg libraries like `fluent-ffmpeg`.
 
+Follow [this example](/examples/ffmpeg-video-processing) to get set up with Trigger.dev and FFmpeg in your project.
+
 #### esbuild plugins
 
 You can easily add existing or custom esbuild plugins to your build process using the `esbuildPlugin` extension:
diff --git a/docs/examples/puppeteer.mdx b/docs/examples/puppeteer.mdx
index 674d63704b..8f003b0d95 100644
--- a/docs/examples/puppeteer.mdx
+++ b/docs/examples/puppeteer.mdx
@@ -13,9 +13,9 @@ There are 3 example tasks to follow on this page:
 
 1. [Basic example](/examples/puppeteer#basic-example)
 2. [Generate a PDF from a web page](/examples/puppeteer#generate-a-pdf-from-a-web-page)
-3. [Scrape content from a web page](/examples/puppeteer#scrape-data-from-a-website)
+3. [Scrape content from a web page](/examples/puppeteer#scrape-content-from-a-web-page)
 
-<ScrapingWarning />
+<ScrapingWarning/>
 
 ## Build configurations
 
@@ -34,9 +34,11 @@ export default defineConfig({
 });
 ```
 
+Learn more about [build configurations](/config/config-file#build-configuration), including configuring default retry settings, customizing the build environment, and more.
+
 ## Set an environment variable
 
-Add the following environment variable in your Trigger.dev dashboard on the Environment Variables page:
+Set the following environment variable in your [Trigger.dev dashboard](/deploy-environment-variables) or [using the SDK](/deploy-environment-variables#in-your-code):
 
 ```bash
 PUPPETEER_EXECUTABLE_PATH: "/usr/bin/google-chrome-stable",
@@ -46,7 +48,7 @@ PUPPETEER_EXECUTABLE_PATH: "/usr/bin/google-chrome-stable",
 
 ### Overview
 
-In this example we use Puppeteer to log out the title of a web page, in this case Google.
+In this example we use [Puppeteer](https://pptr.dev/) to log out the title of a web page, in this case from the [Trigger.dev](https://trigger.dev) landing page.
 
 ### Task code
 
@@ -60,7 +62,7 @@ export const puppeteerTask = task({
     const browser = await puppeteer.launch();
     const page = await browser.newPage();
 
-    await page.goto("https://google.com");
+    await page.goto("https://trigger.dev");
 
     const content = await page.title();
     logger.info("Content", { content });
@@ -72,13 +74,13 @@ export const puppeteerTask = task({
 
 ### Testing your task
 
-There's no payload required for this task so you can just click "Run test" from the Test page in the dashboard.
+There's no payload required for this task so you can just click "Run test" from the Test page in the dashboard. Learn more about testing tasks [here](/run-tests).
 
 ## Generate a PDF from a web page
 
 ### Overview
 
-In this example we use Puppeteer to generate a PDF from a web page and upload it to Cloudflare R2.
+In this example we use [Puppeteer](https://pptr.dev/) to generate a PDF from the [Trigger.dev](https://trigger.dev) landing page and upload it to [Cloudflare R2](https://developers.cloudflare.com/r2/).
 
 ### Task code
 
@@ -102,7 +104,7 @@ export const puppeteerWebpageToPDF = task({
   run: async () => {
     const browser = await puppeteer.launch();
     const page = await browser.newPage();
-    const response = await page.goto("https://google.com");
+    const response = await page.goto("https://trigger.dev");
     const url = response?.url() ?? "No URL found";
 
     // Generate PDF from the web page
@@ -135,15 +137,17 @@ export const puppeteerWebpageToPDF = task({
 
 ### Testing your task
 
-There's no payload required for this task so you can just click "Run test" from the Test page in the dashboard.
+There's no payload required for this task so you can just click "Run test" from the Test page in the dashboard. Learn more about testing tasks [here](/run-tests).
 
 ## Scrape content from a web page
 
 ### Overview
 
-In this example we use Puppeteer with a BrowserBase proxy to scrape the GitHub stars count from the [Trigger.dev](https://trigger.dev) landing page and log it out.
+In this example we use [Puppeteer](https://pptr.dev/) with a [BrowserBase](https://www.browserbase.com/) proxy to scrape the GitHub stars count from the [Trigger.dev](https://trigger.dev) landing page and log it out. See [this list](/examples/puppeteer#proxying) for more proxying services we recommend.
 
-<ScrapingWarning />
+<Warning>
+  **WEB SCRAPING:** When web scraping, you MUST use a proxy to comply with our terms of service. Direct scraping of third-party websites without the site owner's permission using Trigger.dev Cloud is prohibited and will result in account suspension.
+</Warning>
 
 ### Task code
 
@@ -195,7 +199,7 @@ export const puppeteerScrapeWithProxy = task({
 
 ### Testing your task
 
-There's no payload required for this task so you can just click "Run test" from the Test page in the dashboard.
+There's no payload required for this task so you can just click "Run test" from the Test page in the dashboard. Learn more about testing tasks [here](/run-tests).
 
 <LocalDevelopment packages={"the Puppeteer library."} />
 
diff --git a/docs/snippets/web-scraping-warning.mdx b/docs/snippets/web-scraping-warning.mdx
index 0e979ede0d..651d69da22 100644
--- a/docs/snippets/web-scraping-warning.mdx
+++ b/docs/snippets/web-scraping-warning.mdx
@@ -1,3 +1,3 @@
 <Warning>
-  **WEB SCRAPING WARNING:** Direct scraping of third-party websites without explicit permission using Trigger.dev Cloud is strictly prohibited and will result in immediate account suspension. If web scraping is necessary for your project, you MUST use a proxy service to comply with our terms of service.
+  **WEB SCRAPING:** When web scraping, you MUST use a proxy to comply with our terms of service. Direct scraping of third-party websites without the site owner's permission using Trigger.dev Cloud is prohibited and will result in account suspension. See [this example](/examples/puppeteer#scrape-content-from-a-web-page) using a proxy.
 </Warning>
\ No newline at end of file

From 25255a141ce90370c2b381cace6352d442b99f27 Mon Sep 17 00:00:00 2001
From: James Ritchie <james@jamesritchie.co.uk>
Date: Wed, 25 Sep 2024 13:54:35 +0100
Subject: [PATCH 5/5] Improved the web scraping warning

---
 docs/examples/puppeteer.mdx | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/examples/puppeteer.mdx b/docs/examples/puppeteer.mdx
index 8f003b0d95..94bd20d2ff 100644
--- a/docs/examples/puppeteer.mdx
+++ b/docs/examples/puppeteer.mdx
@@ -145,9 +145,9 @@ There's no payload required for this task so you can just click "Run test" from
 
 In this example we use [Puppeteer](https://pptr.dev/) with a [BrowserBase](https://www.browserbase.com/) proxy to scrape the GitHub stars count from the [Trigger.dev](https://trigger.dev) landing page and log it out. See [this list](/examples/puppeteer#proxying) for more proxying services we recommend.
 
-<Warning>
-  **WEB SCRAPING:** When web scraping, you MUST use a proxy to comply with our terms of service. Direct scraping of third-party websites without the site owner's permission using Trigger.dev Cloud is prohibited and will result in account suspension.
-</Warning>
+<Note>
+  When web scraping, you MUST use the technique below, which uses a proxy with Puppeteer. Direct scraping without using `browserWSEndpoint` is prohibited and will result in account suspension.
+</Note>
 
 ### Task code