From 3b6cb800068c1e4c740c19c09a697441461d2a26 Mon Sep 17 00:00:00 2001
From: D-K-P <8297864+D-K-P@users.noreply.github.com>
Date: Tue, 18 Mar 2025 16:19:58 +0000
Subject: [PATCH 1/4] Added python image processing and updated docs.json

---
 docs/docs.json                                |   8 +-
 .../guides/python/python-image-processing.mdx | 547 ++++++++++++++++++
 2 files changed, 553 insertions(+), 2 deletions(-)
 create mode 100644 docs/guides/python/python-image-processing.mdx
diff --git a/docs/docs.json b/docs/docs.json
index 5c4c995e30..f5884a6234 100644
--- a/docs/docs.json
+++ b/docs/docs.json
@@ -299,6 +299,7 @@
               }
             ]
           },
+
           {
             "group": "Example projects",
             "pages": [
@@ -306,10 +307,13 @@
               "guides/example-projects/claude-thinking-chatbot",
               "guides/example-projects/realtime-fal-ai",
               "guides/example-projects/realtime-csv-importer",
-              "guides/example-projects/vercel-ai-sdk-image-generator",
-              "guides/python/python-crawl4ai"
+              "guides/example-projects/vercel-ai-sdk-image-generator"
             ]
           },
+          {
+            "group": "Python examples",
+            "pages": ["guides/python/python-image-processing", "guides/python/python-crawl4ai"]
+          },
           {
             "group": "Example tasks",
             "pages": [
diff --git a/docs/guides/python/python-image-processing.mdx b/docs/guides/python/python-image-processing.mdx
new file mode 100644
index 0000000000..83192988d6
--- /dev/null
+++ b/docs/guides/python/python-image-processing.mdx
@@ -0,0 +1,547 @@
+---
+title: "Python image processing example"
+sidebarTitle: "Python image processing"
+description: "Learn how to use Trigger.dev with Python to process images from URLs and upload them to S3."
+---
+
+import PythonLearnMore from "/snippets/python-learn-more.mdx";
+
+## Overview
+
+This demo showcases how to use Trigger.dev with Python to process an image using Pillow (PIL) from a URL and upload it to S3-compatible storage bucket.
+
+## Prerequisites
+
+- A project with [Trigger.dev initialized](/quick-start)
+- [Python](https://www.python.org/) installed on your local machine
+
+## Features
+
+- A [Trigger.dev](https://trigger.dev) task to trigger the image processing Python script, and then upload the processed image to S3-compatible storage
+- The [Trigger.dev Python build extension](https://trigger.dev/docs/config/extensions/pythonExtension) to install dependencies and run Python scripts
+- [Pillow (PIL)](https://pillow.readthedocs.io/) for powerful image processing capabilities
+- [AWS SDK v3](https://docs.aws.amazon.com/AWSJavaScriptSDK/v3/latest/client/s3/) for S3 uploads
+- S3-compatible storage support (AWS S3, Cloudflare R2, etc.)
+
+## GitHub repo
+
+<Card
+  title="View the project on GitHub"
+  icon="GitHub"
+  href="https://github.com/triggerdotdev/examples/tree/main/python-image-processing"
+>
+  Click here to view the full code for this project in our examples repository on GitHub. You can
+  fork it and use it as a starting point for your own project.
+</Card>
+
+## The code
+
+### Build configuration
+
+After you've initialized your project with Trigger.dev, add these build settings to your `trigger.config.ts` file:
+
+```ts trigger.config.ts
+import { pythonExtension } from "@trigger.dev/python/extension";
+import { defineConfig } from "@trigger.dev/sdk/v3";
+
+export default defineConfig({
+  runtime: "node",
+  project: "<your-project-ref>",
+  // Your other config settings...
+  build: {
+    extensions: [
+      pythonExtension({
+        // The path to your requirements.txt file
+        requirementsFile: "./requirements.txt",
+        // The path to your Python binary
+        devPythonBinaryPath: `venv/bin/python`,
+        // The paths to your Python scripts to run
+        scripts: ["src/python/**/*.py"],
+      }),
+    ],
+  },
+});
+```
+
+<Info>
+  Learn more about executing scripts in your Trigger.dev project using our Python build extension
+  [here](/config/extensions/pythonExtension).
+</Info>
+
+### Task code
+
+This task uses the `python.runScript` method to run the `image-processing.py` script with the given image URL as an argument. You can adjust the image processing parameters in the payload, with options such as height, width, quality, output format, etc.
+
+```ts src/trigger/processImage.ts
+import { schemaTask } from "@trigger.dev/sdk/v3";
+import { z } from "zod";
+import { python } from "@trigger.dev/python";
+import { promises as fs } from "fs";
+import { S3Client } from "@aws-sdk/client-s3";
+import { Upload } from "@aws-sdk/lib-storage";
+
+// Initialize S3 client
+const s3Client = new S3Client({
+  region: "auto",
+  endpoint: process.env.S3_ENDPOINT,
+  credentials: {
+    accessKeyId: process.env.S3_ACCESS_KEY_ID ?? "",
+    secretAccessKey: process.env.S3_SECRET_ACCESS_KEY ?? "",
+  },
+});
+
+// Define the input schema with Zod
+const imageProcessingSchema = z.object({
+  imageUrl: z.string().url(),
+  height: z.number().positive().optional().default(800),
+  width: z.number().positive().optional().default(600),
+  quality: z.number().min(1).max(100).optional().default(85),
+  maintainAspectRatio: z.boolean().optional().default(true),
+  outputFormat: z.enum(["jpeg", "png", "webp", "gif", "avif"]).optional().default("jpeg"),
+  brightness: z.number().optional(),
+  contrast: z.number().optional(),
+  sharpness: z.number().optional(),
+  grayscale: z.boolean().optional().default(false),
+});
+
+// Define the output schema
+const outputSchema = z.object({
+  url: z.string().url(),
+  key: z.string(),
+  format: z.string(),
+  originalSize: z.object({
+    width: z.number(),
+    height: z.number(),
+  }),
+  newSize: z.object({
+    width: z.number(),
+    height: z.number(),
+  }),
+  fileSizeBytes: z.number(),
+  exitCode: z.number(),
+});
+
+export const processImage = schemaTask({
+  id: "process-image",
+  schema: imageProcessingSchema,
+  run: async (payload, io) => {
+    const {
+      imageUrl,
+      height,
+      width,
+      quality,
+      maintainAspectRatio,
+      outputFormat,
+      brightness,
+      contrast,
+      sharpness,
+      grayscale,
+    } = payload;
+
+    try {
+      // Run the Python script
+      const result = await python.runScript("./src/python/image-processing.py", [
+        imageUrl,
+        height.toString(),
+        width.toString(),
+        quality.toString(),
+        maintainAspectRatio.toString(),
+        outputFormat,
+        brightness?.toString() || "null",
+        contrast?.toString() || "null",
+        sharpness?.toString() || "null",
+        grayscale.toString(),
+      ]);
+
+      const { outputPath, format, originalSize, newSize, fileSizeBytes } = JSON.parse(
+        result.stdout
+      );
+
+      // Read file once
+      const fileContent = await fs.readFile(outputPath);
+
+      try {
+        // Upload to S3
+        const key = `processed-images/${Date.now()}-${outputPath.split("/").pop()}`;
+        await new Upload({
+          client: s3Client,
+          params: {
+            Bucket: process.env.S3_BUCKET!,
+            Key: key,
+            Body: fileContent,
+            ContentType: `image/${format}`,
+          },
+        }).done();
+
+        return {
+          url: `${process.env.S3_PUBLIC_URL}/${key}`,
+          key,
+          format,
+          originalSize,
+          newSize,
+          fileSizeBytes,
+          exitCode: result.exitCode,
+        };
+      } finally {
+        // Always clean up the temp file
+        await fs.unlink(outputPath).catch(console.error);
+      }
+    } catch (error) {
+      throw new Error(
+        `Processing failed: ${error instanceof Error ? error.message : "Unknown error"}`
+      );
+    }
+  },
+});
+```
+
+### Add a requirements.txt file
+
+Add the following to your `requirements.txt` file. This is required in Python projects to install the dependencies.
+
+```txt requirements.txt
+# Core dependencies
+Pillow==10.2.0            # Image processing library
+python-dotenv==1.0.0      # Environment variable management
+requests==2.31.0          # HTTP requests
+numpy==1.26.3             # Numerical operations (for advanced processing)
+
+# Optional enhancements
+opencv-python==4.8.1.78   # For more advanced image processing
+```
+
+### The Python script
+
+The Python script uses Pillow (PIL) to process an image. You can see the original script in our examples repository [here](https://github.com/triggerdotdev/examples/blob/main/python-image-processing/src/python/image-processing.py).
+
+```python src/python/image-processing.py
+from PIL import Image, ImageOps, ImageEnhance
+import io
+from io import BytesIO
+import os
+from typing import Tuple, List, Dict, Optional, Union
+import logging
+import sys
+import json
+import requests
+
+# Configure logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+logger = logging.getLogger(__name__)
+
+class ImageProcessor:
+    """Image processing utility for resizing, optimizing, and converting images."""
+
+    # Supported formats for conversion
+    SUPPORTED_FORMATS = ['JPEG', 'PNG', 'WEBP', 'GIF', 'AVIF']
+
+    @staticmethod
+    def open_image(image_data: Union[bytes, str]) -> Image.Image:
+        """Open an image from bytes or file path."""
+        try:
+            if isinstance(image_data, bytes):
+                return Image.open(io.BytesIO(image_data))
+            else:
+                return Image.open(image_data)
+        except Exception as e:
+            logger.error(f"Failed to open image: {e}")
+            raise ValueError(f"Could not open image: {e}")
+
+    @staticmethod
+    def resize_image(
+        img: Image.Image,
+        width: Optional[int] = None,
+        height: Optional[int] = None,
+        maintain_aspect_ratio: bool = True
+    ) -> Image.Image:
+        """
+        Resize an image to specified dimensions.
+
+        Args:
+            img: PIL Image object
+            width: Target width (None to auto-calculate from height)
+            height: Target height (None to auto-calculate from width)
+            maintain_aspect_ratio: Whether to maintain the original aspect ratio
+
+        Returns:
+            Resized PIL Image
+        """
+        if width is None and height is None:
+            return img  # No resize needed
+
+        original_width, original_height = img.size
+
+        if maintain_aspect_ratio:
+            if width and height:
+                # Calculate the best fit while maintaining aspect ratio
+                ratio = min(width / original_width, height / original_height)
+                new_width = int(original_width * ratio)
+                new_height = int(original_height * ratio)
+            elif width:
+                # Calculate height based on width
+                ratio = width / original_width
+                new_width = width
+                new_height = int(original_height * ratio)
+            else:
+                # Calculate width based on height
+                ratio = height / original_height
+                new_width = int(original_width * ratio)
+                new_height = height
+        else:
+            # Force exact dimensions
+            new_width = width if width else original_width
+            new_height = height if height else original_height
+
+        return img.resize((new_width, new_height), Image.LANCZOS)
+
+    @staticmethod
+    def optimize_image(
+        img: Image.Image,
+        quality: int = 85,
+        format: Optional[str] = None
+    ) -> Tuple[bytes, str]:
+        """
+        Optimize an image for web delivery.
+
+        Args:
+            img: PIL Image object
+            quality: JPEG/WebP quality (0-100)
+            format: Output format (JPEG, PNG, WEBP, etc.)
+
+        Returns:
+            Tuple of (image_bytes, format)
+        """
+        if format is None:
+            format = img.format or 'JPEG'
+
+        format = format.upper()
+        if format not in ImageProcessor.SUPPORTED_FORMATS:
+            format = 'JPEG'  # Default to JPEG if unsupported format
+
+        # Convert mode if needed
+        if format == 'JPEG' and img.mode in ('RGBA', 'P'):
+            img = img.convert('RGB')
+
+        # Save to bytes
+        buffer = io.BytesIO()
+
+        if format == 'JPEG':
+            img.save(buffer, format=format, quality=quality, optimize=True)
+        elif format == 'PNG':
+            img.save(buffer, format=format, optimize=True)
+        elif format == 'WEBP':
+            img.save(buffer, format=format, quality=quality)
+        elif format == 'AVIF':
+            img.save(buffer, format=format, quality=quality)
+        else:
+            img.save(buffer, format=format)
+
+        buffer.seek(0)
+        return buffer.getvalue(), format.lower()
+
+    @staticmethod
+    def apply_filters(
+        img: Image.Image,
+        brightness: Optional[float] = None,
+        contrast: Optional[float] = None,
+        sharpness: Optional[float] = None,
+        grayscale: bool = False
+    ) -> Image.Image:
+        """
+        Apply various filters and enhancements to an image.
+
+        Args:
+            img: PIL Image object
+            brightness: Brightness factor (0.0-2.0, 1.0 is original)
+            contrast: Contrast factor (0.0-2.0, 1.0 is original)
+            sharpness: Sharpness factor (0.0-2.0, 1.0 is original)
+            grayscale: Convert to grayscale if True
+
+        Returns:
+            Processed PIL Image
+        """
+        # Apply grayscale first if requested
+        if grayscale:
+            img = ImageOps.grayscale(img)
+            # Convert back to RGB if other filters will be applied
+            if any(x is not None for x in [brightness, contrast, sharpness]):
+                img = img.convert('RGB')
+
+        # Apply enhancements
+        if brightness is not None:
+            img = ImageEnhance.Brightness(img).enhance(brightness)
+
+        if contrast is not None:
+            img = ImageEnhance.Contrast(img).enhance(contrast)
+
+        if sharpness is not None:
+            img = ImageEnhance.Sharpness(img).enhance(sharpness)
+
+        return img
+
+    @staticmethod
+    def process_image(
+        image_data: Union[bytes, str],
+        width: Optional[int] = None,
+        height: Optional[int] = None,
+        maintain_aspect_ratio: bool = True,
+        quality: int = 85,
+        output_format: Optional[str] = None,
+        brightness: Optional[float] = None,
+        contrast: Optional[float] = None,
+        sharpness: Optional[float] = None,
+        grayscale: bool = False
+    ) -> Dict:
+        """
+        Process an image with all available options.
+
+        Args:
+            image_data: Image bytes or file path
+            width: Target width
+            height: Target height
+            maintain_aspect_ratio: Whether to maintain aspect ratio
+            quality: Output quality
+            output_format: Output format
+            brightness: Brightness adjustment
+            contrast: Contrast adjustment
+            sharpness: Sharpness adjustment
+            grayscale: Convert to grayscale
+
+        Returns:
+            Dict with processed image data and metadata
+        """
+        # Open the image
+        img = ImageProcessor.open_image(image_data)
+        original_format = img.format
+        original_size = img.size
+
+        # Apply filters
+        img = ImageProcessor.apply_filters(
+            img,
+            brightness=brightness,
+            contrast=contrast,
+            sharpness=sharpness,
+            grayscale=grayscale
+        )
+
+        # Resize if needed
+        if width or height:
+            img = ImageProcessor.resize_image(
+                img,
+                width=width,
+                height=height,
+                maintain_aspect_ratio=maintain_aspect_ratio
+            )
+
+        # Optimize and get bytes
+        processed_bytes, actual_format = ImageProcessor.optimize_image(
+            img,
+            quality=quality,
+            format=output_format
+        )
+
+        # Return result with metadata
+        return {
+            "processed_image": processed_bytes,
+            "format": actual_format,
+            "original_format": original_format,
+            "original_size": original_size,
+            "new_size": img.size,
+            "file_size_bytes": len(processed_bytes)
+        }
+
+def process_image(url, height, width, quality):
+    # Download image from URL
+    response = requests.get(url)
+    img = Image.open(BytesIO(response.content))
+
+    # Resize
+    img = img.resize((int(width), int(height)), Image.Resampling.LANCZOS)
+
+    # Save with quality setting
+    output_path = f"/tmp/processed_{width}x{height}.jpg"
+    img.save(output_path, "JPEG", quality=int(quality))
+
+    return output_path
+
+if __name__ == "__main__":
+    url = sys.argv[1]
+    height = int(sys.argv[2])
+    width = int(sys.argv[3])
+    quality = int(sys.argv[4])
+    maintain_aspect_ratio = sys.argv[5].lower() == 'true'
+    output_format = sys.argv[6]
+    brightness = float(sys.argv[7]) if sys.argv[7] != 'null' else None
+    contrast = float(sys.argv[8]) if sys.argv[8] != 'null' else None
+    sharpness = float(sys.argv[9]) if sys.argv[9] != 'null' else None
+    grayscale = sys.argv[10].lower() == 'true'
+
+    processor = ImageProcessor()
+    result = processor.process_image(
+        requests.get(url).content,
+        width=width,
+        height=height,
+        maintain_aspect_ratio=maintain_aspect_ratio,
+        quality=quality,
+        output_format=output_format,
+        brightness=brightness,
+        contrast=contrast,
+        sharpness=sharpness,
+        grayscale=grayscale
+    )
+
+    output_path = f"/tmp/processed_{width}x{height}.{result['format']}"
+    with open(output_path, 'wb') as f:
+        f.write(result['processed_image'])
+
+    print(json.dumps({
+        "outputPath": output_path,
+        "format": result['format'],
+        "originalSize": result['original_size'],
+        "newSize": result['new_size'],
+        "fileSizeBytes": result['file_size_bytes']
+    }))
+```
+
+## Testing your task
+
+1. Create a virtual environment `python -m venv venv`
+2. Activate the virtual environment, depending on your OS: On Mac/Linux: `source venv/bin/activate`, on Windows: `venv\Scripts\activate`
+3. Install the Python dependencies `pip install -r requirements.txt`
+4. Set up your S3-compatible storage credentials in your environment variables, in .env for local development, or in the Trigger.dev dashboard for production:
+   ```
+   S3_ENDPOINT=https://your-endpoint.com
+   S3_ACCESS_KEY_ID=your-access-key
+   S3_SECRET_ACCESS_KEY=your-secret-key
+   S3_BUCKET=your-bucket-name
+   S3_PUBLIC_URL=https://your-public-url.com
+   ```
+5. Copy the project ref from your [Trigger.dev dashboard](https://cloud.trigger.dev) and add it to the `trigger.config.ts` file.
+6. Run the Trigger.dev dev CLI command with `npx trigger dev@latest dev` (it may ask you to authorize the CLI if you haven't already).
+7. Test the task in the dashboard by providing a valid image URL and processing options.
+8. Deploy the task to production using the CLI command `npx trigger.dev@latest deploy`
+
+## Example Payload
+
+These are all optional parameters that can be passed to the `image-processing.py` Python script from the `processImage.ts` task.
+
+```json
+{
+  "imageUrl": "<your-image-url>",
+  "height": 1200,
+  "width": 900,
+  "quality": 90,
+  "maintainAspectRatio": true,
+  "outputFormat": "webp",
+  "brightness": 1.2,
+  "contrast": 1.1,
+  "sharpness": 1.3,
+  "grayscale": false
+}
+```
+
+## Deploying your task
+
+Deploy the task to production using the CLI command `npx trigger.dev@latest deploy`
+
+<PythonLearnMore />

From c84a10d468ddf86ff400019bf1926ef0f122c6d8 Mon Sep 17 00:00:00 2001
From: D-K-P <8297864+D-K-P@users.noreply.github.com>
Date: Tue, 18 Mar 2025 16:20:04 +0000
Subject: [PATCH 2/4] Edits to crawl4ai

---
 docs/guides/python/python-crawl4ai.mdx | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/docs/guides/python/python-crawl4ai.mdx b/docs/guides/python/python-crawl4ai.mdx
index 51d23fe75d..de60d3f301 100644
--- a/docs/guides/python/python-crawl4ai.mdx
+++ b/docs/guides/python/python-crawl4ai.mdx
@@ -7,15 +7,15 @@ description: "Learn how to use Python, Crawl4AI and Playwright to create a headl
 import ScrapingWarning from "/snippets/web-scraping-warning.mdx";
 import PythonLearnMore from "/snippets/python-learn-more.mdx";
 
+## Overview
+
+This demo showcases how to use Trigger.dev with Python to build a web crawler that uses a headless browser to navigate websites and extract content.
+
 ## Prerequisites
 
 - A project with [Trigger.dev initialized](/quick-start)
 - [Python](https://www.python.org/) installed on your local machine
 
-## Overview
-
-This demo showcases how to use Trigger.dev with Python to build a web crawler that uses a headless browser to navigate websites and extract content.
-
 ## Features
 
 - [Trigger.dev](https://trigger.dev) for background task orchestration
@@ -113,7 +113,10 @@ export function installPlaywrightChromium(): BuildExtension {
 }
 ```
 
-Learn more about the [trigger.config.ts](/config/config-file) file including setting default retry settings, customizing the build environment, and more.
+<Info>
+  Learn more about executing scripts in your Trigger.dev project using our Python build extension
+  [here](/config/extensions/pythonExtension).
+</Info>
 
 ### Task code
 

From f217fa0bd9cd62fa3ec75527b38e1e44213f7f91 Mon Sep 17 00:00:00 2001
From: D-K-P <8297864+D-K-P@users.noreply.github.com>
Date: Tue, 18 Mar 2025 16:40:43 +0000
Subject: [PATCH 3/4] Updated commands

---
 docs/guides/python/python-crawl4ai.mdx         | 6 +++---
 docs/guides/python/python-image-processing.mdx | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/docs/guides/python/python-crawl4ai.mdx b/docs/guides/python/python-crawl4ai.mdx
index de60d3f301..c821bf6c98 100644
--- a/docs/guides/python/python-crawl4ai.mdx
+++ b/docs/guides/python/python-crawl4ai.mdx
@@ -215,14 +215,14 @@ if __name__ == "__main__":
 1. Create a virtual environment `python -m venv venv`
 2. Activate the virtual environment, depending on your OS: On Mac/Linux: `source venv/bin/activate`, on Windows: `venv\Scripts\activate`
 3. Install the Python dependencies `pip install -r requirements.txt`
-4. If you haven't already, copy your project ref from your [Trigger.dev dashboard](https://cloud.trigger.dev) and and add it to the `trigger.config.ts` file.
-5. Run the Trigger.dev dev CLI command with with `npx trigger dev@latest dev` (it may ask you to authorize the CLI if you haven't already).
+4. If you haven't already, copy your project ref from your [Trigger.dev dashboard](https://cloud.trigger.dev) and add it to the `trigger.config.ts` file.
+5. Run the Trigger.dev CLI `dev` command (it may ask you to authorize the CLI if you haven't already).
 6. Test the task in the dashboard, using a URL of your choice.
 
 <ScrapingWarning />
 
 ## Deploying your task
 
-Deploy the task to production using the CLI command `npx trigger.dev@latest deploy`
+Deploy the task to production using the Trigger.dev CLI `deploy` command.
 
 <PythonLearnMore />
diff --git a/docs/guides/python/python-image-processing.mdx b/docs/guides/python/python-image-processing.mdx
index 83192988d6..64e73ecdae 100644
--- a/docs/guides/python/python-image-processing.mdx
+++ b/docs/guides/python/python-image-processing.mdx
@@ -517,9 +517,9 @@ if __name__ == "__main__":
    S3_PUBLIC_URL=https://your-public-url.com
    ```
 5. Copy the project ref from your [Trigger.dev dashboard](https://cloud.trigger.dev) and add it to the `trigger.config.ts` file.
-6. Run the Trigger.dev dev CLI command with `npx trigger dev@latest dev` (it may ask you to authorize the CLI if you haven't already).
+6. Run the Trigger.dev CLI `dev` command (it may ask you to authorize the CLI if you haven't already).
 7. Test the task in the dashboard by providing a valid image URL and processing options.
-8. Deploy the task to production using the CLI command `npx trigger.dev@latest deploy`
+8. Deploy the task to production using the Trigger.dev CLI `deploy` command.
 
 ## Example Payload
 

From 18a089cf999bd4bc01e12e00e95245963712681d Mon Sep 17 00:00:00 2001
From: D-K-P <8297864+D-K-P@users.noreply.github.com>
Date: Tue, 18 Mar 2025 16:41:37 +0000
Subject: [PATCH 4/4] Added pdf form extractor

---
 docs/docs.json                                |  13 +-
 .../python/python-pdf-form-extractor.mdx      | 194 ++++++++++++++++++
 2 files changed, 202 insertions(+), 5 deletions(-)
 create mode 100644 docs/guides/python/python-pdf-form-extractor.mdx

diff --git a/docs/docs.json b/docs/docs.json
index f5884a6234..ba9fb7bde2 100644
--- a/docs/docs.json
+++ b/docs/docs.json
@@ -299,7 +299,14 @@
               }
             ]
           },
-
+          {
+            "group": "Python guides",
+            "pages": [
+              "guides/python/python-image-processing",
+              "guides/python/python-crawl4ai",
+              "guides/python/python-pdf-form-extractor"
+            ]
+          },
           {
             "group": "Example projects",
             "pages": [
@@ -310,10 +317,6 @@
               "guides/example-projects/vercel-ai-sdk-image-generator"
             ]
           },
-          {
-            "group": "Python examples",
-            "pages": ["guides/python/python-image-processing", "guides/python/python-crawl4ai"]
-          },
           {
             "group": "Example tasks",
             "pages": [
diff --git a/docs/guides/python/python-pdf-form-extractor.mdx b/docs/guides/python/python-pdf-form-extractor.mdx
new file mode 100644
index 0000000000..a62f0e8dc1
--- /dev/null
+++ b/docs/guides/python/python-pdf-form-extractor.mdx
@@ -0,0 +1,194 @@
+---
+title: "Python PDF form extractor example"
+sidebarTitle: "Python PDF form extractor"
+description: "Learn how to use Trigger.dev with Python to extract form data from PDF files."
+---
+
+import PythonLearnMore from "/snippets/python-learn-more.mdx";
+
+## Overview
+
+This demo showcases how to use Trigger.dev with Python to extract structured form data from a PDF file available at a URL.
+
+## Prerequisites
+
+- A project with [Trigger.dev initialized](/quick-start)
+- [Python](https://www.python.org/) installed on your local machine
+
+## Features
+
+- A [Trigger.dev](https://trigger.dev) task to trigger the Python script
+- [Trigger.dev Python build extension](https://trigger.dev/docs/config/extensions/pythonExtension) to install the dependencies and run the Python script
+- [PyMuPDF](https://pymupdf.readthedocs.io/en/latest/) to extract form data from PDF files
+- [Requests](https://docs.python-requests.org/en/master/) to download PDF files from URLs
+
+## GitHub repo
+
+<Card
+  title="View the project on GitHub"
+  icon="GitHub"
+  href="https://github.com/triggerdotdev/examples/edit/main/python-pdf-form-extractor/"
+>
+  Click here to view the full code for this project in our examples repository on GitHub. You can
+  fork it and use it as a starting point for your own project.
+</Card>
+
+## The code
+
+### Build configuration
+
+After you've initialized your project with Trigger.dev, add these build settings to your `trigger.config.ts` file:
+
+```ts trigger.config.ts
+import { pythonExtension } from "@trigger.dev/python/extension";
+import { defineConfig } from "@trigger.dev/sdk/v3";
+
+export default defineConfig({
+  runtime: "node",
+  project: "<your-project-ref>",
+  // Your other config settings...
+  build: {
+    extensions: [
+      pythonExtension({
+        // The path to your requirements.txt file
+        requirementsFile: "./requirements.txt",
+        // The path to your Python binary
+        devPythonBinaryPath: `venv/bin/python`,
+        // The paths to your Python scripts to run
+        scripts: ["src/python/**/*.py"],
+      }),
+    ],
+  },
+});
+```
+
+<Info>
+  Learn more about executing scripts in your Trigger.dev project using our Python build extension
+  [here](/config/extensions/pythonExtension).
+</Info>
+
+### Task code
+
+This task uses the `python.runScript` method to run the `image-processing.py` script with the given image URL as an argument. You can adjust the image processing parameters in the payload, with options such as height, width, quality, output format, etc.
+
+```ts src/trigger/pythonPdfTask.ts
+import { task } from "@trigger.dev/sdk/v3";
+import { python } from "@trigger.dev/python";
+
+export const processPdfForm = task({
+  id: "process-pdf-form",
+  run: async (payload: { pdfUrl: string }, io: any) => {
+    const { pdfUrl } = payload;
+    const args = [pdfUrl];
+
+    const result = await python.runScript("./src/python/extract-pdf-form.py", args);
+
+    // Parse the JSON output from the script
+    let formData;
+    try {
+      formData = JSON.parse(result.stdout);
+    } catch (error) {
+      throw new Error(`Failed to parse JSON output: ${result.stdout}`);
+    }
+
+    return {
+      formData,
+      stderr: result.stderr,
+      exitCode: result.exitCode,
+    };
+  },
+});
+```
+
+### Add a requirements.txt file
+
+Add the following to your `requirements.txt` file. This is required in Python projects to install the dependencies.
+
+```txt requirements.txt
+PyMuPDF==1.23.8
+requests==2.31.0
+```
+
+### The Python script
+
+The Python script uses PyMuPDF to extract form data from a PDF file. You can see the original script in our examples repository [here](https://github.com/triggerdotdev/examples/blob/main/python-pdf-form-extractor/src/python/extract-pdf-form.py).
+
+```python src/python/extract-pdf-form.py
+import fitz  # PyMuPDF
+import requests
+import os
+import json
+import sys
+from urllib.parse import urlparse
+
+def download_pdf(url):
+    """Download PDF from URL to a temporary file"""
+    response = requests.get(url)
+    response.raise_for_status()
+
+    # Get filename from URL or use default
+    filename = os.path.basename(urlparse(url).path) or "downloaded.pdf"
+    filepath = os.path.join("/tmp", filename)
+
+    with open(filepath, 'wb') as f:
+        f.write(response.content)
+    return filepath
+
+def extract_form_data(pdf_path):
+    """Extract form data from a PDF file."""
+    doc = fitz.open(pdf_path)
+    form_data = {}
+
+    for page_num, page in enumerate(doc):
+        fields = page.widgets()
+        for field in fields:
+            field_name = field.field_name or f"unnamed_field_{page_num}_{len(form_data)}"
+            field_type = field.field_type_string
+            field_value = field.field_value
+
+            # For checkboxes, convert to boolean
+            if field_type == "CheckBox":
+                field_value = field_value == "Yes"
+
+            form_data[field_name] = {
+                "type": field_type,
+                "value": field_value,
+                "page": page_num + 1
+            }
+
+    return form_data
+
+def main():
+    if len(sys.argv) < 2:
+        print(json.dumps({"error": "PDF URL is required as an argument"}), file=sys.stderr)
+        return 1
+
+    url = sys.argv[1]
+
+    try:
+        pdf_path = download_pdf(url)
+        form_data = extract_form_data(pdf_path)
+
+        # Convert to JSON for structured output
+        structured_output = json.dumps(form_data, indent=2)
+        print(structured_output)
+        return 0
+    except Exception as e:
+        print(json.dumps({"error": str(e)}), file=sys.stderr)
+        return 1
+
+if __name__ == "__main__":
+    sys.exit(main())
+```
+
+## Testing your task
+
+1. Create a virtual environment `python -m venv venv`
+2. Activate the virtual environment, depending on your OS: On Mac/Linux: `source venv/bin/activate`, on Windows: `venv\Scripts\activate`
+3. Install the Python dependencies `pip install -r requirements.txt`
+4. Copy the project ref from your [Trigger.dev dashboard](https://cloud.trigger.dev) and add it to the `trigger.config.ts` file.
+5. Run the Trigger.dev CLI `dev` command (it may ask you to authorize the CLI if you haven't already).
+6. Test the task in the dashboard by providing a valid PDF URL.
+7. Deploy the task to production using the Trigger.dev CLI `deploy` command.
+
+<PythonLearnMore />