Skip to content

Commit 9fe4588

Browse files
3
1 parent 689eb7c commit 9fe4588

File tree

5 files changed

+57
-287
lines changed

5 files changed

+57
-287
lines changed

docker/Dockerfile

Lines changed: 17 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,51 +1,36 @@
1-
# Multi-stage Dockerfile that works on macOS for development and Ubuntu for GPU deployment
2-
ARG BASE_IMAGE=python:3.11-slim
1+
# CPU-only Dockerfile for macOS development and Ubuntu CPU deployment
2+
FROM python:3.11.13-slim
33

4-
# Use CUDA runtime for GPU support when building on Linux/Ubuntu
5-
# Use regular Python when building on macOS
6-
FROM ${BASE_IMAGE}
4+
# Set working directory
5+
WORKDIR /app
76

87
# Set environment variables
8+
ENV PYTHONDONTWRITEBYTECODE=1
99
ENV PYTHONUNBUFFERED=1
10-
ENV DEBIAN_FRONTEND=noninteractive
1110

1211
# Install system dependencies
1312
RUN apt-get update && apt-get install -y \
1413
build-essential \
15-
curl \
14+
wget \
15+
ca-certificates \
1616
&& rm -rf /var/lib/apt/lists/*
1717

18-
# Create a non-root user
19-
RUN useradd --create-home --shell /bin/bash app
20-
USER app
21-
WORKDIR /home/app
22-
23-
# Set up Python environment
24-
ENV PATH="/home/app/.local/bin:$PATH"
25-
2618
# Copy requirements first for better caching
27-
COPY --chown=app:app docker/requirements.txt ./requirements.txt
19+
COPY docker/requirements.txt .
2820

2921
# Install Python dependencies
30-
RUN pip install --user --no-cache-dir -r requirements.txt
31-
32-
# Pre-install spaCy models to avoid runtime downloads
33-
RUN python -c "import spacy; spacy.cli.download('en_core_web_lg')"
34-
35-
# Optionally pre-download GLiNER model (this will reduce first-run time)
36-
# Comment out if you want to download at runtime instead
37-
RUN python -c "from gliner import GLiNER; GLiNER.from_pretrained('urchade/gliner_small-v2.1')"
22+
RUN pip install --no-cache-dir -r requirements.txt
3823

39-
# Copy only necessary source files
40-
COPY --chown=app:app validator/ ./validator/
41-
COPY --chown=app:app docker/server_docker.py ./server.py
24+
# Copy application code
25+
COPY validator/ ./validator/
26+
COPY docker/server.py ./server.py
4227

43-
# Set default environment variables
44-
ENV PII_ENTITIES='["EMAIL_ADDRESS", "PHONE_NUMBER", "PERSON", "DATE_TIME"]'
45-
ENV USE_GPU=false
28+
# Pre-download and cache model files during build to avoid runtime delays
29+
# This runs the exact same guard initialization as the server to cache models
30+
RUN python -c "from validator import GuardrailsPII; from guardrails import Guard; print('Caching PII detection models...'); guard = Guard().use(GuardrailsPII(entities=['EMAIL_ADDRESS', 'PHONE_NUMBER', 'PERSON', 'DATE_TIME'], on_fail='fix', use_gpu=False)); result = guard.validate('Test email john@example.com and phone 555-123-4567'); print('Models cached successfully!')"
4631

47-
# Expose port
32+
# Expose port 8000
4833
EXPOSE 8000
4934

50-
# Run the application
35+
# Start the Flask application
5136
CMD ["python", "server.py"]

docker/Dockerfile.gpu

Lines changed: 20 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,49 +1,43 @@
11
# GPU-enabled Dockerfile for Ubuntu deployment with NVIDIA CUDA support
22
FROM nvidia/cuda:11.8-runtime-ubuntu22.04
33

4+
# Set working directory
5+
WORKDIR /app
6+
47
# Set environment variables
8+
ENV PYTHONDONTWRITEBYTECODE=1
59
ENV PYTHONUNBUFFERED=1
610
ENV DEBIAN_FRONTEND=noninteractive
711

8-
# Install Python and essential packages
12+
# Install Python and system dependencies
913
RUN apt-get update && apt-get install -y \
1014
python3 \
1115
python3-pip \
1216
python3-dev \
1317
build-essential \
18+
wget \
19+
ca-certificates \
1420
&& rm -rf /var/lib/apt/lists/*
1521

16-
# Create a non-root user
17-
RUN useradd --create-home --shell /bin/bash app
18-
USER app
19-
WORKDIR /home/app
20-
21-
# Set up Python environment
22-
ENV PATH="/home/app/.local/bin:$PATH"
22+
# Create symlink for python command
23+
RUN ln -s /usr/bin/python3 /usr/bin/python
2324

2425
# Copy requirements first for better caching
25-
COPY --chown=app:app docker/requirements.txt ./requirements.txt
26+
COPY docker/requirements.txt .
2627

2728
# Install Python dependencies
28-
RUN pip3 install --user --no-cache-dir -r requirements.txt
29-
30-
# Pre-install spaCy models to avoid runtime downloads
31-
RUN python3 -c "import spacy; spacy.cli.download('en_core_web_lg')"
32-
33-
# Optionally pre-download GLiNER model (this will reduce first-run time)
34-
# Comment out if you want to download at runtime instead
35-
RUN python3 -c "from gliner import GLiNER; GLiNER.from_pretrained('urchade/gliner_small-v2.1')"
29+
RUN pip install --no-cache-dir -r requirements.txt
3630

37-
# Copy only necessary source files
38-
COPY --chown=app:app validator/ ./validator/
39-
COPY --chown=app:app docker/server_docker.py ./server.py
31+
# Copy application code
32+
COPY validator/ ./validator/
33+
COPY docker/server.py ./server.py
4034

41-
# Set default environment variables for GPU
42-
ENV PII_ENTITIES='["EMAIL_ADDRESS", "PHONE_NUMBER", "PERSON", "DATE_TIME"]'
43-
ENV USE_GPU=true
35+
# Pre-download and cache model files during build to avoid runtime delays
36+
# This runs the exact same guard initialization as the server to cache models with GPU support
37+
RUN python -c "from validator import GuardrailsPII; from guardrails import Guard; print('Caching PII detection models with GPU support...'); guard = Guard().use(GuardrailsPII(entities=['EMAIL_ADDRESS', 'PHONE_NUMBER', 'PERSON', 'DATE_TIME'], on_fail='fix', use_gpu=True)); result = guard.validate('Test email john@example.com and phone 555-123-4567'); print('Models cached successfully!')"
4438

45-
# Expose port
39+
# Expose port 8000
4640
EXPOSE 8000
4741

48-
# Run the application
49-
CMD ["python3", "server.py"]
42+
# Start the Flask application
43+
CMD ["python", "server.py"]

docker/docker-README.md

Lines changed: 1 addition & 109 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,3 @@
1-
# PII Detection Docker Container
2-
3-
This directory contains Docker configuration for the PII detection and masking service that works on both macOS (development) and Ubuntu (GPU deployment).
4-
5-
## Features
6-
7-
- **Cross-Platform**: Works on macOS for development and Ubuntu for GPU deployment
8-
- **GPU Support**: NVIDIA GPU acceleration for improved performance (Ubuntu only)
9-
- **Configurable Entities**: Dynamic PII entity types via environment variables
10-
- **Lightweight**: Optimized image size with minimal dependencies
11-
12-
## Development on macOS
13-
141
### Build and Run (CPU-only)
152

163
```bash
@@ -39,99 +26,4 @@ docker build -f docker/Dockerfile.gpu -t pii-detector-gpu:latest .
3926
docker run --gpus all -p 8000:8000 \
4027
-e USE_GPU=true \
4128
-e PII_ENTITIES='["EMAIL_ADDRESS", "PHONE_NUMBER", "PERSON", "DATE_TIME"]' \
42-
pii-detector-gpu:latest
43-
44-
# Or use docker-compose
45-
docker-compose up pii-detector-gpu
46-
```
47-
48-
## Docker Files
49-
50-
- **`Dockerfile`**: CPU-only version, works on macOS and Ubuntu
51-
- **`Dockerfile.gpu`**: GPU-enabled version for Ubuntu deployment
52-
- **`docker-compose.yml`**: Multi-service setup for different scenarios
53-
54-
## Environment Variables
55-
56-
| Variable | Description | Default | Example |
57-
|----------|-------------|---------|---------|
58-
| `USE_GPU` | Enable/disable GPU acceleration | `false` (CPU), `true` (GPU) | `false` |
59-
| `PII_ENTITIES` | JSON array of PII entity types | `["EMAIL_ADDRESS", "PHONE_NUMBER", "PERSON", "DATE_TIME"]` | `["EMAIL_ADDRESS", "PHONE_NUMBER"]` |
60-
61-
## Docker Compose Services
62-
63-
| Service | Purpose | Port | GPU |
64-
|---------|---------|------|-----|
65-
| `pii-detector-gpu` | Production with GPU | 8000 | Yes |
66-
| `pii-detector-cpu` | Production without GPU | 8001 | No |
67-
| `pii-detector-dev` | Development/testing | 8002 | No |
68-
69-
## API Endpoints
70-
71-
### POST /validate
72-
Validates text for PII and returns anonymized version.
73-
74-
**Request:**
75-
```json
76-
{
77-
"text": "Contact John Doe at john.doe@example.com"
78-
}
79-
```
80-
81-
**Response:**
82-
```json
83-
{
84-
"verdict": false,
85-
"assessment": [
86-
{
87-
"piiEntity": "PERSON",
88-
"piiValue": "John Doe"
89-
},
90-
{
91-
"piiEntity": "EMAIL_ADDRESS",
92-
"piiValue": "john.doe@example.com"
93-
}
94-
],
95-
"anonymizedText": "Contact [PERSON] at [EMAIL_ADDRESS]"
96-
}
97-
```
98-
99-
## Requirements
100-
101-
### For macOS Development
102-
- Docker Desktop
103-
104-
### For Ubuntu GPU Deployment
105-
- Docker with NVIDIA Container Toolkit
106-
- NVIDIA GPU with CUDA 11.8+
107-
108-
## Supported PII Entities
109-
110-
Common entity types include:
111-
- `EMAIL_ADDRESS`
112-
- `PHONE_NUMBER`
113-
- `PERSON`
114-
- `DATE_TIME`
115-
- `CREDIT_CARD`
116-
- `SSN`
117-
- `IBAN_CODE`
118-
- `IP_ADDRESS`
119-
- `LOCATION`
120-
- `ORGANIZATION`
121-
122-
## Quick Commands
123-
124-
```bash
125-
# Development on macOS
126-
docker build -f docker/Dockerfile -t pii-detector:latest .
127-
docker run -p 8000:8000 -e USE_GPU=false pii-detector:latest
128-
129-
# Production on Ubuntu
130-
docker build -f docker/Dockerfile.gpu -t pii-detector-gpu:latest .
131-
docker run --gpus all -p 8000:8000 -e USE_GPU=true pii-detector-gpu:latest
132-
133-
# Using docker-compose
134-
docker-compose up pii-detector-cpu # CPU only
135-
docker-compose up pii-detector-gpu # GPU enabled
136-
docker-compose up pii-detector-dev # Development
137-
```
29+
pii-detector-gpu:latest

docker/docker-compose.yml

Lines changed: 19 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,42 +1,34 @@
11
version: '3.8'
22

33
services:
4-
# For GPU deployment on Ubuntu/Linux
5-
pii-detector-gpu:
4+
# CPU-only deployment (works on macOS and Ubuntu)
5+
pii-detection-cpu:
6+
build:
7+
context: ..
8+
dockerfile: docker/Dockerfile
9+
ports:
10+
- "8000:8000"
11+
environment:
12+
- PYTHONUNBUFFERED=1
13+
restart: unless-stopped
14+
container_name: pii-detection-cpu
15+
16+
# GPU-enabled deployment (Ubuntu with NVIDIA GPU)
17+
pii-detection-gpu:
618
build:
719
context: ..
820
dockerfile: docker/Dockerfile.gpu
921
ports:
1022
- "8000:8000"
1123
environment:
12-
- USE_GPU=true
13-
- PII_ENTITIES=["EMAIL_ADDRESS", "PHONE_NUMBER", "PERSON", "DATE_TIME"]
24+
- PYTHONUNBUFFERED=1
25+
restart: unless-stopped
26+
container_name: pii-detection-gpu
27+
# Enable GPU support - requires Docker with NVIDIA Container Toolkit
1428
deploy:
1529
resources:
1630
reservations:
1731
devices:
1832
- driver: nvidia
19-
count: 1
33+
count: all
2034
capabilities: [gpu]
21-
22-
# For CPU-only deployment (works on macOS and Ubuntu)
23-
pii-detector-cpu:
24-
build:
25-
context: ..
26-
dockerfile: docker/Dockerfile
27-
ports:
28-
- "8001:8000"
29-
environment:
30-
- USE_GPU=false
31-
- PII_ENTITIES=["EMAIL_ADDRESS", "PHONE_NUMBER", "PERSON", "DATE_TIME"]
32-
33-
# Minimal configuration for development
34-
pii-detector-dev:
35-
build:
36-
context: ..
37-
dockerfile: docker/Dockerfile
38-
ports:
39-
- "8002:8000"
40-
environment:
41-
- USE_GPU=false
42-
- PII_ENTITIES=["EMAIL_ADDRESS", "PHONE_NUMBER"]

0 commit comments

Comments
 (0)