# Captured from the GitHub Actions web UI: "Production CI/CD Pipeline", run #43.
# Production CI/CD pipeline for the Rental ML System:
# automated testing, security scanning, image builds, and deployments.
name: Production CI/CD Pipeline

on:
  push:
    branches:
      - main
      - 'release/*'
    tags:
      - 'v*'
  pull_request:
    branches:
      - main
      - develop
  schedule:
    # Daily security scans at 02:00 UTC.
    - cron: '0 2 * * *'

# Values shared by every job below.
env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}
  PYTHON_VERSION: '3.11'
  NODE_VERSION: '18'
  DOCKER_BUILDKIT: '1'
  COMPOSE_DOCKER_CLI_BUILD: '1'

jobs:
# ================================
# Code Quality and Security Scan
# ================================
  # Matrix job: every leg installs the Python toolchain and then runs exactly
  # one of the lint / security / dependency checks, selected by `matrix.check`.
  code-quality:
    name: Code Quality & Security
    runs-on: ubuntu-latest
    strategy:
      # Let every check finish; one failing leg must not cancel the others.
      fail-fast: false
      matrix:
        check: [lint, security, dependency]
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}
          cache: 'pip'
          # Requirements live under requirements/, not in a root
          # requirements.txt, so tell the cache which files to hash.
          cache-dependency-path: requirements/*.txt

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements/dev.txt
          pip install -r requirements/base.txt

      - name: Run linting (flake8, black, isort)
        if: matrix.check == 'lint'
        # Each tool runs even if an earlier one failed, so a single run
        # reports every problem; the step still fails if any tool failed.
        run: |
          status=0
          echo "::group::Black formatting check"
          black --check --diff src/ tests/ || status=1
          echo "::endgroup::"
          echo "::group::isort import sorting check"
          isort --check-only --diff src/ tests/ || status=1
          echo "::endgroup::"
          echo "::group::flake8 linting"
          flake8 src/ tests/ || status=1
          echo "::endgroup::"
          echo "::group::mypy type checking"
          mypy src/ || status=1
          echo "::endgroup::"
          exit "${status}"

      - name: Security scanning with bandit
        if: matrix.check == 'security'
        # The JSON run exits non-zero when findings exist; keep going so the
        # human-readable report is still printed, then fail the step.
        run: |
          status=0
          echo "::group::Bandit security scan"
          bandit -r src/ -f json -o bandit-report.json || status=1
          bandit -r src/ -f txt || status=1
          echo "::endgroup::"
          exit "${status}"

      - name: Dependency vulnerability scan
        if: matrix.check == 'dependency'
        # NOTE(review): `--json --output <file>` matches the Safety 1.x CLI;
        # confirm against the Safety version pinned in requirements/dev.txt.
        run: |
          status=0
          echo "::group::Safety dependency scan"
          safety check --json --output safety-report.json || status=1
          safety check || status=1
          echo "::endgroup::"
          echo "::group::pip-audit vulnerability scan"
          pip-audit --format=json --output=pip-audit-report.json || status=1
          pip-audit || status=1
          echo "::endgroup::"
          exit "${status}"

      - name: Upload security reports
        # always(): the reports matter most when the scan step has failed.
        if: always() && (matrix.check == 'security' || matrix.check == 'dependency')
        uses: actions/upload-artifact@v4
        with:
          # upload-artifact v4 rejects duplicate artifact names within a run,
          # so each matrix leg uploads under its own name.
          name: security-reports-${{ matrix.check }}
          path: |
            bandit-report.json
            safety-report.json
            pip-audit-report.json
          retention-days: 30
# ================================
# Unit and Integration Tests
# ================================
  # Matrix job: runs the unit, integration, or ML test suite against
  # throwaway Postgres and Redis service containers.
  test:
    name: Test Suite
    runs-on: ubuntu-latest
    strategy:
      # Let every suite report its own result.
      fail-fast: false
      matrix:
        test-type: [unit, integration, ml]
    # Connection settings for the service containers below. They are declared
    # at job level because a shell `export` inside one step does not carry
    # over to later steps.
    env:
      DATABASE_URL: 'postgresql://test_user:test_password@localhost:5432/test_db'
      REDIS_URL: 'redis://localhost:6379/0'
    services:
      postgres:
        image: postgres:15
        env:
          POSTGRES_PASSWORD: test_password
          POSTGRES_USER: test_user
          POSTGRES_DB: test_db
        options: >-
          --health-cmd pg_isready
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5
        ports:
          - '5432:5432'
      redis:
        image: redis:7-alpine
        options: >-
          --health-cmd "redis-cli ping"
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5
        ports:
          - '6379:6379'
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}
          cache: 'pip'
          # Requirements live under requirements/, not in a root
          # requirements.txt, so tell the cache which files to hash.
          cache-dependency-path: requirements/*.txt

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements/dev.txt
          pip install -r requirements/base.txt

      - name: Set up test environment
        run: cp .env.test .env

      - name: Run database migrations
        run: python migrations/run_migrations.py

      - name: Run unit tests
        if: matrix.test-type == 'unit'
        run: |
          pytest tests/unit/ \
            --cov=src \
            --cov-report=xml \
            --cov-report=html \
            --cov-report=term \
            --junit-xml=junit-unit.xml \
            -v

      - name: Run integration tests
        if: matrix.test-type == 'integration'
        run: |
          pytest tests/integration/ \
            --junit-xml=junit-integration.xml \
            -v

      - name: Run ML tests
        if: matrix.test-type == 'ml'
        run: |
          pytest tests/unit/test_infrastructure/ \
            -k "test_ml" \
            --junit-xml=junit-ml.xml \
            -v

      - name: Upload test results
        uses: actions/upload-artifact@v4
        if: always()
        with:
          name: test-results-${{ matrix.test-type }}
          path: |
            junit-*.xml
            htmlcov/
            .coverage
          # upload-artifact v4 skips dotfiles unless asked; `.coverage` is one.
          include-hidden-files: true
          retention-days: 30

      - name: Upload coverage to Codecov
        if: matrix.test-type == 'unit'
        uses: codecov/codecov-action@v3
        with:
          file: ./coverage.xml
          flags: unittests
          name: codecov-umbrella
# ================================
# Performance Testing
# ================================
  # Builds the API image, starts the compose test stack, and runs the k6 load
  # and stress scripts against it. Only runs for pushes to main.
  performance-test:
    name: Performance Testing
    runs-on: ubuntu-latest
    needs: [test]
    if: github.event_name == 'push' && github.ref == 'refs/heads/main'
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Build test image
        uses: docker/build-push-action@v5
        with:
          context: .
          file: ./docker/Dockerfile.api
          target: production
          tags: rental-ml-api:test
          # Load into the local daemon so the compose stack can use the image.
          load: true
          cache-from: type=gha
          cache-to: type=gha,mode=max

      - name: Start test environment
        # Uses the Compose v2 plugin (`docker compose`); the standalone
        # `docker-compose` v1 binary is not shipped on current hosted runners.
        run: |
          docker compose -f docker-compose.test.yml up -d
          sleep 30

      - name: Install k6
        run: |
          sudo gpg -k
          sudo gpg --no-default-keyring --keyring /usr/share/keyrings/k6-archive-keyring.gpg --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys C5AD17C747E3415A3642D57D77C6C491D6AC1D69
          echo "deb [signed-by=/usr/share/keyrings/k6-archive-keyring.gpg] https://dl.k6.io/deb stable main" | sudo tee /etc/apt/sources.list.d/k6.list
          sudo apt-get update
          sudo apt-get install -y k6

      - name: Run load tests
        run: |
          k6 run tests/performance/load-test.js \
            --out json=load-test-results.json \
            --env BASE_URL=http://localhost:8000

      - name: Run stress tests
        run: |
          k6 run tests/performance/stress-test.js \
            --out json=stress-test-results.json \
            --env BASE_URL=http://localhost:8000

      - name: Upload performance results
        # always(): keep the raw results when a k6 run fails its thresholds.
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: performance-results
          path: |
            load-test-results.json
            stress-test-results.json
          retention-days: 30

      - name: Cleanup
        if: always()
        run: docker compose -f docker-compose.test.yml down -v
# ================================
# Container Security Scanning
# ================================
  # Matrix job: builds each service image locally, scans it with Trivy, lints
  # its Dockerfile with Hadolint, and publishes both results as SARIF.
  container-security:
    name: Container Security Scan
    runs-on: ubuntu-latest
    # upload-sarif needs security-events: write; checkout needs contents: read.
    permissions:
      contents: read
      actions: read
      security-events: write
    strategy:
      matrix:
        image: [api, ml-training, scraping]
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Build image for scanning
        uses: docker/build-push-action@v5
        with:
          context: .
          file: ./docker/Dockerfile.${{ matrix.image }}
          target: production
          tags: rental-ml-${{ matrix.image }}:scan
          # Load into the local daemon so Trivy can scan the image by tag.
          load: true

      - name: Run Trivy vulnerability scanner
        # Pinned to a release instead of the mutable `master` branch.
        uses: aquasecurity/trivy-action@0.28.0
        with:
          image-ref: 'rental-ml-${{ matrix.image }}:scan'
          format: 'sarif'
          output: 'trivy-${{ matrix.image }}.sarif'

      - name: Upload Trivy scan results
        uses: github/codeql-action/upload-sarif@v3
        with:
          sarif_file: 'trivy-${{ matrix.image }}.sarif'
          category: 'trivy-${{ matrix.image }}'

      - name: Run Hadolint Dockerfile linter
        uses: hadolint/hadolint-action@v3.1.0
        with:
          dockerfile: ./docker/Dockerfile.${{ matrix.image }}
          format: sarif
          output-file: hadolint-${{ matrix.image }}.sarif

      - name: Upload Hadolint results
        # Run even when Hadolint failed the previous step (that is when the
        # findings are needed), but only if a SARIF file was actually written.
        if: ${{ always() && hashFiles(format('hadolint-{0}.sarif', matrix.image)) != '' }}
        uses: github/codeql-action/upload-sarif@v3
        with:
          sarif_file: hadolint-${{ matrix.image }}.sarif
          category: 'hadolint-${{ matrix.image }}'
# ================================
# Build and Push Container Images
# ================================
  # Matrix job: builds each service image for amd64 and arm64 and pushes it to
  # the container registry. Runs for pushes to main and for tag pushes.
  build-images:
    name: Build & Push Images
    runs-on: ubuntu-latest
    needs: [code-quality, test, container-security]
    if: github.event_name == 'push' && (github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/'))
    # Pushing to GHCR with GITHUB_TOKEN requires packages: write.
    permissions:
      contents: read
      packages: write
    strategy:
      matrix:
        image: [api, ml-training, scraping]
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up QEMU
        # Needed to execute arm64 build steps on the amd64 runner.
        uses: docker/setup-qemu-action@v3

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Log in to Container Registry
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Extract metadata
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}-${{ matrix.image }}
          # Tag rules, in order:
          # - branch / tag / PR ref names; `event=tag` yields the raw ref name
          #   (e.g. v1.2.3) that deploy-production references
          # - semver tags without the leading "v"
          # - `<branch>-<short sha>`, branch pushes only, because `{{branch}}`
          #   is empty on tag pushes and would give an invalid "-<sha>" tag
          # - the full commit SHA that deploy-staging references
          # - `latest` on the default branch
          tags: |
            type=ref,event=branch
            type=ref,event=tag
            type=ref,event=pr
            type=semver,pattern={{version}}
            type=semver,pattern={{major}}.{{minor}}
            type=sha,prefix={{branch}}-,enable=${{ github.ref_type == 'branch' }}
            type=raw,value=${{ github.sha }}
            type=raw,value=latest,enable={{is_default_branch}}

      - name: Build and push image
        uses: docker/build-push-action@v5
        with:
          context: .
          file: ./docker/Dockerfile.${{ matrix.image }}
          target: production
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          # One cache scope per image so the matrix legs do not overwrite
          # each other's layer cache.
          cache-from: type=gha,scope=${{ matrix.image }}
          cache-to: type=gha,mode=max,scope=${{ matrix.image }}
          platforms: linux/amd64,linux/arm64
# ================================
# Deploy to Staging
# ================================
  # Applies the staging Kustomize overlay with the images built for this
  # commit, then runs smoke and end-to-end tests against staging.
  deploy-staging:
    name: Deploy to Staging
    runs-on: ubuntu-latest
    needs: [build-images, performance-test]
    if: github.ref == 'refs/heads/main'
    environment:
      name: staging
      url: https://staging.rental-ml.com
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Configure kubectl
        # Write the kubeconfig outside the workspace and publish its absolute
        # path through GITHUB_ENV: a plain `export` would be lost when this
        # step ends, and a relative path would break after the later `cd`.
        env:
          STAGING_KUBECONFIG: ${{ secrets.STAGING_KUBECONFIG }}
        run: |
          umask 077
          printf '%s' "${STAGING_KUBECONFIG}" | base64 -d > "${RUNNER_TEMP}/kubeconfig"
          echo "KUBECONFIG=${RUNNER_TEMP}/kubeconfig" >> "${GITHUB_ENV}"

      - name: Deploy to staging
        run: |
          # Update image tags in Kustomization
          cd k8s/overlays/staging
          kustomize edit set image \
            ghcr.io/${{ env.IMAGE_NAME }}-api:${{ github.sha }} \
            ghcr.io/${{ env.IMAGE_NAME }}-ml-training:${{ github.sha }} \
            ghcr.io/${{ env.IMAGE_NAME }}-scraping:${{ github.sha }}
          # Apply manifests
          kubectl apply -k . --wait=true
          # Wait for rollout
          kubectl rollout status deployment/rental-ml-api -n rental-ml-staging --timeout=600s
          kubectl rollout status deployment/rental-ml-worker -n rental-ml-staging --timeout=600s

      - name: Run smoke tests
        run: |
          # Wait for services to be ready
          sleep 60
          # Run basic health checks
          curl -f https://staging.rental-ml.com/health
          curl -f https://staging.rental-ml.com/api/v1/health

      - name: Set up Node.js
        # Use the Node version declared in the workflow-level env rather than
        # whatever the runner image ships.
        uses: actions/setup-node@v4
        with:
          node-version: ${{ env.NODE_VERSION }}

      - name: Run E2E tests
        run: |
          npm install -g @playwright/test
          # Download the browsers (and their OS dependencies) Playwright drives.
          npx playwright install --with-deps
          npx playwright test tests/e2e/ --config=playwright.staging.config.js
# ================================
# Deploy to Production
# ================================
  # Deploys a release tag (v*) to production: rolls out a "green" copy of the
  # API deployment, health-checks it, then points the live service at it.
  deploy-production:
    name: Deploy to Production
    runs-on: ubuntu-latest
    needs: [build-images, deploy-staging]
    # deploy-staging only runs on refs/heads/main, so on a tag push it is
    # always skipped. With the implicit success() check that skip would cascade
    # and this job could never run. Require a successful image build and
    # accept a staging result of success or skipped.
    if: >-
      ${{ !cancelled()
      && startsWith(github.ref, 'refs/tags/v')
      && needs.build-images.result == 'success'
      && (needs.deploy-staging.result == 'success' || needs.deploy-staging.result == 'skipped') }}
    environment:
      name: production
      url: https://api.rental-ml.com
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Configure kubectl
        # Write the kubeconfig outside the workspace and publish its absolute
        # path through GITHUB_ENV: a plain `export` would be lost when this
        # step ends, and a relative path would break after the later `cd`.
        env:
          PRODUCTION_KUBECONFIG: ${{ secrets.PRODUCTION_KUBECONFIG }}
        run: |
          umask 077
          printf '%s' "${PRODUCTION_KUBECONFIG}" | base64 -d > "${RUNNER_TEMP}/kubeconfig"
          echo "KUBECONFIG=${RUNNER_TEMP}/kubeconfig" >> "${GITHUB_ENV}"

      - name: Pre-deployment checks
        run: |
          # Check cluster health
          kubectl get nodes
          kubectl get pods -n rental-ml-prod
          # Check database connectivity
          kubectl exec -n rental-ml-prod statefulset/postgres-primary -- pg_isready

      - name: Blue-Green Deployment
        # NOTE(review): `kustomize edit set image` only rewrites the overlay's
        # kustomization.yaml, while the manifest applied below is produced by
        # sed from the base file, so the tag override does not appear to reach
        # the green deployment. Confirm how the release tag is meant to be set.
        run: |
          # Deploy to green environment
          cd k8s/overlays/prod
          # Create green deployment
          sed 's/rental-ml-api/rental-ml-api-green/g' ../../base/08-app-deployment.yaml > green-deployment.yaml
          # Update image tags
          kustomize edit set image \
            ghcr.io/${{ env.IMAGE_NAME }}-api:${{ github.ref_name }} \
            ghcr.io/${{ env.IMAGE_NAME }}-ml-training:${{ github.ref_name }} \
            ghcr.io/${{ env.IMAGE_NAME }}-scraping:${{ github.ref_name }}
          # Deploy green environment
          kubectl apply -f green-deployment.yaml
          kubectl rollout status deployment/rental-ml-api-green -n rental-ml-prod --timeout=600s

      - name: Health check green deployment
        run: |
          # Get green service endpoint
          GREEN_IP=$(kubectl get service rental-ml-api-green-service -n rental-ml-prod -o jsonpath='{.status.loadBalancer.ingress[0].ip}')
          # Fail with a clear message instead of curling an empty host
          if [ -z "${GREEN_IP}" ]; then
            echo "::error::rental-ml-api-green-service has no load balancer IP yet"
            exit 1
          fi
          # Health check
          curl -f "http://${GREEN_IP}:8000/health"
          curl -f "http://${GREEN_IP}:8000/api/v1/health"

      - name: Switch traffic to green
        run: |
          # Update service selector to point to green deployment
          kubectl patch service rental-ml-api-service -n rental-ml-prod -p '{"spec":{"selector":{"version":"green"}}}'
          # Wait for traffic switch
          sleep 30

      - name: Final health check
        run: |
          # Production health check
          curl -f https://api.rental-ml.com/health
          curl -f https://api.rental-ml.com/api/v1/health

      - name: Cleanup blue deployment
        # A Deployment cannot be renamed: metadata.name is immutable, so the
        # API server rejects a patch that changes it. The green deployment
        # therefore keeps its name and stays the live one.
        # NOTE(review): the next release will update rental-ml-api-green in
        # place; alternate blue/green names if a true swap is required.
        run: |
          # Remove old blue deployment
          kubectl delete deployment rental-ml-api -n rental-ml-prod --ignore-not-found=true

      - name: Post-deployment monitoring
        run: |
          # Check logs of the deployment that is now serving traffic
          kubectl logs -n rental-ml-prod deployment/rental-ml-api-green --tail=100
          # Verify all pods are running
          kubectl get pods -n rental-ml-prod
# ================================
# Security Monitoring
# ================================
  # Scheduled-only job: runs an OWASP ZAP full scan against the production
  # API, stores the HTML report, and alerts Slack when the job fails.
  security-monitoring:
    name: Security Monitoring
    runs-on: ubuntu-latest
    if: github.event_name == 'schedule'
    # The ZAP action files its findings as a GitHub issue by default.
    permissions:
      contents: read
      issues: write
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Run OWASP ZAP security scan
        uses: zaproxy/action-full-scan@v0.12.0
        with:
          target: 'https://api.rental-ml.com'
          rules_file_name: '.zap/rules.tsv'
          cmd_options: '-a'

      - name: Upload ZAP results
        # always(): keep the report when the scan step fails on findings.
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: zap-security-scan
          path: report_html.html
          retention-days: 30

      - name: Notify security team
        if: failure()
        uses: 8398a7/action-slack@v3
        with:
          status: failure
          channel: '#security-alerts'
          text: 'Security scan failed for Rental ML System'
        env:
          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}