# Production CI/CD Pipeline #56
# NOTE: The code-hosting UI flagged this file as containing hidden or bidirectional Unicode text that may be interpreted or compiled differently than it appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# See the hosting platform's documentation on bidirectional Unicode characters for details.
# Production CI/CD pipeline for the Rental ML System.
# Enterprise-grade automated testing, security scanning, and deployment.
name: Production CI/CD Pipeline

# Triggers: pushes to main / release branches and v* tags, pull requests
# targeting main or develop, and one scheduled run per day.
on:
  push:
    branches: [main, 'release/*']
    tags: ['v*']
  pull_request:
    branches: [main, develop]
  schedule:
    # Daily at 02:00 UTC (security scans)
    - cron: '0 2 * * *'

# Workflow-wide environment, visible to every job and step.
env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}
  PYTHON_VERSION: '3.11'
  NODE_VERSION: '18'
  DOCKER_BUILDKIT: '1'
  COMPOSE_DOCKER_CLI_BUILD: '1'

jobs:
# ================================
# Code Quality and Security Scan
# ================================
  # Runs three independent checks (lint, security, dependency) as matrix legs.
  code-quality:
    name: Code Quality & Security
    runs-on: ubuntu-latest
    strategy:
      # Let each check report its own result instead of being cancelled
      # as soon as a sibling leg fails.
      fail-fast: false
      matrix:
        check: [lint, security, dependency]
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}
          cache: pip
          # The requirement files live under requirements/, so the cache key
          # must be pointed at them explicitly.
          cache-dependency-path: 'requirements/*.txt'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements/dev.txt
          pip install -r requirements/base.txt

      - name: Run linting (flake8, black, isort)
        if: matrix.check == 'lint'
        run: |
          echo "::group::Black formatting check"
          black --check --diff src/ tests/
          echo "::endgroup::"
          echo "::group::isort import sorting check"
          isort --check-only --diff src/ tests/
          echo "::endgroup::"
          echo "::group::flake8 linting"
          flake8 src/ tests/
          echo "::endgroup::"
          echo "::group::mypy type checking"
          mypy src/
          echo "::endgroup::"

      - name: Security scanning with bandit
        if: matrix.check == 'security'
        run: |
          echo "::group::Bandit security scan"
          # The JSON run only produces the report artifact; --exit-zero keeps
          # it from aborting the step before the readable run below, which is
          # the one that gates the job.
          bandit -r src/ -f json -o bandit-report.json --exit-zero
          bandit -r src/ -f txt
          echo "::endgroup::"

      - name: Dependency vulnerability scan
        if: matrix.check == 'dependency'
        run: |
          # Run both scanners even if the first one reports findings, then
          # fail the step if either gating run failed.
          status=0
          echo "::group::Safety dependency scan"
          safety check --json --output safety-report.json || true
          safety check || status=1
          echo "::endgroup::"
          echo "::group::pip-audit vulnerability scan"
          pip-audit --format=json --output=pip-audit-report.json || true
          pip-audit || status=1
          echo "::endgroup::"
          exit "$status"

      - name: Upload security reports
        # always(): the reports matter most when a scan step has failed.
        if: always() && (matrix.check == 'security' || matrix.check == 'dependency')
        uses: actions/upload-artifact@v4
        with:
          # upload-artifact v4 rejects duplicate artifact names within a run,
          # so each matrix leg gets its own name.
          name: security-reports-${{ matrix.check }}
          path: |
            bandit-report.json
            safety-report.json
            pip-audit-report.json
          retention-days: 30
# ================================
# Unit and Integration Tests
# ================================
  # Runs the unit, integration and ML test suites as separate matrix legs
  # against Postgres and Redis service containers.
  test:
    name: Test Suite
    runs-on: ubuntu-latest
    strategy:
      # Keep the other suites running when one of them fails.
      fail-fast: false
      matrix:
        test-type: [unit, integration, ml]
    # Connection settings for the service containers below. Declared at job
    # level because variables exported inside one `run` step are not visible
    # to later steps.
    env:
      DATABASE_URL: postgresql://test_user:test_password@localhost:5432/test_db
      REDIS_URL: redis://localhost:6379/0
    services:
      postgres:
        image: postgres:15
        env:
          POSTGRES_PASSWORD: test_password
          POSTGRES_USER: test_user
          POSTGRES_DB: test_db
        options: >-
          --health-cmd pg_isready
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5
        ports:
          - '5432:5432'
      redis:
        image: redis:7-alpine
        options: >-
          --health-cmd "redis-cli ping"
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5
        ports:
          - '6379:6379'
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}
          cache: pip
          # The requirement files live under requirements/, so the cache key
          # must be pointed at them explicitly.
          cache-dependency-path: 'requirements/*.txt'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements/dev.txt
          pip install -r requirements/base.txt

      - name: Set up test environment
        run: |
          cp .env.test .env

      - name: Run database migrations
        run: |
          python migrations/run_migrations.py

      - name: Run unit tests
        if: matrix.test-type == 'unit'
        run: |
          pytest tests/unit/ \
            --cov=src \
            --cov-report=xml \
            --cov-report=html \
            --cov-report=term \
            --junit-xml=junit-unit.xml \
            -v

      - name: Run integration tests
        if: matrix.test-type == 'integration'
        run: |
          pytest tests/integration/ \
            --junit-xml=junit-integration.xml \
            -v

      - name: Run ML tests
        if: matrix.test-type == 'ml'
        run: |
          pytest tests/unit/test_infrastructure/ \
            -k "test_ml" \
            --junit-xml=junit-ml.xml \
            -v

      - name: Upload test results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: test-results-${{ matrix.test-type }}
          path: |
            junit-*.xml
            htmlcov/
            .coverage
          # .coverage is a dotfile; upload-artifact v4 skips hidden files
          # unless this is enabled.
          include-hidden-files: true
          retention-days: 30

      - name: Upload coverage to Codecov
        if: matrix.test-type == 'unit'
        uses: codecov/codecov-action@v3
        with:
          file: ./coverage.xml
          flags: unittests
          name: codecov-umbrella
# ================================
# Performance Testing
# ================================
  # Builds the API image, starts the compose test stack and runs the k6 load
  # and stress scripts against it. Only runs for pushes to main.
  performance-test:
    name: Performance Testing
    runs-on: ubuntu-latest
    needs: [test]
    if: github.event_name == 'push' && github.ref == 'refs/heads/main'
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Build test image
        uses: docker/build-push-action@v5
        with:
          context: .
          file: ./docker/Dockerfile.api
          target: production
          tags: rental-ml-api:test
          load: true
          cache-from: type=gha
          cache-to: type=gha,mode=max

      - name: Start test environment
        run: |
          # Compose v2 plugin syntax; the standalone docker-compose (v1)
          # binary is no longer shipped on ubuntu-latest runners.
          docker compose -f docker-compose.test.yml up -d
          sleep 30

      - name: Install k6
        run: |
          sudo gpg -k
          sudo gpg --no-default-keyring --keyring /usr/share/keyrings/k6-archive-keyring.gpg --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys C5AD17C747E3415A3642D57D77C6C491D6AC1D69
          echo "deb [signed-by=/usr/share/keyrings/k6-archive-keyring.gpg] https://dl.k6.io/deb stable main" | sudo tee /etc/apt/sources.list.d/k6.list
          sudo apt-get update
          # -y: never wait for confirmation in a non-interactive job.
          sudo apt-get install -y k6

      - name: Run load tests
        run: |
          k6 run tests/performance/load-test.js \
            --out json=load-test-results.json \
            --env BASE_URL=http://localhost:8000

      - name: Run stress tests
        run: |
          k6 run tests/performance/stress-test.js \
            --out json=stress-test-results.json \
            --env BASE_URL=http://localhost:8000

      - name: Upload performance results
        # always(): keep whatever results exist when a k6 run fails.
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: performance-results
          path: |
            load-test-results.json
            stress-test-results.json
          retention-days: 30

      - name: Cleanup
        if: always()
        run: docker compose -f docker-compose.test.yml down -v
# ================================
# Container Security Scanning
# ================================
  # Builds each image locally, scans it with Trivy, lints its Dockerfile with
  # Hadolint, and publishes both results as SARIF to code scanning.
  container-security:
    name: Container Security Scan
    runs-on: ubuntu-latest
    # upload-sarif needs security-events: write (and actions: read on
    # private repositories); everything else stays read-only.
    permissions:
      contents: read
      actions: read
      security-events: write
    strategy:
      # Scan every image even if one of them fails.
      fail-fast: false
      matrix:
        image: [api, ml-training, scraping]
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Build image for scanning
        uses: docker/build-push-action@v5
        with:
          context: .
          file: ./docker/Dockerfile.${{ matrix.image }}
          target: production
          tags: rental-ml-${{ matrix.image }}:scan
          load: true

      - name: Run Trivy vulnerability scanner
        # Pinned to a release instead of the mutable master branch.
        uses: aquasecurity/trivy-action@0.28.0
        with:
          image-ref: 'rental-ml-${{ matrix.image }}:scan'
          format: 'sarif'
          output: 'trivy-${{ matrix.image }}.sarif'

      - name: Upload Trivy scan results
        # Upload whenever a report was written, including after a failed step.
        if: always() && hashFiles(format('trivy-{0}.sarif', matrix.image)) != ''
        uses: github/codeql-action/upload-sarif@v3
        with:
          sarif_file: 'trivy-${{ matrix.image }}.sarif'
          category: 'trivy-${{ matrix.image }}'

      - name: Run Hadolint Dockerfile linter
        uses: hadolint/hadolint-action@v3.1.0
        with:
          dockerfile: ./docker/Dockerfile.${{ matrix.image }}
          format: sarif
          output-file: hadolint-${{ matrix.image }}.sarif

      - name: Upload Hadolint results
        # Hadolint fails the step on findings; the report must still be
        # published in that case.
        if: always() && hashFiles(format('hadolint-{0}.sarif', matrix.image)) != ''
        uses: github/codeql-action/upload-sarif@v3
        with:
          sarif_file: hadolint-${{ matrix.image }}.sarif
          category: 'hadolint-${{ matrix.image }}'
# ================================
# Build and Push Container Images
# ================================
  # Builds each image for amd64 and arm64 and pushes it to the registry.
  # Only runs for pushes to main or to a tag.
  build-images:
    name: Build & Push Images
    runs-on: ubuntu-latest
    needs: [code-quality, test, container-security]
    if: github.event_name == 'push' && (github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/'))
    # GITHUB_TOKEN must be allowed to push packages to the registry.
    permissions:
      contents: read
      packages: write
    strategy:
      matrix:
        image: [api, ml-training, scraping]
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up QEMU
        # Required to execute linux/arm64 build steps on an amd64 runner.
        uses: docker/setup-qemu-action@v3

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Log in to Container Registry
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Extract metadata
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}-${{ matrix.image }}
          # Tag notes:
          # - type=ref,event=tag publishes the raw git tag (e.g. v1.2.3),
          #   which the production deploy references via github.ref_name.
          # - the branch-prefixed sha tag is limited to branch builds; on a
          #   tag push {{branch}} is empty and the tag would start with "-",
          #   which is not a valid image tag.
          # - the raw github.sha tag publishes the full commit SHA, which
          #   the staging deploy references.
          tags: |
            type=ref,event=branch
            type=ref,event=pr
            type=ref,event=tag
            type=semver,pattern={{version}}
            type=semver,pattern={{major}}.{{minor}}
            type=sha,prefix={{branch}}-,enable=${{ github.ref_type == 'branch' }}
            type=raw,value=${{ github.sha }}
            type=raw,value=latest,enable={{is_default_branch}}

      - name: Build and push image
        uses: docker/build-push-action@v5
        with:
          context: .
          file: ./docker/Dockerfile.${{ matrix.image }}
          target: production
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          # One cache scope per image so the matrix legs do not overwrite
          # each other's layer cache.
          cache-from: type=gha,scope=${{ matrix.image }}
          cache-to: type=gha,mode=max,scope=${{ matrix.image }}
          platforms: linux/amd64,linux/arm64
# ================================
# Deploy to Staging
# ================================
  # Applies the staging Kustomize overlay with the freshly built images, then
  # runs smoke and end-to-end tests against the staging URL.
  deploy-staging:
    name: Deploy to Staging
    runs-on: ubuntu-latest
    needs: [build-images, performance-test]
    if: github.ref == 'refs/heads/main'
    environment:
      name: staging
      url: https://staging.rental-ml.com
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Configure kubectl
        env:
          # Passed through the environment rather than interpolated into
          # the script text.
          KUBECONFIG_B64: ${{ secrets.STAGING_KUBECONFIG }}
        run: |
          # Absolute path outside the checkout: later steps change directory,
          # and the credentials should not sit in the workspace.
          kubeconfig_path="$RUNNER_TEMP/kubeconfig"
          printf '%s' "$KUBECONFIG_B64" | base64 -d > "$kubeconfig_path"
          chmod 600 "$kubeconfig_path"
          # A plain `export` ends with this step; GITHUB_ENV carries the
          # variable into every following step.
          echo "KUBECONFIG=$kubeconfig_path" >> "$GITHUB_ENV"

      - name: Deploy to staging
        run: |
          # Image references must be lowercase; github.repository may not be.
          image_repo="ghcr.io/${IMAGE_NAME,,}"
          # Matches the branch-prefixed short-SHA tag published by the
          # build job (type=sha,prefix={{branch}}-); this job only runs on
          # main.
          image_tag="main-${GITHUB_SHA::7}"
          # Update image tags in Kustomization
          cd k8s/overlays/staging
          kustomize edit set image \
            "${image_repo}-api:${image_tag}" \
            "${image_repo}-ml-training:${image_tag}" \
            "${image_repo}-scraping:${image_tag}"
          # Apply manifests
          kubectl apply -k . --wait=true
          # Wait for rollout
          kubectl rollout status deployment/rental-ml-api -n rental-ml-staging --timeout=600s
          kubectl rollout status deployment/rental-ml-worker -n rental-ml-staging --timeout=600s

      - name: Run smoke tests
        run: |
          # Wait for services to be ready
          sleep 60
          # Run basic health checks
          curl -f https://staging.rental-ml.com/health
          curl -f https://staging.rental-ml.com/api/v1/health

      - name: Set up Node.js
        uses: actions/setup-node@v4
        with:
          node-version: ${{ env.NODE_VERSION }}

      - name: Run E2E tests
        run: |
          npm install -g @playwright/test
          # Browser binaries are not bundled with the npm package.
          npx playwright install --with-deps
          npx playwright test tests/e2e/ --config=playwright.staging.config.js
# ================================
# Deploy to Production
# ================================
  # Blue/green rollout of a v* tag to the production cluster.
  deploy-production:
    name: Deploy to Production
    runs-on: ubuntu-latest
    # Depends on build-images rather than deploy-staging: deploy-staging only
    # runs for refs/heads/main, so on a tag ref it is always skipped, and a
    # skipped dependency would skip this job too.
    needs: [build-images]
    if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')
    environment:
      name: production
      url: https://api.rental-ml.com
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Configure kubectl
        env:
          # Passed through the environment rather than interpolated into
          # the script text.
          KUBECONFIG_B64: ${{ secrets.PRODUCTION_KUBECONFIG }}
        run: |
          # Absolute path outside the checkout: later steps change directory,
          # and the credentials should not sit in the workspace.
          kubeconfig_path="$RUNNER_TEMP/kubeconfig"
          printf '%s' "$KUBECONFIG_B64" | base64 -d > "$kubeconfig_path"
          chmod 600 "$kubeconfig_path"
          # A plain `export` ends with this step; GITHUB_ENV carries the
          # variable into every following step.
          echo "KUBECONFIG=$kubeconfig_path" >> "$GITHUB_ENV"

      - name: Pre-deployment checks
        run: |
          # Check cluster health
          kubectl get nodes
          kubectl get pods -n rental-ml-prod
          # Check database connectivity
          kubectl exec -n rental-ml-prod statefulset/postgres-primary -- pg_isready

      - name: Blue-Green Deployment
        run: |
          # Image references must be lowercase; github.repository may not be.
          image_repo="ghcr.io/${IMAGE_NAME,,}"
          # The build job's semver tags drop the leading "v" (v1.2.3 is
          # published as 1.2.3), so strip it from the git tag name.
          image_tag="${GITHUB_REF_NAME#v}"
          # Deploy to green environment
          cd k8s/overlays/prod
          # Create green deployment
          sed 's/rental-ml-api/rental-ml-api-green/g' ../../base/08-app-deployment.yaml > green-deployment.yaml
          # Update image tags
          kustomize edit set image \
            "${image_repo}-api:${image_tag}" \
            "${image_repo}-ml-training:${image_tag}" \
            "${image_repo}-scraping:${image_tag}"
          # NOTE(review): the manifest applied below is the sed output of the
          # base file, not a kustomize build, so the image tags set above do
          # not appear to reach the green deployment - confirm and fix
          # against the manifests.
          # Deploy green environment
          kubectl apply -f green-deployment.yaml
          kubectl rollout status deployment/rental-ml-api-green -n rental-ml-prod --timeout=600s

      - name: Health check green deployment
        run: |
          # Get green service endpoint
          GREEN_IP=$(kubectl get service rental-ml-api-green-service -n rental-ml-prod -o jsonpath='{.status.loadBalancer.ingress[0].ip}')
          # Health check
          curl -f http://$GREEN_IP:8000/health
          curl -f http://$GREEN_IP:8000/api/v1/health

      - name: Switch traffic to green
        run: |
          # Update service selector to point to green deployment
          # NOTE(review): assumes the green pods carry a version=green
          # label; nothing in this job adds one - verify in the manifests.
          kubectl patch service rental-ml-api-service -n rental-ml-prod -p '{"spec":{"selector":{"version":"green"}}}'
          # Wait for traffic switch
          sleep 30

      - name: Final health check
        run: |
          # Production health check
          curl -f https://api.rental-ml.com/health
          curl -f https://api.rental-ml.com/api/v1/health

      - name: Cleanup blue deployment
        run: |
          # Remove old blue deployment
          kubectl delete deployment rental-ml-api -n rental-ml-prod --ignore-not-found=true
          # Rename green to blue for next deployment
          # NOTE(review): metadata.name is immutable in Kubernetes, so this
          # patch is expected to be rejected; a rename needs the deployment
          # to be re-created under the new name. Left unchanged pending a
          # decision on the blue/green naming scheme.
          kubectl patch deployment rental-ml-api-green -n rental-ml-prod -p '{"metadata":{"name":"rental-ml-api"}}'

      - name: Post-deployment monitoring
        run: |
          # Check metrics and logs
          # NOTE(review): deployment/rental-ml-api is deleted by the previous
          # step; this likely needs to target the green deployment.
          kubectl logs -n rental-ml-prod deployment/rental-ml-api --tail=100
          # Verify all pods are running
          kubectl get pods -n rental-ml-prod
# ================================
# Security Monitoring
# ================================
  # Scheduled OWASP ZAP full scan of the production API, with the HTML report
  # kept as an artifact and a Slack alert when the job fails.
  security-monitoring:
    name: Security Monitoring
    runs-on: ubuntu-latest
    if: github.event_name == 'schedule'
    # The ZAP action files its findings as a GitHub issue by default.
    permissions:
      contents: read
      issues: write
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Run OWASP ZAP security scan
        # Newer release than v0.8.0, which predates the current artifact
        # backend used by upload-artifact v4.
        uses: zaproxy/action-full-scan@v0.12.0
        with:
          target: 'https://api.rental-ml.com'
          rules_file_name: '.zap/rules.tsv'
          cmd_options: '-a'

      - name: Upload ZAP results
        # always(): the report is most useful when the scan step has failed.
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: zap-security-scan
          path: report_html.html
          retention-days: 30

      - name: Notify security team
        if: failure()
        uses: 8398a7/action-slack@v3
        with:
          status: failure
          channel: '#security-alerts'
          text: 'Security scan failed for Rental ML System'
        env:
          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}