Production CI/CD Pipeline #43

Workflow file for this run

.github/workflows/ci-cd-production.yml at 65e494a

	# Production CI/CD pipeline for the Rental ML System.
	# Automated testing, security scanning, image builds and deployment.

	name: Production CI/CD Pipeline

	on:
	  push:
	    branches:
	      - main
	      - 'release/*'
	    tags:
	      - 'v*'
	  pull_request:
	    branches:
	      - main
	      - develop
	  schedule:
	    # Daily at 02:00 UTC, intended for the security scans.
	    - cron: '0 2 * * *'

	# Workflow-level environment, visible to every job and step.
	env:
	  REGISTRY: ghcr.io
	  IMAGE_NAME: ${{ github.repository }}
	  # Quoted so the values stay strings ('3.11', not a float).
	  PYTHON_VERSION: '3.11'
	  NODE_VERSION: '18'
	  DOCKER_BUILDKIT: '1'
	  COMPOSE_DOCKER_CLI_BUILD: '1'

	jobs:
	# ================================
	# Code Quality and Security Scan
	# ================================
	# Static-analysis gate. The matrix runs three independent legs: lint,
	# Bandit (security) and dependency audits (Safety + pip-audit).
	code-quality:
	  name: Code Quality & Security
	  runs-on: ubuntu-latest
	  strategy:
	    matrix:
	      check: [lint, security, dependency]
	  steps:
	    - name: Checkout code
	      uses: actions/checkout@v4
	      with:
	        fetch-depth: 0

	    - name: Set up Python
	      uses: actions/setup-python@v4
	      with:
	        python-version: ${{ env.PYTHON_VERSION }}
	        cache: 'pip'

	    - name: Install dependencies
	      run: |
	        python -m pip install --upgrade pip
	        pip install -r requirements/dev.txt
	        pip install -r requirements/base.txt

	    - name: Run linting (flake8, black, isort)
	      if: matrix.check == 'lint'
	      run: |
	        echo "::group::Black formatting check"
	        black --check --diff src/ tests/
	        echo "::endgroup::"

	        echo "::group::isort import sorting check"
	        isort --check-only --diff src/ tests/
	        echo "::endgroup::"

	        echo "::group::flake8 linting"
	        flake8 src/ tests/
	        echo "::endgroup::"

	        echo "::group::mypy type checking"
	        mypy src/
	        echo "::endgroup::"

	    - name: Security scanning with bandit
	      if: matrix.check == 'security'
	      run: |
	        echo "::group::Bandit security scan"
	        # Report-only pass: bandit exits non-zero when it finds issues, which
	        # would abort the script before the gating run below.
	        bandit -r src/ -f json -o bandit-report.json || true
	        # Gating pass: its exit status decides whether the step fails.
	        bandit -r src/ -f txt
	        echo "::endgroup::"

	    - name: Dependency vulnerability scan
	      if: matrix.check == 'dependency'
	      run: |
	        # Run both scanners even if the first reports findings, then fail
	        # the step with the last non-zero status.
	        status=0

	        echo "::group::Safety dependency scan"
	        safety check --json --output safety-report.json || true
	        safety check || status=$?
	        echo "::endgroup::"

	        echo "::group::pip-audit vulnerability scan"
	        pip-audit --format=json --output=pip-audit-report.json || true
	        pip-audit || status=$?
	        echo "::endgroup::"

	        exit "$status"

	    - name: Upload security reports
	      # always(): the reports matter most when a scan step has failed.
	      if: always() && (matrix.check == 'security' || matrix.check == 'dependency')
	      uses: actions/upload-artifact@v4
	      with:
	        # upload-artifact v4 rejects two jobs writing the same artifact
	        # name, so each matrix leg gets its own.
	        name: security-reports-${{ matrix.check }}
	        path: |
	          bandit-report.json
	          safety-report.json
	          pip-audit-report.json
	        retention-days: 30

	# ================================
	# Unit and Integration Tests
	# ================================
	# Test suite. One matrix leg per test type (unit, integration, ml), each
	# with its own Postgres and Redis service containers.
	test:
	  name: Test Suite
	  runs-on: ubuntu-latest
	  strategy:
	    matrix:
	      test-type: [unit, integration, ml]
	  # Job-level env reaches every step. An `export` inside one `run` script
	  # is lost when that step's shell exits, so it cannot be used for this.
	  env:
	    DATABASE_URL: postgresql://test_user:test_password@localhost:5432/test_db
	    REDIS_URL: redis://localhost:6379/0
	  services:
	    postgres:
	      image: postgres:15
	      env:
	        POSTGRES_PASSWORD: test_password
	        POSTGRES_USER: test_user
	        POSTGRES_DB: test_db
	      options: >-
	        --health-cmd pg_isready
	        --health-interval 10s
	        --health-timeout 5s
	        --health-retries 5
	      ports:
	        - '5432:5432'

	    redis:
	      image: redis:7-alpine
	      options: >-
	        --health-cmd "redis-cli ping"
	        --health-interval 10s
	        --health-timeout 5s
	        --health-retries 5
	      ports:
	        - '6379:6379'

	  steps:
	    - name: Checkout code
	      uses: actions/checkout@v4

	    - name: Set up Python
	      uses: actions/setup-python@v4
	      with:
	        python-version: ${{ env.PYTHON_VERSION }}
	        cache: 'pip'

	    - name: Install dependencies
	      run: |
	        python -m pip install --upgrade pip
	        pip install -r requirements/dev.txt
	        pip install -r requirements/base.txt

	    - name: Set up test environment
	      run: |
	        cp .env.test .env

	    - name: Run database migrations
	      run: |
	        python migrations/run_migrations.py

	    - name: Run unit tests
	      if: matrix.test-type == 'unit'
	      run: |
	        pytest tests/unit/ \
	          --cov=src \
	          --cov-report=xml \
	          --cov-report=html \
	          --cov-report=term \
	          --junit-xml=junit-unit.xml \
	          -v

	    - name: Run integration tests
	      if: matrix.test-type == 'integration'
	      run: |
	        pytest tests/integration/ \
	          --junit-xml=junit-integration.xml \
	          -v

	    - name: Run ML tests
	      if: matrix.test-type == 'ml'
	      run: |
	        pytest tests/unit/test_infrastructure/ \
	          -k "test_ml" \
	          --junit-xml=junit-ml.xml \
	          -v

	    - name: Upload test results
	      if: always()
	      uses: actions/upload-artifact@v4
	      with:
	        name: test-results-${{ matrix.test-type }}
	        path: |
	          junit-*.xml
	          htmlcov/
	          .coverage
	        # upload-artifact v4 skips dotfiles by default; .coverage is one.
	        include-hidden-files: true
	        retention-days: 30

	    - name: Upload coverage to Codecov
	      if: matrix.test-type == 'unit'
	      uses: codecov/codecov-action@v3
	      with:
	        file: ./coverage.xml
	        flags: unittests
	        name: codecov-umbrella

	# ================================
	# Performance Testing
	# ================================
	# Load and stress tests with k6 against a locally composed stack.
	# Runs only for pushes to main, after the test job.
	performance-test:
	  name: Performance Testing
	  runs-on: ubuntu-latest
	  needs: [test]
	  if: github.event_name == 'push' && github.ref == 'refs/heads/main'

	  steps:
	    - name: Checkout code
	      uses: actions/checkout@v4

	    - name: Set up Docker Buildx
	      uses: docker/setup-buildx-action@v3

	    - name: Build test image
	      uses: docker/build-push-action@v5
	      with:
	        context: .
	        file: ./docker/Dockerfile.api
	        target: production
	        tags: rental-ml-api:test
	        load: true
	        cache-from: type=gha
	        cache-to: type=gha,mode=max

	    - name: Start test environment
	      run: |
	        # Compose v2 plugin syntax; the standalone `docker-compose` v1
	        # binary is no longer shipped on current hosted runner images.
	        docker compose -f docker-compose.test.yml up -d
	        sleep 30

	    - name: Install k6
	      run: |
	        sudo gpg -k
	        sudo gpg --no-default-keyring --keyring /usr/share/keyrings/k6-archive-keyring.gpg --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys C5AD17C747E3415A3642D57D77C6C491D6AC1D69
	        echo "deb [signed-by=/usr/share/keyrings/k6-archive-keyring.gpg] https://dl.k6.io/deb stable main" | sudo tee /etc/apt/sources.list.d/k6.list
	        sudo apt-get update
	        # -y: never wait for confirmation in a non-interactive job.
	        sudo apt-get install -y k6

	    - name: Run load tests
	      run: |
	        k6 run tests/performance/load-test.js \
	          --out json=load-test-results.json \
	          --env BASE_URL=http://localhost:8000

	    - name: Run stress tests
	      run: |
	        k6 run tests/performance/stress-test.js \
	          --out json=stress-test-results.json \
	          --env BASE_URL=http://localhost:8000

	    - name: Upload performance results
	      # always(): keep whatever results exist when a k6 run fails.
	      if: always()
	      uses: actions/upload-artifact@v4
	      with:
	        name: performance-results
	        path: |
	          load-test-results.json
	          stress-test-results.json
	        retention-days: 30

	    - name: Cleanup
	      if: always()
	      run: docker compose -f docker-compose.test.yml down -v

	# ================================
	# Container Security Scanning
	# ================================
	# Builds each image locally, scans it with Trivy, lints its Dockerfile
	# with Hadolint, and publishes both results as SARIF to code scanning.
	container-security:
	  name: Container Security Scan
	  runs-on: ubuntu-latest
	  # upload-sarif needs security-events: write. Declaring permissions at
	  # job level sets every unlisted scope to none.
	  permissions:
	    contents: read
	    actions: read
	    security-events: write
	  strategy:
	    matrix:
	      image: [api, ml-training, scraping]

	  steps:
	    - name: Checkout code
	      uses: actions/checkout@v4

	    - name: Set up Docker Buildx
	      uses: docker/setup-buildx-action@v3

	    - name: Build image for scanning
	      uses: docker/build-push-action@v5
	      with:
	        context: .
	        file: ./docker/Dockerfile.${{ matrix.image }}
	        target: production
	        tags: rental-ml-${{ matrix.image }}:scan
	        load: true

	    - name: Run Trivy vulnerability scanner
	      # Pinned to a release tag; `@master` is a mutable ref that can
	      # change underneath the pipeline.
	      uses: aquasecurity/trivy-action@0.28.0
	      with:
	        image-ref: 'rental-ml-${{ matrix.image }}:scan'
	        format: 'sarif'
	        output: 'trivy-${{ matrix.image }}.sarif'

	    - name: Upload Trivy scan results
	      uses: github/codeql-action/upload-sarif@v3
	      with:
	        sarif_file: 'trivy-${{ matrix.image }}.sarif'
	        category: 'trivy-${{ matrix.image }}'

	    - name: Run Hadolint Dockerfile linter
	      id: hadolint
	      uses: hadolint/hadolint-action@v3.1.0
	      with:
	        dockerfile: ./docker/Dockerfile.${{ matrix.image }}
	        format: sarif
	        output-file: hadolint-${{ matrix.image }}.sarif

	    - name: Upload Hadolint results
	      # Upload even when the linter step failed, but not when it never
	      # ran (no SARIF file would exist).
	      if: always() && steps.hadolint.conclusion != 'skipped'
	      uses: github/codeql-action/upload-sarif@v3
	      with:
	        sarif_file: hadolint-${{ matrix.image }}.sarif
	        category: 'hadolint-${{ matrix.image }}'

	# ================================
	# Build and Push Container Images
	# ================================
	# Builds and pushes the multi-arch images to the container registry.
	# Runs for pushes to main and for tag pushes, after the quality gates.
	build-images:
	  name: Build & Push Images
	  runs-on: ubuntu-latest
	  needs: [code-quality, test, container-security]
	  if: github.event_name == 'push' && (github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/'))
	  # Pushing to the registry with GITHUB_TOKEN requires packages: write.
	  permissions:
	    contents: read
	    packages: write

	  strategy:
	    matrix:
	      image: [api, ml-training, scraping]

	  steps:
	    - name: Checkout code
	      uses: actions/checkout@v4

	    # The arm64 half of the multi-platform build runs under emulation on
	    # the amd64 runner, which needs QEMU registered first.
	    - name: Set up QEMU
	      uses: docker/setup-qemu-action@v3

	    - name: Set up Docker Buildx
	      uses: docker/setup-buildx-action@v3

	    - name: Log in to Container Registry
	      uses: docker/login-action@v3
	      with:
	        registry: ${{ env.REGISTRY }}
	        username: ${{ github.actor }}
	        password: ${{ secrets.GITHUB_TOKEN }}

	    - name: Extract metadata
	      id: meta
	      uses: docker/metadata-action@v5
	      with:
	        images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}-${{ matrix.image }}
	        # type=ref,event=tag publishes the literal git tag (e.g. v1.2.3),
	        # the form the production deploy references via github.ref_name.
	        # The branch-prefixed sha tag is limited to branch pushes, where
	        # {{branch}} is non-empty; otherwise the tag would start with '-'.
	        # type=sha,prefix=,format=long publishes the full commit sha, the
	        # form the staging deploy references via github.sha.
	        tags: |
	          type=ref,event=branch
	          type=ref,event=tag
	          type=ref,event=pr
	          type=semver,pattern={{version}}
	          type=semver,pattern={{major}}.{{minor}}
	          type=sha,prefix={{branch}}-,enable=${{ github.ref_type == 'branch' }}
	          type=sha,prefix=,format=long
	          type=raw,value=latest,enable={{is_default_branch}}

	    - name: Build and push image
	      uses: docker/build-push-action@v5
	      with:
	        context: .
	        file: ./docker/Dockerfile.${{ matrix.image }}
	        target: production
	        push: true
	        tags: ${{ steps.meta.outputs.tags }}
	        labels: ${{ steps.meta.outputs.labels }}
	        # One cache scope per image so the matrix legs do not overwrite
	        # each other's layer cache.
	        cache-from: type=gha,scope=${{ matrix.image }}
	        cache-to: type=gha,mode=max,scope=${{ matrix.image }}
	        platforms: linux/amd64,linux/arm64

	# ================================
	# Deploy to Staging
	# ================================
	# Deploys the freshly built images to the staging cluster, then runs
	# smoke and end-to-end tests against it. Runs for the main branch only.
	deploy-staging:
	  name: Deploy to Staging
	  runs-on: ubuntu-latest
	  needs: [build-images, performance-test]
	  if: github.ref == 'refs/heads/main'
	  environment:
	    name: staging
	    url: https://staging.rental-ml.com

	  steps:
	    - name: Checkout code
	      uses: actions/checkout@v4

	    - name: Configure kubectl
	      env:
	        # Passed through the environment rather than interpolated into
	        # the script text.
	        KUBECONFIG_B64: ${{ secrets.STAGING_KUBECONFIG }}
	      run: |
	        # Absolute path outside the checkout: later steps change directory,
	        # which would break a relative KUBECONFIG.
	        kubeconfig_path="$RUNNER_TEMP/kubeconfig"
	        echo "$KUBECONFIG_B64" | base64 -d > "$kubeconfig_path"
	        chmod 600 "$kubeconfig_path"
	        # `export` only lasts for this step's shell; GITHUB_ENV makes the
	        # variable visible to every following step.
	        echo "KUBECONFIG=$kubeconfig_path" >> "$GITHUB_ENV"

	    - name: Deploy to staging
	      run: |
	        # Update image tags in Kustomization
	        cd k8s/overlays/staging
	        kustomize edit set image \
	          ghcr.io/${{ env.IMAGE_NAME }}-api:${{ github.sha }} \
	          ghcr.io/${{ env.IMAGE_NAME }}-ml-training:${{ github.sha }} \
	          ghcr.io/${{ env.IMAGE_NAME }}-scraping:${{ github.sha }}

	        # Apply manifests
	        kubectl apply -k . --wait=true

	        # Wait for rollout
	        kubectl rollout status deployment/rental-ml-api -n rental-ml-staging --timeout=600s
	        kubectl rollout status deployment/rental-ml-worker -n rental-ml-staging --timeout=600s

	    - name: Run smoke tests
	      run: |
	        # Wait for services to be ready
	        sleep 60

	        # Run basic health checks
	        curl -f https://staging.rental-ml.com/health
	        curl -f https://staging.rental-ml.com/api/v1/health

	    - name: Run E2E tests
	      run: |
	        npm install -g @playwright/test
	        # Playwright downloads its browsers separately from the npm package.
	        npx playwright install --with-deps
	        npx playwright test tests/e2e/ --config=playwright.staging.config.js

	# ================================
	# Deploy to Production
	# ================================
	# Blue/green production rollout for version tags (refs/tags/v*).
	deploy-production:
	  name: Deploy to Production
	  runs-on: ubuntu-latest
	  # Depends on build-images, not deploy-staging: staging only runs for
	  # refs/heads/main, so on a tag push it is skipped, and a skipped
	  # dependency would cause this job to be skipped as well.
	  needs: [build-images]
	  if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')
	  environment:
	    name: production
	    url: https://api.rental-ml.com

	  steps:
	    - name: Checkout code
	      uses: actions/checkout@v4

	    - name: Configure kubectl
	      env:
	        # Passed through the environment rather than interpolated into
	        # the script text.
	        KUBECONFIG_B64: ${{ secrets.PRODUCTION_KUBECONFIG }}
	      run: |
	        # Absolute path outside the checkout: later steps change directory,
	        # which would break a relative KUBECONFIG.
	        kubeconfig_path="$RUNNER_TEMP/kubeconfig"
	        echo "$KUBECONFIG_B64" | base64 -d > "$kubeconfig_path"
	        chmod 600 "$kubeconfig_path"
	        # `export` only lasts for this step's shell; GITHUB_ENV makes the
	        # variable visible to every following step.
	        echo "KUBECONFIG=$kubeconfig_path" >> "$GITHUB_ENV"

	    - name: Pre-deployment checks
	      run: |
	        # Check cluster health
	        kubectl get nodes
	        kubectl get pods -n rental-ml-prod

	        # Check database connectivity
	        kubectl exec -n rental-ml-prod statefulset/postgres-primary -- pg_isready

	    - name: Blue-Green Deployment
	      run: |
	        # Deploy to green environment
	        cd k8s/overlays/prod

	        # Create green deployment
	        sed 's/rental-ml-api/rental-ml-api-green/g' ../../base/08-app-deployment.yaml > green-deployment.yaml

	        # Update image tags
	        # NOTE(review): this edits the overlay's kustomization, but the
	        # manifest applied below is the sed output of the base file, so
	        # the tag set here does not appear to reach the green deployment.
	        # Confirm how the new image is meant to be rolled out.
	        kustomize edit set image \
	          ghcr.io/${{ env.IMAGE_NAME }}-api:${{ github.ref_name }} \
	          ghcr.io/${{ env.IMAGE_NAME }}-ml-training:${{ github.ref_name }} \
	          ghcr.io/${{ env.IMAGE_NAME }}-scraping:${{ github.ref_name }}

	        # Deploy green environment
	        kubectl apply -f green-deployment.yaml
	        kubectl rollout status deployment/rental-ml-api-green -n rental-ml-prod --timeout=600s

	    - name: Health check green deployment
	      run: |
	        # Get green service endpoint
	        GREEN_IP=$(kubectl get service rental-ml-api-green-service -n rental-ml-prod -o jsonpath='{.status.loadBalancer.ingress[0].ip}')

	        # The jsonpath yields an empty string when no ingress IP is set;
	        # fail with a clear message instead of a malformed curl URL.
	        if [ -z "$GREEN_IP" ]; then
	          echo "::error::rental-ml-api-green-service has no load balancer IP"
	          exit 1
	        fi

	        # Health check
	        curl -f "http://$GREEN_IP:8000/health"
	        curl -f "http://$GREEN_IP:8000/api/v1/health"

	    - name: Switch traffic to green
	      run: |
	        # Update service selector to point to green deployment
	        kubectl patch service rental-ml-api-service -n rental-ml-prod -p '{"spec":{"selector":{"version":"green"}}}'

	        # Wait for traffic switch
	        sleep 30

	    - name: Final health check
	      run: |
	        # Production health check
	        curl -f https://api.rental-ml.com/health
	        curl -f https://api.rental-ml.com/api/v1/health

	    - name: Cleanup blue deployment
	      run: |
	        # Remove old blue deployment
	        kubectl delete deployment rental-ml-api -n rental-ml-prod --ignore-not-found=true

	        # Rename green to blue for next deployment
	        # NOTE(review): metadata.name is immutable in Kubernetes, so this
	        # patch is expected to be rejected. The rename strategy needs a
	        # redesign (e.g. alternate colours between releases).
	        kubectl patch deployment rental-ml-api-green -n rental-ml-prod -p '{"metadata":{"name":"rental-ml-api"}}'

	    - name: Post-deployment monitoring
	      run: |
	        # Check metrics and logs
	        kubectl logs -n rental-ml-prod deployment/rental-ml-api --tail=100

	        # Verify all pods are running
	        kubectl get pods -n rental-ml-prod

	# ================================
	# Security Monitoring
	# ================================
	# Scheduled OWASP ZAP full scan of the production API. Alerts the
	# security channel on Slack when the job fails.
	security-monitoring:
	  name: Security Monitoring
	  runs-on: ubuntu-latest
	  if: github.event_name == 'schedule'

	  steps:
	    - name: Checkout code
	      uses: actions/checkout@v4

	    - name: Run OWASP ZAP security scan
	      uses: zaproxy/action-full-scan@v0.8.0
	      with:
	        target: 'https://api.rental-ml.com'
	        rules_file_name: '.zap/rules.tsv'
	        cmd_options: '-a'

	    - name: Upload ZAP results
	      # always(): keep the report when the scan step itself fails.
	      if: always()
	      uses: actions/upload-artifact@v4
	      with:
	        name: zap-security-scan
	        path: report_html.html
	        retention-days: 30

	    - name: Notify security team
	      if: failure()
	      uses: 8398a7/action-slack@v3
	      with:
	        status: failure
	        channel: '#security-alerts'
	        text: 'Security scan failed for Rental ML System'
	      env:
	        SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Production CI/CD Pipeline #43

Workflow file

Production CI/CD Pipeline #43

Uh oh!

Jobs

Run details

Workflow file for this run