# Improved Kubeflow pipeline; refactored data ingestion and processing (#13)
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters.
# CI/CD workflow: lint and test every change, build and push the container
# image, then (for pushes to main only) deploy to Cloud Run and Vertex AI.
name: CI/CD Pipeline

# Trigger only when application code, tests, container definitions, the Cloud
# Build config, or pipeline/deployment assets change.
on:
  push:
    branches: [main]
    paths:
      - 'src/**'
      - 'tests/**'
      - 'docker/**'
      - 'cloudbuild.yaml'
      - 'kubeflow/**'
      - 'deployment/**'
  pull_request:
    branches: [main]
    paths:
      - 'src/**'
      - 'tests/**'
      - 'docker/**'
      - 'cloudbuild.yaml'
      - 'kubeflow/**'
      - 'deployment/**'

# Workflow-level environment, visible to every job and step.
# IMAGE doubles as the container image name and the Cloud Run service name.
# The remaining values are read from repository secrets.
env:
  IMAGE: music-recommender
  REGION: us-central1
  PROJECT_ID: ${{ secrets.GCP_PROJECT_ID }}
  GCS_BUCKET: ${{ secrets.GCS_BUCKET }}
  GCP_WORKLOAD_IDENTITY_PROVIDER: ${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }}
  GCP_SA_EMAIL: ${{ secrets.GCP_SA_EMAIL }}

# Default GITHUB_TOKEN scope; jobs that need more declare it themselves.
permissions:
  contents: read
jobs:
  # Lints the code base and runs the test suite once per Python version in the
  # matrix, publishing coverage and JUnit results.
  lint-and-test:
    runs-on: ubuntu-latest
    timeout-minutes: 15
    strategy:
      matrix:
        # Quoted so that 3.10 is not parsed as the float 3.1.
        python-version: ['3.9', '3.10']
    steps:
      - uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}

      - name: Cache pip
        uses: actions/cache@v3
        with:
          path: ~/.cache/pip
          # The Python version comes before the dependency hash so that the
          # restore-keys prefix below is an actual prefix of the saved keys
          # and can serve as a fallback when the requirements change.
          key: ${{ runner.os }}-pip-${{ matrix.python-version }}-${{ hashFiles('**/requirements.txt', '**/setup.py') }}
          restore-keys: |
            ${{ runner.os }}-pip-${{ matrix.python-version }}-

      - name: Install dependencies
        run: |
          set -e
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          pip install flake8 black isort pytest pytest-cov

      - name: Run linters
        run: |
          set -e
          flake8 .
          black --check .
          isort --check-only .

      - name: Run tests
        run: |
          set -e
          # --cov-report=xml writes ./coverage.xml for the Codecov step;
          # --junitxml populates ./reports/ for the artifact upload below.
          pytest tests/ --cov=src --cov-report=xml --junitxml=reports/junit.xml

      - name: Upload coverage
        uses: codecov/codecov-action@v3
        with:
          file: ./coverage.xml

      - name: Upload test results
        # Publish results for failing runs too; they matter most then.
        if: ${{ !cancelled() }}
        uses: actions/upload-artifact@v4
        with:
          # v4 artifact names must be unique within a run, so each matrix leg
          # gets its own artifact.
          name: test-results-${{ matrix.python-version }}
          path: ./reports/
# Job: build-and-push (an entry of the top-level `jobs:` mapping).
  # Builds the container image and pushes it to gcr.io, tagged with the commit
  # SHA. Runs only after lint-and-test has succeeded.
  build-and-push:
    needs: lint-and-test
    runs-on: ubuntu-latest
    permissions:
      contents: read
      # Needed to request the OIDC token used for Workload Identity Federation.
      id-token: write
      # `packages: write` was dropped: the image goes to gcr.io, and no step in
      # this job writes to GitHub Packages.
    steps:
      - uses: actions/checkout@v3

      # Authenticate first so that the Cloud SDK installed in the next step
      # picks up the exported credentials.
      - name: Authenticate to Google Cloud
        uses: google-github-actions/auth@v1
        with:
          workload_identity_provider: ${{ env.GCP_WORKLOAD_IDENTITY_PROVIDER }}
          service_account: ${{ env.GCP_SA_EMAIL }}

      - name: Set up Cloud SDK
        uses: google-github-actions/setup-gcloud@v1

      - name: Verify GCP Authentication
        run: gcloud auth list

      - name: Validate repository structure
        run: |
          set -e
          # Fail fast if files required by later deployment jobs are missing.
          if [ ! -f "cloudbuild.yaml" ]; then
            echo "cloudbuild.yaml not found, failing build."
            exit 1
          fi
          if [ ! -f "kubeflow/pipeline.py" ]; then
            echo "pipeline.py not found, failing build."
            exit 1
          fi

      - name: Configure Docker
        # --quiet suppresses the interactive confirmation prompt in CI.
        run: gcloud auth configure-docker --quiet

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2

      - name: Build and push Docker image
        uses: docker/build-push-action@v4
        with:
          context: .
          push: true
          tags: gcr.io/${{ env.PROJECT_ID }}/${{ env.IMAGE }}:${{ github.sha }}
          # Reuse image layers across runs through the GitHub Actions cache.
          cache-from: type=gha
          cache-to: type=gha,mode=max
# Job: deploy-cloud-run (an entry of the top-level `jobs:` mapping).
  # Deploys the image built for this commit to Cloud Run, probes /health, and
  # routes traffic back to the revision recorded before the deploy when the
  # probe fails.
  deploy-cloud-run:
    needs: build-and-push
    runs-on: ubuntu-latest
    # Deploy only for pushes to main, never for pull requests.
    if: github.ref == 'refs/heads/main' && github.event_name == 'push'
    permissions:
      contents: read
      # Needed to request the OIDC token used for Workload Identity Federation.
      id-token: write
    steps:
      # Authenticate first so that the Cloud SDK installed in the next step
      # picks up the exported credentials.
      - name: Authenticate to Google Cloud
        uses: google-github-actions/auth@v1
        with:
          workload_identity_provider: ${{ env.GCP_WORKLOAD_IDENTITY_PROVIDER }}
          service_account: ${{ env.GCP_SA_EMAIL }}

      - name: Set up Cloud SDK
        uses: google-github-actions/setup-gcloud@v1

      - name: Verify GCP Authentication
        run: gcloud auth list

      - name: Get the current Cloud Run revision
        id: current_revision
        run: |
          set -euo pipefail
          # Capture the current revision name to enable rollback if deployment fails.
          # On the very first deploy the service does not exist yet and
          # `describe` fails; record an empty revision instead of aborting so
          # the initial deployment can proceed.
          if ! revision=$(gcloud run services describe "$IMAGE" \
            --platform managed \
            --region "$REGION" \
            --format="value(status.latestReadyRevisionName)"); then
            echo "Could not read an existing revision; rollback will be unavailable."
            revision=""
          fi
          echo "current_revision=$revision" >> "$GITHUB_OUTPUT"

      - name: Deploy to Cloud Run
        env:
          IMAGE_TAG: gcr.io/${{ env.PROJECT_ID }}/${{ env.IMAGE }}:${{ github.sha }}
        run: |
          set -euo pipefail
          # NOTE(review): --allow-unauthenticated makes the service publicly
          # reachable; confirm this is intended.
          gcloud run deploy "$IMAGE" \
            --image "$IMAGE_TAG" \
            --region "$REGION" \
            --platform managed \
            --allow-unauthenticated

      - name: Health check Cloud Run service
        env:
          # Empty when no revision existed before this deploy.
          PREVIOUS_REVISION: ${{ steps.current_revision.outputs.current_revision }}
        run: |
          set -euo pipefail
          SERVICE_URL=$(gcloud run services describe "$IMAGE" \
            --platform managed \
            --region "$REGION" \
            --format='value(status.url)')
          # Retry transient failures so a revision that is still warming up
          # does not trigger an unnecessary rollback.
          if ! curl -fsSL --retry 5 --retry-delay 5 "${SERVICE_URL}/health"; then
            if [ -n "$PREVIOUS_REVISION" ]; then
              echo "Deployment failed. Rolling back to previous revision..."
              gcloud run services update-traffic "$IMAGE" \
                --platform managed \
                --region "$REGION" \
                --to-revisions="${PREVIOUS_REVISION}=100"
            else
              echo "Deployment failed and there is no previous revision to roll back to."
            fi
            exit 1
          fi
# Job: deploy-vertex-ai (an entry of the top-level `jobs:` mapping).
  # Submits the Cloud Build config, compiles the Kubeflow pipeline, runs the
  # Vertex AI deployment script, and verifies that the resulting model is
  # deployed. The model name is published as an artifact for later jobs.
  deploy-vertex-ai:
    needs: build-and-push
    runs-on: ubuntu-latest
    # Deploy only for pushes to main, never for pull requests.
    if: github.ref == 'refs/heads/main' && github.event_name == 'push'
    permissions:
      contents: read
      # Needed to request the OIDC token used for Workload Identity Federation.
      id-token: write
    steps:
      - uses: actions/checkout@v3

      # Authenticate first so that the Cloud SDK installed in the next step
      # picks up the exported credentials.
      - name: Authenticate to Google Cloud
        uses: google-github-actions/auth@v1
        with:
          workload_identity_provider: ${{ env.GCP_WORKLOAD_IDENTITY_PROVIDER }}
          service_account: ${{ env.GCP_SA_EMAIL }}

      - name: Set up Cloud SDK
        uses: google-github-actions/setup-gcloud@v1

      - name: Verify GCP Authentication
        run: gcloud auth list

      - name: Trigger Cloud Build
        run: |
          set -euo pipefail
          gcloud builds submit --config cloudbuild.yaml \
            --substitutions=_REGION="$REGION"

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          # Quoted so that 3.10 is not parsed as the float 3.1.
          python-version: '3.10'

      - name: Install Kubeflow Pipelines SDK
        run: |
          set -euo pipefail
          python -m pip install --upgrade pip
          pip install kfp==2.0.0 google-cloud-aiplatform==1.28.0

      - name: Deploy to Vertex AI
        run: |
          set -euo pipefail
          # Step 1: compile the pipeline definition to pipeline.json.
          python kubeflow/pipeline.py \
            --project_id "$PROJECT_ID" \
            --region "$REGION" \
            --pipeline_root "gs://${GCS_BUCKET}/pipeline_root" \
            --output_file pipeline.json
          # Step 2: run the deployment script against the compiled spec.
          # It is expected to write the model identifier to model_name.txt
          # (presumably a model ID or resource name; verify against the script).
          python deployment/vertex_ai/vertex_deployment.py \
            --project_id "$PROJECT_ID" \
            --region "$REGION" \
            --pipeline_spec pipeline.json \
            --pipeline_root "gs://${GCS_BUCKET}/pipeline_root" \
            --model_name_file model_name.txt

      - name: Upload model name
        if: always()
        # Artifacts uploaded with v4 can only be fetched by download-artifact@v4,
        # so any consumer of `model-name` must use the same major version.
        uses: actions/upload-artifact@v4
        with:
          name: model-name
          path: model_name.txt

      - name: Check Vertex AI model deployment status
        run: |
          set -euo pipefail
          if [[ ! -s model_name.txt ]]; then
            echo "model_name.txt is missing or empty, exiting."
            exit 1
          fi
          MODEL_NAME=$(cat model_name.txt)
          # The Vertex AI Model resource exposes no `state` field; a model is
          # deployed when its `deployedModels` list references an endpoint.
          DEPLOYED_ENDPOINTS=$(gcloud ai models describe "$MODEL_NAME" \
            --region "$REGION" \
            --format="value(deployedModels[].endpoint)")
          if [[ -z "$DEPLOYED_ENDPOINTS" ]]; then
            echo "Model not deployed successfully, exiting."
            exit 1
          fi
# Job: final-health-check (an entry of the top-level `jobs:` mapping).
  # Last gate of the workflow: re-checks the Cloud Run /health endpoint and the
  # Vertex AI model once both deployment jobs have finished successfully.
  final-health-check:
    needs: [deploy-cloud-run, deploy-vertex-ai]
    runs-on: ubuntu-latest
    permissions:
      contents: read
      # Needed to request the OIDC token used for Workload Identity Federation.
      id-token: write
    steps:
      # Authenticate first so that the Cloud SDK installed in the next step
      # picks up the exported credentials.
      - name: Authenticate to Google Cloud
        uses: google-github-actions/auth@v1
        with:
          workload_identity_provider: ${{ env.GCP_WORKLOAD_IDENTITY_PROVIDER }}
          service_account: ${{ env.GCP_SA_EMAIL }}

      - name: Set up Cloud SDK
        uses: google-github-actions/setup-gcloud@v1

      - name: Download model name
        # download-artifact@v4 reads artifacts produced by upload-artifact@v4;
        # the producer of `model-name` must use the same major version.
        uses: actions/download-artifact@v4
        with:
          name: model-name
          path: ./

      - name: Final Health Checks
        run: |
          set -euo pipefail
          # Check Cloud Run
          SERVICE_URL=$(gcloud run services describe "$IMAGE" \
            --platform managed \
            --region "$REGION" \
            --format='value(status.url)')
          # `|| true`: on a connection failure curl still prints 000, and the
          # combined report below should run instead of the script aborting.
          CLOUD_RUN_STATUS=$(curl -s -o /dev/null -w "%{http_code}" "${SERVICE_URL}/health" || true)
          # Check Vertex AI
          MODEL_NAME=$(cat model_name.txt)
          # The Vertex AI Model resource exposes no `state` field; derive the
          # status from whether `deployedModels` references any endpoint.
          DEPLOYED_ENDPOINTS=$(gcloud ai models describe "$MODEL_NAME" \
            --region "$REGION" \
            --format="value(deployedModels[].endpoint)")
          if [[ -n "$DEPLOYED_ENDPOINTS" ]]; then
            VERTEX_AI_STATUS="DEPLOYED"
          else
            VERTEX_AI_STATUS="NOT_DEPLOYED"
          fi
          if [[ "$CLOUD_RUN_STATUS" != "200" || "$VERTEX_AI_STATUS" != "DEPLOYED" ]]; then
            echo "Final health check failed. Cloud Run status: $CLOUD_RUN_STATUS, Vertex AI status: $VERTEX_AI_STATUS"
            exit 1
          fi
          echo "All systems operational. Cloud Run status: $CLOUD_RUN_STATUS, Vertex AI status: $VERTEX_AI_STATUS"