Skip to content

Improving Kubeflow pipeline, refactored data ingestion and processing #13

Improving Kubeflow pipeline, refactored data ingestion and processing

Improving Kubeflow pipeline, refactored data ingestion and processing #13

Workflow file for this run

# Workflow identity, triggers, shared environment, and default token scope.
name: CI/CD Pipeline

# Runs for pushes to main and pull requests against main, but only when one
# of the listed paths changes.
on:
  push:
    branches:
      - main
    paths:
      - 'src/**'
      - 'tests/**'
      - 'docker/**'
      - 'cloudbuild.yaml'
      - 'kubeflow/**'
      - 'deployment/**'
  pull_request:
    branches:
      - main
    paths:
      - 'src/**'
      - 'tests/**'
      - 'docker/**'
      - 'cloudbuild.yaml'
      - 'kubeflow/**'
      - 'deployment/**'

# Workflow-level environment: two literals plus values read from repository
# secrets. Every job and step inherits these.
env:
  IMAGE: music-recommender
  REGION: us-central1
  PROJECT_ID: ${{ secrets.GCP_PROJECT_ID }}
  GCS_BUCKET: ${{ secrets.GCS_BUCKET }}
  GCP_WORKLOAD_IDENTITY_PROVIDER: ${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }}
  GCP_SA_EMAIL: ${{ secrets.GCP_SA_EMAIL }}

# Default GITHUB_TOKEN scope; jobs that declare their own permissions
# override this.
permissions:
  contents: read

jobs:
# Lints the repository and runs the test suite once per Python version in
# the matrix, then publishes coverage and JUnit results.
lint-and-test:
  runs-on: ubuntu-latest
  strategy:
    matrix:
      python-version: ['3.9', '3.10']
  timeout-minutes: 15
  steps:
    - uses: actions/checkout@v3
    - name: Set up Python
      uses: actions/setup-python@v4
      with:
        python-version: ${{ matrix.python-version }}
    - name: Cache pip
      uses: actions/cache@v3
      with:
        path: ~/.cache/pip
        # The Python version precedes the dependency hash so that the
        # restore-keys prefix below is a real prefix of the key; with the
        # hash first, the fallback could never match an existing cache.
        key: ${{ runner.os }}-pip-${{ matrix.python-version }}-${{ hashFiles('**/requirements.txt', '**/setup.py') }}
        restore-keys: |
          ${{ runner.os }}-pip-${{ matrix.python-version }}-
    - name: Install dependencies
      run: |
        set -e
        python -m pip install --upgrade pip
        pip install -r requirements.txt
        pip install flake8 black isort pytest pytest-cov
    - name: Run linters
      run: |
        set -e
        flake8 .
        black --check .
        isort --check-only .
    - name: Run tests
      run: |
        set -e
        # Write JUnit XML into reports/ so the upload step below has
        # something to publish.
        pytest tests/ --cov=src --cov-report=xml --junitxml=reports/junit.xml
    - name: Upload coverage
      uses: codecov/codecov-action@v3
      with:
        file: ./coverage.xml
    - name: Upload test results
      # Publish results even when the test step failed.
      if: always()
      # v3 of the artifact actions no longer runs on github.com.
      uses: actions/upload-artifact@v4
      with:
        # v4 rejects two uploads with the same name in one run, so each
        # matrix leg gets its own artifact.
        name: test-results-${{ matrix.python-version }}
        path: ./reports/
# Builds the Docker image from the repository root and pushes it to
# gcr.io, tagged with the commit SHA.
# NOTE(review): this job has no `if:` guard, so it also runs (and pushes an
# image) for pull_request events - confirm that is intended.
build-and-push:
  needs: lint-and-test
  runs-on: ubuntu-latest
  permissions:
    contents: read
    # Needed by google-github-actions/auth to request an OIDC token for
    # Workload Identity Federation.
    id-token: write
  steps:
    - uses: actions/checkout@v3
    # Authenticate before installing the SDK so setup-gcloud configures
    # gcloud with these credentials.
    - name: Authenticate to Google Cloud
      uses: google-github-actions/auth@v1
      with:
        workload_identity_provider: ${{ env.GCP_WORKLOAD_IDENTITY_PROVIDER }}
        service_account: ${{ env.GCP_SA_EMAIL }}
    - name: Set up Cloud SDK
      uses: google-github-actions/setup-gcloud@v1
    - name: Verify GCP Authentication
      run: gcloud auth list
    - name: Validate repository structure
      run: |
        set -e
        if [ ! -f "cloudbuild.yaml" ]; then
          echo "cloudbuild.yaml not found, failing build."
          exit 1
        fi
        if [ ! -f "kubeflow/pipeline.py" ]; then
          echo "pipeline.py not found, failing build."
          exit 1
        fi
    - name: Configure Docker
      # --quiet suppresses the confirmation prompt in the non-interactive
      # runner.
      run: gcloud auth configure-docker --quiet
    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v2
    - name: Build and push Docker image
      uses: docker/build-push-action@v4
      with:
        context: .
        push: true
        tags: gcr.io/${{ env.PROJECT_ID }}/${{ env.IMAGE }}:${{ github.sha }}
        # Layer cache stored in the GitHub Actions cache backend.
        cache-from: type=gha
        cache-to: type=gha,mode=max
# Deploys the freshly built image to Cloud Run (pushes to main only), probes
# /health, and shifts traffic back to the previously ready revision when the
# probe fails.
deploy-cloud-run:
  needs: build-and-push
  runs-on: ubuntu-latest
  if: github.ref == 'refs/heads/main' && github.event_name == 'push'
  permissions:
    contents: read
    # Needed by google-github-actions/auth for Workload Identity Federation.
    id-token: write
  steps:
    # Authenticate before installing the SDK so setup-gcloud configures
    # gcloud with these credentials.
    - name: Authenticate to Google Cloud
      uses: google-github-actions/auth@v1
      with:
        workload_identity_provider: ${{ env.GCP_WORKLOAD_IDENTITY_PROVIDER }}
        service_account: ${{ env.GCP_SA_EMAIL }}
    - name: Set up Cloud SDK
      uses: google-github-actions/setup-gcloud@v1
    - name: Verify GCP Authentication
      run: gcloud auth list
    - name: Get the current Cloud Run revision
      id: current_revision
      run: |
        set -e
        # Capture the current revision name to enable rollback if the
        # deployment fails. A failed describe (e.g. the service does not
        # exist yet on the first deployment) yields an empty value instead
        # of aborting the job.
        revision=$(gcloud run services describe "$IMAGE" \
          --platform managed \
          --region "$REGION" \
          --format="value(status.latestReadyRevisionName)" || true)
        echo "current_revision=$revision" >> "$GITHUB_OUTPUT"
    - name: Deploy to Cloud Run
      env:
        IMAGE_TAG: gcr.io/${{ env.PROJECT_ID }}/${{ env.IMAGE }}:${{ github.sha }}
      run: |
        set -e
        gcloud run deploy "$IMAGE" \
          --image "$IMAGE_TAG" \
          --region "$REGION" \
          --platform managed \
          --allow-unauthenticated
    - name: Health check Cloud Run service
      env:
        # Passed through the environment rather than interpolated into the
        # script so an empty value can be tested safely.
        PREVIOUS_REVISION: ${{ steps.current_revision.outputs.current_revision }}
      run: |
        set -e
        SERVICE_URL=$(gcloud run services describe "$IMAGE" \
          --platform managed \
          --region "$REGION" \
          --format='value(status.url)')
        # Retry transient failures so a slow cold start does not trigger a
        # rollback on the first probe.
        if ! curl -fsSL --retry 5 --retry-delay 5 --max-time 30 "${SERVICE_URL}/health"; then
          echo "Deployment failed. Rolling back to previous revision..."
          if [ -n "$PREVIOUS_REVISION" ]; then
            # NOTE(review): this pins traffic to a named revision; confirm
            # that later deploys still route traffic to the new revision
            # (see `update-traffic --to-latest`).
            gcloud run services update-traffic "$IMAGE" \
              --platform managed \
              --region "$REGION" \
              --to-revisions="${PREVIOUS_REVISION}=100"
          else
            echo "No previous revision recorded; nothing to roll back to."
          fi
          exit 1
        fi
# Submits the Cloud Build config, compiles the Kubeflow pipeline, runs the
# Vertex AI deployment script, and hands the resulting model name to the
# final health check through an artifact (pushes to main only).
deploy-vertex-ai:
  needs: build-and-push
  runs-on: ubuntu-latest
  if: github.ref == 'refs/heads/main' && github.event_name == 'push'
  permissions:
    contents: read
    # Needed by google-github-actions/auth for Workload Identity Federation.
    id-token: write
  steps:
    - uses: actions/checkout@v3
    # Authenticate before installing the SDK so setup-gcloud configures
    # gcloud with these credentials.
    - name: Authenticate to Google Cloud
      uses: google-github-actions/auth@v1
      with:
        workload_identity_provider: ${{ env.GCP_WORKLOAD_IDENTITY_PROVIDER }}
        service_account: ${{ env.GCP_SA_EMAIL }}
    - name: Set up Cloud SDK
      uses: google-github-actions/setup-gcloud@v1
    - name: Verify GCP Authentication
      run: gcloud auth list
    - name: Trigger Cloud Build
      run: |
        set -e
        gcloud builds submit --config cloudbuild.yaml \
          --substitutions="_REGION=${REGION}"
    - name: Set up Python
      uses: actions/setup-python@v4
      with:
        python-version: '3.10'
    - name: Install Kubeflow Pipelines SDK
      run: |
        set -e
        python -m pip install --upgrade pip
        pip install kfp==2.0.0 google-cloud-aiplatform==1.28.0
    - name: Deploy to Vertex AI
      run: |
        set -e
        # Compile the pipeline to pipeline.json, then hand that spec to the
        # deployment script, which is asked to write model_name.txt.
        python kubeflow/pipeline.py \
          --project_id "$PROJECT_ID" \
          --region "$REGION" \
          --pipeline_root "gs://${GCS_BUCKET}/pipeline_root" \
          --output_file pipeline.json
        python deployment/vertex_ai/vertex_deployment.py \
          --project_id "$PROJECT_ID" \
          --region "$REGION" \
          --pipeline_spec pipeline.json \
          --pipeline_root "gs://${GCS_BUCKET}/pipeline_root" \
          --model_name_file model_name.txt
    - name: Upload model name
      if: always()
      # v3 of the artifact actions no longer runs on github.com; the
      # matching download in final-health-check uses v4 as well.
      uses: actions/upload-artifact@v4
      with:
        name: model-name
        path: model_name.txt
    - name: Check Vertex AI model deployment status
      run: |
        set -e
        MODEL_NAME=$(cat model_name.txt)
        # NOTE(review): confirm that `gcloud ai models describe` exposes a
        # `state` field that can equal DEPLOYED for this resource.
        MODEL_STATUS=$(gcloud ai models describe "$MODEL_NAME" --region "$REGION" --format="value(state)")
        if [[ "$MODEL_STATUS" != "DEPLOYED" ]]; then
          echo "Model not deployed successfully, exiting."
          exit 1
        fi
# Runs after both deployments and re-checks the Cloud Run /health endpoint
# and the Vertex AI model state in one place.
final-health-check:
  needs: [deploy-cloud-run, deploy-vertex-ai]
  runs-on: ubuntu-latest
  permissions:
    contents: read
    # Needed by google-github-actions/auth for Workload Identity Federation.
    id-token: write
  steps:
    # Authenticate before installing the SDK so setup-gcloud configures
    # gcloud with these credentials.
    - name: Authenticate to Google Cloud
      uses: google-github-actions/auth@v1
      with:
        workload_identity_provider: ${{ env.GCP_WORKLOAD_IDENTITY_PROVIDER }}
        service_account: ${{ env.GCP_SA_EMAIL }}
    - name: Set up Cloud SDK
      uses: google-github-actions/setup-gcloud@v1
    - name: Download model name
      # Must match the major version of the upload in deploy-vertex-ai.
      uses: actions/download-artifact@v4
      with:
        name: model-name
        path: ./
    - name: Final Health Checks
      run: |
        set -e
        # Check Cloud Run
        SERVICE_URL=$(gcloud run services describe "$IMAGE" \
          --platform managed \
          --region "$REGION" \
          --format='value(status.url)')
        # `|| true` keeps a connection failure from ending the step under
        # `set -e` before the diagnostic below is printed; curl still
        # reports the status code it saw.
        CLOUD_RUN_STATUS=$(curl -s -o /dev/null -w "%{http_code}" "${SERVICE_URL}/health" || true)
        # Check Vertex AI
        MODEL_NAME=$(cat model_name.txt)
        # NOTE(review): confirm that `gcloud ai models describe` exposes a
        # `state` field that can equal DEPLOYED for this resource.
        VERTEX_AI_STATUS=$(gcloud ai models describe "$MODEL_NAME" --region "$REGION" --format="value(state)")
        if [[ "$CLOUD_RUN_STATUS" != "200" || "$VERTEX_AI_STATUS" != "DEPLOYED" ]]; then
          echo "Final health check failed. Cloud Run status: $CLOUD_RUN_STATUS, Vertex AI status: $VERTEX_AI_STATUS"
          exit 1
        fi
        echo "All systems operational. Cloud Run status: $CLOUD_RUN_STATUS, Vertex AI status: $VERTEX_AI_STATUS"