end-to-end integration tests #57
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Workflow that installs Kubeflow components and runs the integration tests defined in the steps below. | |
| name: Full Kubeflow End-to-End Integration Test | |
| # Triggers: manual dispatch, pushes to master, and pull requests targeting master. | |
| on: | |
| workflow_dispatch: | |
| push: | |
| branches: | |
| - master | |
| pull_request: | |
| branches: | |
| - master | |
| # Environment variables shared by the steps of this workflow. | |
| env: | |
| # NOTE(review): KIND_CLUSTER_NAME and KIND_NETWORK are not referenced in this file; presumably read by the scripts under tests/gh-actions - confirm. | |
| KIND_CLUSTER_NAME: kubeflow | |
| # Namespace the test steps pass to kubectl -n when working with the user profile. | |
| KF_PROFILE: kubeflow-user-example-com | |
| KIND_NETWORK: kind | |
| jobs: | |
| kubeflow-integration: | |
| name: Kubeflow Installation and Testing | |
| # Targets runners carrying the ubuntu-latest-16-cores label. | |
| runs-on: | |
| labels: ubuntu-latest-16-cores | |
| # Upper bound for the whole job, in minutes. | |
| timeout-minutes: 60 | |
| steps: | |
| # Fetch the repository so the manifests and tests/gh-actions scripts used below exist on the runner. | |
| - name: Checkout | |
| uses: actions/checkout@v4 | |
| - name: Setup Environment | |
| run: | | |
| # Helper scripts from the repository; going by their names they install KinD, create the cluster, and install kustomize and kubectl (confirm in the scripts). | |
| ./tests/gh-actions/install_KinD_create_KinD_cluster_install_kustomize.sh | |
| ./tests/gh-actions/install_kubectl.sh | |
| # Render the kubeflow-namespace base with kustomize and apply the result to the cluster. | |
| kustomize build common/kubeflow-namespace/base | kubectl apply -f - | |
| - name: Install Core Infrastructure | |
| run: | | |
| # Repository helper scripts; judging by their names they install cert-manager, Istio CNI and oauth2-proxy (confirm in the scripts). | |
| ./tests/gh-actions/install_cert_manager.sh | |
| ./tests/gh-actions/install_istio-cni.sh | |
| ./tests/gh-actions/install_oauth2-proxy.sh | |
| # Apply the Kubeflow Istio resources from the istio-cni-1-24 manifests. | |
| kustomize build common/istio-cni-1-24/kubeflow-istio-resources/base | kubectl apply -f - | |
| ./tests/gh-actions/install_multi_tenancy.sh | |
| # Apply the Dex overlay named oauth2-proxy. | |
| kustomize build ./common/dex/overlays/oauth2-proxy | kubectl apply -f - | |
| - name: Install Central Dashboard and Pipelines | |
| run: | | |
| # Apply the central dashboard overlay, then run the repository's pipelines install script. | |
| kustomize build apps/centraldashboard/upstream/overlays/kserve | kubectl apply -f - | |
| ./tests/gh-actions/install_pipelines.sh | |
| # Wait up to 180s for pods in every namespace to become Ready. | |
| # Pods that already ran to completion (phase Succeeded) never report Ready, so they are | |
| # excluded here, matching the wait used after the Jupyter install. | |
| kubectl wait --for=condition=Ready pods --all --all-namespaces --timeout 180s --field-selector=status.phase!=Succeeded | |
| - name: Create User Profile | |
| run: | | |
| # Apply the user-namespace base, which is expected to result in the profile namespace being provisioned. | |
| kustomize build common/user-namespace/base | kubectl apply -f - | |
| # Poll for the secret every 5 seconds, for up to 60 seconds, instead of sleeping a fixed | |
| # 60 seconds: the step finishes as soon as the secret exists and still gets a final | |
| # check at the 60 second mark. | |
| ELAPSED=0 | |
| until kubectl get secret mlpipeline-minio-artifact -n "$KF_PROFILE" > /dev/null 2>&1; do | |
| if [ "$ELAPSED" -ge 60 ]; then | |
| # Say why the step failed; a bare exit 1 leaves nothing in the job log. | |
| echo "Secret mlpipeline-minio-artifact was not found in namespace $KF_PROFILE after ${ELAPSED}s" >&2 | |
| exit 1 | |
| fi | |
| sleep 5 | |
| ELAPSED=$((ELAPSED + 5)) | |
| done | |
| - name: Install Jupyter and Notebook Components | |
| run: | | |
| # Apply the Jupyter web app, notebook controller and admission webhook overlays in turn. | |
| kustomize build apps/jupyter/jupyter-web-app/upstream/overlays/istio/ | kubectl apply -f - | |
| kustomize build apps/jupyter/notebook-controller/upstream/overlays/kubeflow/ | kubectl apply -f - | |
| kustomize build apps/admission-webhook/upstream/overlays/cert-manager | kubectl apply -f - | |
| # Fallback: only when the poddefaults.kubeflow.org CRD is absent, apply it from the kubeflow/kubeflow master branch. | |
| kubectl get crd poddefaults.kubeflow.org || kubectl apply -f https://raw.githubusercontent.com/kubeflow/kubeflow/master/components/admission-webhook/manifests/base/crd.yaml | |
| # Wait up to 300s for pods in every namespace to become Ready, skipping pods whose phase is Succeeded. | |
| kubectl wait --for=condition=Ready pods --all --all-namespaces --timeout 300s --field-selector=status.phase!=Succeeded | |
| - name: Install ML Components | |
| run: | | |
| # Katib | |
| # Install the AppArmor profiles package, then unload the mysqld profile before deploying Katib's MySQL. | |
| sudo apt-get install -y apparmor-profiles | |
| sudo apparmor_parser -R /etc/apparmor.d/usr.sbin.mysqld | |
| # Build from inside apps/katib/upstream, as before, but in a subshell: the working | |
| # directory of the step is never changed, and a failing apply is no longer hidden | |
| # inside an && chain (where the shell's exit-on-error does not fire). | |
| (cd apps/katib/upstream && kustomize build installs/katib-with-kubeflow) | kubectl apply -f - | |
| kubectl wait --for=condition=Available deployment/katib-controller -n kubeflow --timeout=300s | |
| kubectl wait --for=condition=Available deployment/katib-mysql -n kubeflow --timeout=300s | |
| # Label the profile namespace for Katib metrics-collector injection. | |
| kubectl label namespace "$KF_PROFILE" katib.kubeflow.org/metrics-collector-injection=enabled --overwrite | |
| # Other ML components | |
| ./tests/gh-actions/install_training_operator.sh | |
| ./tests/gh-actions/install_knative.sh | |
| ./tests/gh-actions/install_kserve.sh | |
| chmod u+x tests/gh-actions/spark_*.sh && ./tests/gh-actions/spark_install.sh | |
| # Dependencies | |
| # Python packages used by the test scripts run in later steps. | |
| pip install pytest kubernetes kfp==2.11.0 kserve pytest-timeout pyyaml requests | |
| - name: Setup Gateway | |
| run: | | |
| # Look up the first service labelled app=istio-ingressgateway and forward local port 8080 to its port 80. | |
| GATEWAY=$(kubectl get svc -n istio-system -l app=istio-ingressgateway -o jsonpath='{.items[0].metadata.name}') | |
| # Started in the background so the forward stays up after this script returns. | |
| nohup kubectl port-forward -n istio-system "svc/$GATEWAY" 8080:80 & | |
| # Wait until the forwarded port answers, but give up after 120 attempts so that a | |
| # port-forward that never comes up fails here rather than hanging until the job timeout. | |
| ATTEMPTS=0 | |
| until curl -s localhost:8080 > /dev/null; do | |
| ATTEMPTS=$((ATTEMPTS + 1)) | |
| if [ "$ATTEMPTS" -ge 120 ]; then | |
| echo "Gateway port-forward did not answer on localhost:8080 after $ATTEMPTS attempts" >&2 | |
| exit 1 | |
| fi | |
| sleep 1 | |
| done | |
| - name: Test Dex Authentication | |
| run: | | |
| # Despite its name, this step runs the Dex, pipeline, notebook, Katib, training and Spark checks in sequence. | |
| # Authentication | |
| chmod +x tests/gh-actions/test_dex_auth.sh && ./tests/gh-actions/test_dex_auth.sh | |
| # ML Pipeline | |
| # Token for the default-editor service account in the profile namespace. | |
| TOKEN="$(kubectl -n "$KF_PROFILE" create token default-editor)" | |
| python3 tests/gh-actions/pipeline_test.py run_pipeline "${TOKEN}" "${KF_PROFILE}" | |
| # Second call uses a token from the default namespace against the profile namespace. | |
| python3 tests/gh-actions/pipeline_test.py test_unauthorized_access "$(kubectl -n default create token default)" "${KF_PROFILE}" | |
| # Notebook Pipeline | |
| if [ -f "tests/gh-actions/run_and_wait_kubeflow_pipeline.py" ]; then | |
| kubectl -n "$KF_PROFILE" cp ./tests/gh-actions/run_and_wait_kubeflow_pipeline.py test-0:/home/jovyan/run_and_wait_kubeflow_pipeline.py | |
| # No -t/-i: the runner has no terminal attached, so requesting a TTY only produces a warning. | |
| kubectl -n "$KF_PROFILE" exec test-0 -- python /home/jovyan/run_and_wait_kubeflow_pipeline.py | |
| fi | |
| # Katib | |
| if kubectl get crd experiments.kubeflow.org > /dev/null 2>&1; then | |
| sed "s/kubeflow-user/$KF_PROFILE/g" tests/gh-actions/kf-objects/katib_test.yaml | kubectl apply -f - | |
| # Best-effort wait: a timeout here is tolerated on purpose, followed by a fixed 30s pause. | |
| kubectl wait --for=condition=Running experiments.kubeflow.org -n "$KF_PROFILE" --all --timeout=300s || true | |
| sleep 30 | |
| fi | |
| # Training and Spark | |
| if kubectl get crd pytorchjobs.kubeflow.org > /dev/null 2>&1; then | |
| # Rewrite every namespace: field of the manifest to the profile namespace before applying it. | |
| sed "s/namespace: .*/namespace: $KF_PROFILE/g" tests/gh-actions/kf-objects/training_operator_job.yaml | kubectl apply -f - | |
| fi | |
| if [ -f "tests/gh-actions/spark_test.sh" ]; then | |
| chmod u+x tests/gh-actions/spark_*.sh && ./tests/gh-actions/spark_test.sh "${KF_PROFILE}" | |
| fi | |
| - name: Test Security Standards | |
| run: | | |
| ./tests/gh-actions/enable_baseline_PSS.sh | |
| # Between the baseline and restricted scripts, strip the pod-security enforce label from each listed namespace that exists. | |
| NAMESPACES=("istio-system" "auth" "cert-manager" "oauth2-proxy" "kubeflow" "knative-serving") | |
| for NS in "${NAMESPACES[@]}"; do | |
| if kubectl get namespace "$NS" >/dev/null 2>&1; then | |
| # Test the label's value, not kubectl's exit status: a jsonpath query for a | |
| # missing label still exits 0 and simply prints nothing. | |
| ENFORCE_LEVEL=$(kubectl get namespace "$NS" -o jsonpath='{.metadata.labels.pod-security\.kubernetes\.io/enforce}' 2>/dev/null || true) | |
| if [ -n "$ENFORCE_LEVEL" ]; then | |
| kubectl label namespace "$NS" pod-security.kubernetes.io/enforce- | |
| fi | |
| fi | |
| done | |
| ./tests/gh-actions/enable_restricted_PSS.sh | |
| # Written as an if block: a trailing "[ -f ... ] && ..." would make the script, and so | |
| # the step, end with a non-zero status whenever the optional script is missing. | |
| if [ -f "tests/gh-actions/runasnonroot.sh" ]; then | |
| chmod +x tests/gh-actions/runasnonroot.sh | |
| ./tests/gh-actions/runasnonroot.sh | |
| fi | |
| - name: Verify Components | |
| run: | | |
| # Capture the listing first so that a failing kubectl call fails the step instead of | |
| # being mistaken for a cluster with no unhealthy pods. | |
| PODS=$(kubectl get pods --all-namespaces) | |
| # grep prints the matching rows into the job log; any match fails the step. | |
| if printf '%s\n' "$PODS" | grep -E '(Error|CrashLoopBackOff)'; then | |
| echo "Pods in Error or CrashLoopBackOff state detected" >&2 | |
| exit 1 | |
| fi | |
| - name: Collect Logs on Failure | |
| if: failure() | |
| run: | | |
| # Best-effort diagnostics, written under logs/. Each kubectl call is allowed to fail | |
| # so that one failing query does not abort the rest of the collection. | |
| mkdir -p logs | |
| kubectl get all --all-namespaces > logs/resources.txt || true | |
| kubectl get events --all-namespaces --sort-by=.metadata.creationTimestamp > logs/events.txt || true | |
| for ns in kubeflow istio-system cert-manager auth; do | |
| kubectl describe pods -n "$ns" > "logs/$ns-pods.txt" || true | |
| # Pod names contain no whitespace, so word-splitting the jsonpath output is safe here. | |
| for pod in $(kubectl get pods -n "$ns" -o jsonpath='{.items[*].metadata.name}'); do | |
| # Last 100 log lines per pod; stderr is kept in the file so errors are visible too. | |
| kubectl logs -n "$ns" "$pod" --tail=100 > "logs/$ns-$pod.txt" 2>&1 || true | |
| done | |
| done | |
| # Runs regardless of the job outcome and uploads the contents of logs/ as the kubeflow-test-logs artifact. | |
| # NOTE(review): logs/ is only created by the failure-only collection step in this file, so on a successful run there may be nothing to upload - confirm this is intended. | |
| - name: Upload Diagnostic Logs | |
| if: always() | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: kubeflow-test-logs | |
| path: logs/ | |