Skip to content

end-to-end integration tests #57

end-to-end integration tests

end-to-end integration tests #57

# End-to-end CI workflow: installs the Kubeflow components on a KinD cluster
# and runs the integration tests against it.
name: Full Kubeflow End-to-End Integration Test

# Runs on manual dispatch and on pushes / pull requests targeting master.
on:
  workflow_dispatch:
  push:
    branches:
      - master
  pull_request:
    branches:
      - master

# Least privilege for GITHUB_TOKEN: the job only checks out the repository
# and uploads an artifact, so read access to contents is sufficient.
permissions:
  contents: read

env:
  # Not referenced by the inline steps of this workflow; presumably consumed
  # by the tests/gh-actions/*.sh scripts - verify before renaming/removing.
  KIND_CLUSTER_NAME: kubeflow
  # Namespace of the user profile that the tests run against.
  KF_PROFILE: kubeflow-user-example-com
  # Not referenced by the inline steps either; see note on KIND_CLUSTER_NAME.
  KIND_NETWORK: kind
jobs:
  # Single job: bring up a KinD cluster, install Kubeflow component by
  # component, run the functional and security tests, then collect logs.
  kubeflow-integration:
    name: Kubeflow Installation and Testing
    runs-on:
      labels: ubuntu-latest-16-cores
    timeout-minutes: 60
    # Naming the shell explicitly makes GitHub run every script with
    # `bash --noprofile --norc -eo pipefail`. Without it the default is
    # `bash -e`, where a failing `kustomize build` on the left side of
    # `kustomize build ... | kubectl apply -f -` can go unnoticed.
    defaults:
      run:
        shell: bash
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Setup Environment
        run: |
          ./tests/gh-actions/install_KinD_create_KinD_cluster_install_kustomize.sh
          ./tests/gh-actions/install_kubectl.sh
          kustomize build common/kubeflow-namespace/base | kubectl apply -f -

      - name: Install Core Infrastructure
        run: |
          ./tests/gh-actions/install_cert_manager.sh
          ./tests/gh-actions/install_istio-cni.sh
          ./tests/gh-actions/install_oauth2-proxy.sh
          kustomize build common/istio-cni-1-24/kubeflow-istio-resources/base | kubectl apply -f -
          ./tests/gh-actions/install_multi_tenancy.sh
          kustomize build ./common/dex/overlays/oauth2-proxy | kubectl apply -f -

      - name: Install Central Dashboard and Pipelines
        run: |
          kustomize build apps/centraldashboard/upstream/overlays/kserve | kubectl apply -f -
          ./tests/gh-actions/install_pipelines.sh
          # Pods of completed Jobs stay in phase Succeeded and never become
          # Ready, so they are excluded (same selector as the Jupyter step).
          kubectl wait --for=condition=Ready pods --all --all-namespaces --timeout 180s \
            --field-selector=status.phase!=Succeeded

      - name: Create User Profile
        run: |
          kustomize build common/user-namespace/base | kubectl apply -f -
          # Poll for the artifact secret in the profile namespace instead of
          # sleeping a fixed time: continue as soon as it exists, fail with a
          # message if it has not appeared within 120 seconds.
          deadline=$((SECONDS + 120))
          until kubectl get secret mlpipeline-minio-artifact -n "$KF_PROFILE" > /dev/null 2>&1; do
            if [ "$SECONDS" -ge "$deadline" ]; then
              echo "::error::Secret mlpipeline-minio-artifact not found in namespace $KF_PROFILE after 120s"
              exit 1
            fi
            sleep 5
          done

      - name: Install Jupyter and Notebook Components
        run: |
          kustomize build apps/jupyter/jupyter-web-app/upstream/overlays/istio/ | kubectl apply -f -
          kustomize build apps/jupyter/notebook-controller/upstream/overlays/kubeflow/ | kubectl apply -f -
          kustomize build apps/admission-webhook/upstream/overlays/cert-manager | kubectl apply -f -
          # Fallback when the PodDefault CRD is still missing.
          # NOTE(review): this pulls an unpinned manifest from the master
          # branch of another repository - consider pinning to a tag/commit.
          kubectl get crd poddefaults.kubeflow.org || \
            kubectl apply -f https://raw.githubusercontent.com/kubeflow/kubeflow/master/components/admission-webhook/manifests/base/crd.yaml
          kubectl wait --for=condition=Ready pods --all --all-namespaces --timeout 300s \
            --field-selector=status.phase!=Succeeded

      - name: Install ML Components
        run: |
          # Katib
          # Refresh the package index first so the install does not fail on
          # a stale index baked into the runner image.
          sudo apt-get update
          sudo apt-get install -y apparmor-profiles
          sudo apparmor_parser -R /etc/apparmor.d/usr.sbin.mysqld
          # Build by path instead of `cd ... && ... && cd ../../../`: a failure
          # in the middle of an `&&` list is not caught by `set -e` and would
          # leave the script in the wrong working directory.
          kustomize build apps/katib/upstream/installs/katib-with-kubeflow | kubectl apply -f -
          kubectl wait --for=condition=Available deployment/katib-controller -n kubeflow --timeout=300s
          kubectl wait --for=condition=Available deployment/katib-mysql -n kubeflow --timeout=300s
          kubectl label namespace "$KF_PROFILE" katib.kubeflow.org/metrics-collector-injection=enabled --overwrite
          # Other ML components
          ./tests/gh-actions/install_training_operator.sh
          ./tests/gh-actions/install_knative.sh
          ./tests/gh-actions/install_kserve.sh
          chmod u+x tests/gh-actions/spark_*.sh
          ./tests/gh-actions/spark_install.sh
          # Dependencies
          pip install pytest kubernetes kfp==2.11.0 kserve pytest-timeout pyyaml requests

      - name: Setup Gateway
        run: |
          GATEWAY=$(kubectl get svc -n istio-system -l app=istio-ingressgateway -o jsonpath='{.items[0].metadata.name}')
          # Forward the ingress gateway to localhost:8080 in the background so
          # the test steps below can reach it.
          nohup kubectl port-forward -n istio-system "svc/$GATEWAY" 8080:80 &
          # Wait until the forwarded port answers, but give up after 120s
          # instead of spinning until the job-level timeout.
          deadline=$((SECONDS + 120))
          until curl -s localhost:8080 > /dev/null; do
            if [ "$SECONDS" -ge "$deadline" ]; then
              echo "::error::Istio ingress gateway not reachable on localhost:8080 after 120s"
              exit 1
            fi
            sleep 1
          done

      # Despite its name this step runs every functional test: Dex auth,
      # pipelines, notebook pipeline, Katib, training operator and Spark.
      - name: Test Dex Authentication
        run: |
          # Authentication
          chmod +x tests/gh-actions/test_dex_auth.sh
          ./tests/gh-actions/test_dex_auth.sh
          # ML Pipeline
          TOKEN="$(kubectl -n "$KF_PROFILE" create token default-editor)"
          python3 tests/gh-actions/pipeline_test.py run_pipeline "${TOKEN}" "${KF_PROFILE}"
          # Token of a service account outside the profile namespace; created
          # in its own assignment so a kubectl failure aborts the step.
          UNAUTHORIZED_TOKEN="$(kubectl -n default create token default)"
          python3 tests/gh-actions/pipeline_test.py test_unauthorized_access "${UNAUTHORIZED_TOKEN}" "${KF_PROFILE}"
          # Notebook Pipeline
          # NOTE(review): nothing in this workflow file creates pod `test-0`;
          # confirm one of the install scripts does, otherwise `kubectl cp` fails.
          if [ -f "tests/gh-actions/run_and_wait_kubeflow_pipeline.py" ]; then
            kubectl -n "$KF_PROFILE" cp ./tests/gh-actions/run_and_wait_kubeflow_pipeline.py \
              test-0:/home/jovyan/run_and_wait_kubeflow_pipeline.py
            # No -t/-i: CI provides no terminal and the script reads no stdin.
            kubectl -n "$KF_PROFILE" exec test-0 -- python /home/jovyan/run_and_wait_kubeflow_pipeline.py
          fi
          # Katib
          if kubectl get crd experiments.kubeflow.org > /dev/null 2>&1; then
            sed "s/kubeflow-user/$KF_PROFILE/g" tests/gh-actions/kf-objects/katib_test.yaml | kubectl apply -f -
            # Best effort: a timeout here deliberately does not fail the step.
            kubectl wait --for=condition=Running experiments.kubeflow.org -n "$KF_PROFILE" --all --timeout=300s || true
            sleep 30
          fi
          # Training and Spark
          if kubectl get crd pytorchjobs.kubeflow.org > /dev/null 2>&1; then
            sed "s/namespace: .*/namespace: $KF_PROFILE/g" tests/gh-actions/kf-objects/training_operator_job.yaml | kubectl apply -f -
          fi
          if [ -f "tests/gh-actions/spark_test.sh" ]; then
            chmod u+x tests/gh-actions/spark_*.sh
            ./tests/gh-actions/spark_test.sh "${KF_PROFILE}"
          fi

      - name: Test Security Standards
        run: |
          ./tests/gh-actions/enable_baseline_PSS.sh
          # Remove the `enforce` label from each namespace again before the
          # restricted run below.
          NAMESPACES=("istio-system" "auth" "cert-manager" "oauth2-proxy" "kubeflow" "knative-serving")
          for NS in "${NAMESPACES[@]}"; do
            if ! kubectl get namespace "$NS" > /dev/null 2>&1; then
              continue
            fi
            # A jsonpath lookup exits 0 even when the label is absent, so test
            # the printed value rather than the exit status.
            ENFORCE=$(kubectl get namespace "$NS" \
              -o jsonpath='{.metadata.labels.pod-security\.kubernetes\.io/enforce}' 2> /dev/null || true)
            if [ -n "$ENFORCE" ]; then
              kubectl label namespace "$NS" pod-security.kubernetes.io/enforce-
            fi
          done
          ./tests/gh-actions/enable_restricted_PSS.sh
          # Optional script. An `if` is used because `[ -f x ] && ...` as the
          # last command would fail the step whenever the file is absent.
          if [ -f "tests/gh-actions/runasnonroot.sh" ]; then
            chmod +x tests/gh-actions/runasnonroot.sh
            ./tests/gh-actions/runasnonroot.sh
          fi

      - name: Verify Components
        run: |
          # Fail if any pod row mentions Error or CrashLoopBackOff; the
          # matching rows are printed by grep.
          if kubectl get pods --all-namespaces | grep -E '(Error|CrashLoopBackOff)'; then
            echo "::error::Pods in Error/CrashLoopBackOff state detected"
            exit 1
          fi

      - name: Collect Logs on Failure
        if: failure()
        run: |
          # Best effort: each command is allowed to fail so that one broken
          # call does not stop the remaining diagnostics from being collected.
          mkdir -p logs
          kubectl get all --all-namespaces > logs/resources.txt 2>&1 || true
          kubectl get events --all-namespaces --sort-by=.metadata.creationTimestamp > logs/events.txt 2>&1 || true
          for ns in kubeflow istio-system cert-manager auth oauth2-proxy "$KF_PROFILE"; do
            kubectl describe pods -n "$ns" > "logs/$ns-pods.txt" 2>&1 || true
            for pod in $(kubectl get pods -n "$ns" -o jsonpath='{.items[*].metadata.name}' 2> /dev/null || true); do
              kubectl logs -n "$ns" "$pod" --all-containers --tail=100 > "logs/$ns-$pod.txt" 2>&1 || true
            done
          done

      - name: Upload Diagnostic Logs
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: kubeflow-test-logs
          path: logs/
          # logs/ only exists after a failure; stay quiet on successful runs.
          if-no-files-found: ignore