Skip to content

Commit b92b4b6

Browse files
committed
Install gpu-operator with CAAPH in e2e
1 parent aab6044 commit b92b4b6

File tree

6 files changed

+50
-18
lines changed

6 files changed

+50
-18
lines changed
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
apiVersion: addons.cluster.x-k8s.io/v1alpha1
2+
kind: HelmChartProxy
3+
metadata:
4+
name: gpu-operator
5+
spec:
6+
clusterSelector:
7+
matchLabels:
8+
gpu-operator: "true"
9+
repoURL: https://helm.ngc.nvidia.com/nvidia
10+
chartName: gpu-operator
11+
releaseName: nvidia-gpu-operator
12+
namespace: default

templates/test/ci/cluster-template-prow-nvidia-gpu.yaml

Lines changed: 15 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
apiVersion: cluster.x-k8s.io/v1beta1
2+
kind: Cluster
3+
metadata:
4+
name: ${CLUSTER_NAME}
5+
labels:
6+
gpu-operator: "true"

templates/test/ci/prow-nvidia-gpu/kustomization.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ resources:
77
- ../../../addons/cluster-api-helm/azuredisk-csi-driver.yaml
88
- ../../../addons/cluster-api-helm/cloud-provider-azure.yaml
99
- ../../../addons/cluster-api-helm/cloud-provider-azure-ci.yaml
10+
- ../../../addons/cluster-api-helm/gpu-operator.yaml
1011
patches:
1112
- path: patches/node-storage-type.yaml
1213
target:
@@ -19,6 +20,7 @@ patches:
1920
- path: ../patches/azurecluster-gpu.yaml
2021
- path: ../patches/cluster-label-calico.yaml
2122
- path: ../patches/cluster-label-cloud-provider-azure.yaml
23+
- path: ../patches/cluster-label-gpu-operator.yaml
2224

2325
sortOptions:
2426
order: fifo

test/e2e/azure_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -620,8 +620,8 @@ var _ = Describe("Workload cluster creation", func() {
620620
ClusterName: clusterName,
621621
}
622622
})
623-
InstallGPUOperator(ctx, func() GPUOperatorSpecInput {
624-
return GPUOperatorSpecInput{
623+
EnsureGPUOperator(ctx, func() EnsureGPUOperatorInput {
624+
return EnsureGPUOperatorInput{
625625
BootstrapClusterProxy: bootstrapClusterProxy,
626626
Namespace: namespace,
627627
ClusterName: clusterName,

test/e2e/gpu_operator.go

Lines changed: 13 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -29,24 +29,21 @@ import (
2929
)
3030

3131
const (
32-
nvidiaHelmChartRepoURL string = "https://helm.ngc.nvidia.com/nvidia"
33-
nvidiaGPUOperatorNamespace string = "default"
34-
nvidiaGPUOperatorHelmReleaseName string = "nvidia-gpu-operator"
35-
nvidiaGPUOperatorHelmChartName string = "gpu-operator"
32+
nvidiaGPUOperatorNamespace string = "default"
3633
)
3734

38-
// GPUOperatorSpecInput is the input for InstallGPUOperator.
39-
type GPUOperatorSpecInput struct {
35+
// EnsureGPUOperatorInput is the input for EnsureGPUOperator.
36+
type EnsureGPUOperatorInput struct {
4037
BootstrapClusterProxy framework.ClusterProxy
4138
Namespace *corev1.Namespace
4239
ClusterName string
4340
}
4441

45-
// InstallGPUOperator installs the official nvidia/gpu-operator helm chart.
46-
func InstallGPUOperator(ctx context.Context, inputGetter func() GPUOperatorSpecInput) {
42+
// EnsureGPUOperator waits for the gpu-operator deployment, installed by CAAPH from the official nvidia/gpu-operator helm chart, to become available.
43+
func EnsureGPUOperator(ctx context.Context, inputGetter func() EnsureGPUOperatorInput) {
4744
var (
4845
specName = "nvidia-gpu-operator"
49-
input GPUOperatorSpecInput
46+
input EnsureGPUOperatorInput
5047
)
5148

5249
Expect(ctx).NotTo(BeNil(), "ctx is required for %s spec", specName)
@@ -56,12 +53,12 @@ func InstallGPUOperator(ctx context.Context, inputGetter func() GPUOperatorSpecI
5653
Expect(input.Namespace).NotTo(BeNil(), "Invalid argument. input.Namespace can't be nil when calling %s spec", specName)
5754
Expect(input.ClusterName).NotTo(BeEmpty(), "Invalid argument. input.ClusterName can't be empty when calling %s spec", specName)
5855
clusterProxy := input.BootstrapClusterProxy.GetWorkloadCluster(ctx, input.Namespace.Name, input.ClusterName)
59-
InstallNvidiaGPUOperatorChart(ctx, clusterProxy)
60-
}
6156

62-
// InstallNvidiaGPUOperatorChart installs the official nvidia/gpu-operator helm chart
63-
func InstallNvidiaGPUOperatorChart(ctx context.Context, clusterProxy framework.ClusterProxy) {
64-
By("Installing nvidia/gpu-operator via helm")
65-
values := &HelmOptions{}
66-
InstallHelmChart(ctx, clusterProxy, nvidiaGPUOperatorNamespace, nvidiaHelmChartRepoURL, nvidiaGPUOperatorHelmChartName, nvidiaGPUOperatorHelmReleaseName, values, "")
57+
By("Ensuring GPU Operator is installed via CAAPH")
58+
59+
By("Waiting for Ready gpu-operator deployment pods")
60+
for _, d := range []string{"gpu-operator"} {
61+
waitInput := GetWaitForDeploymentsAvailableInput(ctx, clusterProxy, d, nvidiaGPUOperatorNamespace, specName)
62+
WaitForDeploymentsAvailable(ctx, waitInput, e2eConfig.GetIntervals(specName, "wait-deployment")...)
63+
}
6764
}

0 commit comments

Comments
 (0)