Skip to content

Commit 178ede7

Browse files
authored
feat: Add preflight checks framework (#1129)
**What problem does this PR solve?**: Adds a framework for preflight checks. A preflight check is a type of validation that typically requires access to an infrastructure API. A validating webhook on the Cluster resource executes all preflight checks and returns any failures and warnings to the client. **Which issue(s) this PR fixes**: Fixes # **How Has This Been Tested?**: <!-- Please describe the tests that you ran to verify your changes. Provide output from the tests and any manual steps needed to replicate the tests. --> **Special notes for your reviewer**: <!-- Use this to provide any additional information to the reviewers. This may include: - Best way to review the PR. - Where the author wants the most review attention on. - etc. -->
1 parent 50a97fd commit 178ede7

File tree

6 files changed

+1254
-1
lines changed

6 files changed

+1254
-1
lines changed

charts/cluster-api-runtime-extensions-nutanix/templates/webhooks.yaml

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,3 +56,23 @@ webhooks:
5656
resources:
5757
- clusters
5858
sideEffects: None
59+
- admissionReviewVersions:
60+
- v1
61+
clientConfig:
62+
service:
63+
name: '{{ include "chart.name" . }}-admission'
64+
namespace: '{{ .Release.Namespace }}'
65+
path: /preflight-v1beta1-cluster
66+
failurePolicy: Fail
67+
name: preflight.cluster.caren.nutanix.com
68+
rules:
69+
- apiGroups:
70+
- cluster.x-k8s.io
71+
apiVersions:
72+
- '*'
73+
operations:
74+
- CREATE
75+
resources:
76+
- clusters
77+
sideEffects: None
78+
timeoutSeconds: 30

cmd/main.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ import (
4141
"github.com/nutanix-cloud-native/cluster-api-runtime-extensions-nutanix/pkg/handlers/nutanix"
4242
"github.com/nutanix-cloud-native/cluster-api-runtime-extensions-nutanix/pkg/handlers/options"
4343
"github.com/nutanix-cloud-native/cluster-api-runtime-extensions-nutanix/pkg/webhook/cluster"
44+
"github.com/nutanix-cloud-native/cluster-api-runtime-extensions-nutanix/pkg/webhook/preflight"
4445
)
4546

4647
func main() {
@@ -219,6 +220,13 @@ func main() {
219220
Handler: cluster.NewValidator(mgr.GetClient(), admission.NewDecoder(mgr.GetScheme())),
220221
})
221222

223+
mgr.GetWebhookServer().Register("/preflight-v1beta1-cluster", &webhook.Admission{
224+
Handler: preflight.New(mgr.GetClient(), admission.NewDecoder(mgr.GetScheme()),
225+
[]preflight.Checker{
226+
// Add your preflight checkers here.
227+
}...,
228+
),
229+
})
222230
if err := mgr.Start(signalCtx); err != nil {
223231
setupLog.Error(err, "unable to start controller manager")
224232
os.Exit(1)

hack/update-webhook-configurations.yq

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ with(.metadata;
88
.name = "{{ include \"chart.name\" . }}-" + .name,
99
.annotations["cert-manager.io/inject-ca-from"] = "{{ .Release.Namespace}}/{{ template \"chart.name\" . }}-admission-tls"
1010
),
11-
with(.webhooks[0].clientConfig.service;
11+
with(.webhooks[].clientConfig.service;
1212
.name = "{{ include \"chart.name\" . }}-admission",
1313
.namespace = "{{ .Release.Namespace }}"
1414
)

pkg/webhook/preflight/doc.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
// Copyright 2025 Nutanix. All rights reserved.
2+
// SPDX-License-Identifier: Apache-2.0
3+
package preflight
4+
5+
// +kubebuilder:webhook:path=/preflight-v1beta1-cluster,mutating=false,failurePolicy=fail,groups="cluster.x-k8s.io",resources=clusters,verbs=create,versions=*,name=preflight.cluster.caren.nutanix.com,admissionReviewVersions=v1,sideEffects=None,timeoutSeconds=30

pkg/webhook/preflight/preflight.go

Lines changed: 210 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,210 @@
1+
// Copyright 2025 Nutanix. All rights reserved.
2+
// SPDX-License-Identifier: Apache-2.0
3+
package preflight
4+
5+
import (
6+
"context"
7+
"fmt"
8+
"net/http"
9+
"runtime/debug"
10+
"sync"
11+
12+
admissionv1 "k8s.io/api/admission/v1"
13+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
14+
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
15+
ctrl "sigs.k8s.io/controller-runtime"
16+
ctrlclient "sigs.k8s.io/controller-runtime/pkg/client"
17+
"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
18+
)
19+
20+
type (
21+
// Checker returns a set of checks that have been initialized with common dependencies,
22+
// such as an infrastructure API client.
23+
Checker interface {
24+
// Init returns the checks that should run for the cluster.
25+
Init(ctx context.Context, client ctrlclient.Client, cluster *clusterv1.Cluster) []Check
26+
}
27+
28+
// Check represents a single preflight check that can be run against a cluster.
29+
// It has a Name method that returns the name of the check, and a Run method executes
30+
// the check, and returns a CheckResult.
31+
// The Name method is used to identify the check if Run fails to return a result, for
32+
// example if it panics.
33+
Check interface {
34+
Name() string
35+
Run(ctx context.Context) CheckResult
36+
}
37+
38+
// CheckResult represents the result of a check.
39+
// It contains the name of the check, a boolean indicating whether the check passed, an
40+
// error boolean indicating whether there was an internal error running the check, and a
41+
// list of causes for the failure. It also contains a list of warnings that were
42+
// generated during the check.
43+
CheckResult struct {
44+
Allowed bool
45+
Error bool
46+
47+
Causes []Cause
48+
Warnings []string
49+
}
50+
51+
// Cause represents a cause of a check failure. It contains a message and an optional
52+
// field that the cause relates to. The field is used to indicate which part of the
53+
// cluster configuration the cause relates to.
54+
Cause struct {
55+
Message string
56+
Field string
57+
}
58+
)
59+
60+
type WebhookHandler struct {
61+
client ctrlclient.Client
62+
decoder admission.Decoder
63+
checkers []Checker
64+
}
65+
66+
func New(client ctrlclient.Client, decoder admission.Decoder, checkers ...Checker) *WebhookHandler {
67+
h := &WebhookHandler{
68+
client: client,
69+
decoder: decoder,
70+
checkers: checkers,
71+
}
72+
return h
73+
}
74+
75+
type namedResult struct {
76+
Name string
77+
CheckResult
78+
}
79+
80+
func (h *WebhookHandler) Handle(ctx context.Context, req admission.Request) admission.Response {
81+
if req.Operation == admissionv1.Delete {
82+
return admission.Allowed("")
83+
}
84+
85+
cluster := &clusterv1.Cluster{}
86+
err := h.decoder.Decode(req, cluster)
87+
if err != nil {
88+
return admission.Errored(http.StatusBadRequest, err)
89+
}
90+
91+
// Checks run only for ClusterClass-based clusters.
92+
if cluster.Spec.Topology == nil {
93+
return admission.Allowed("")
94+
}
95+
96+
resultsOrderedByCheckerAndCheck := run(ctx, h.client, cluster, h.checkers)
97+
98+
// Summarize the results.
99+
resp := admission.Response{
100+
AdmissionResponse: admissionv1.AdmissionResponse{
101+
Allowed: true,
102+
Result: &metav1.Status{
103+
Details: &metav1.StatusDetails{},
104+
},
105+
},
106+
}
107+
internalError := false
108+
for _, results := range resultsOrderedByCheckerAndCheck {
109+
for _, result := range results {
110+
if result.Error {
111+
internalError = true
112+
}
113+
if !result.Allowed {
114+
resp.Allowed = false
115+
}
116+
for _, cause := range result.Causes {
117+
resp.Result.Details.Causes = append(resp.Result.Details.Causes, metav1.StatusCause{
118+
Type: metav1.CauseType(fmt.Sprintf("FailedPreflight%s", result.Name)),
119+
Message: cause.Message,
120+
Field: cause.Field,
121+
})
122+
}
123+
resp.Warnings = append(resp.Warnings, result.Warnings...)
124+
}
125+
}
126+
127+
switch {
128+
case internalError:
129+
// Internal errors take precedence over check failures.
130+
resp.Result.Message = "preflight checks failed due to an internal error"
131+
resp.Result.Code = http.StatusInternalServerError
132+
resp.Result.Reason = metav1.StatusReasonInternalError
133+
case !resp.Allowed:
134+
// Because the response is not allowed, preflights must have failed.
135+
resp.Result.Message = "preflight checks failed"
136+
resp.Result.Code = http.StatusUnprocessableEntity
137+
resp.Result.Reason = metav1.StatusReasonInvalid
138+
}
139+
140+
return resp
141+
}
142+
143+
// run runs all checks for the cluster, concurrently, and returns the results ordered by checker and check.
144+
// Checker are initialized concurrently, and checks runs concurrently as well.
145+
func run(ctx context.Context,
146+
client ctrlclient.Client,
147+
cluster *clusterv1.Cluster,
148+
checkers []Checker,
149+
) [][]namedResult {
150+
resultsOrderedByCheckerAndCheck := make([][]namedResult, len(checkers))
151+
152+
checkersWG := sync.WaitGroup{}
153+
for i, checker := range checkers {
154+
checkersWG.Add(1)
155+
go func(ctx context.Context, client ctrlclient.Client, cluster *clusterv1.Cluster, checker Checker, i int) {
156+
defer checkersWG.Done()
157+
158+
checks := checker.Init(ctx, client, cluster)
159+
resultsOrderedByCheck := make([]namedResult, len(checks))
160+
161+
checksWG := sync.WaitGroup{}
162+
for j, check := range checks {
163+
checksWG.Add(1)
164+
go func(ctx context.Context, check Check, j int) {
165+
defer checksWG.Done()
166+
defer func() {
167+
if r := recover(); r != nil {
168+
resultsOrderedByCheck[j] = namedResult{
169+
Name: check.Name(),
170+
CheckResult: CheckResult{
171+
Error: true,
172+
Causes: []Cause{
173+
{
174+
Message: fmt.Sprintf("internal error (panic): %s", r),
175+
Field: "",
176+
},
177+
},
178+
},
179+
}
180+
ctrl.LoggerFrom(ctx).Error(
181+
fmt.Errorf("preflight check panic"),
182+
fmt.Sprintf("%v", r),
183+
"checkName", check.Name(),
184+
"clusterName", cluster.Name,
185+
"clusterNamespace", cluster.Namespace,
186+
"stackTrace", string(debug.Stack()),
187+
)
188+
}
189+
}()
190+
result := check.Run(ctx)
191+
resultsOrderedByCheck[j] = namedResult{
192+
Name: check.Name(),
193+
CheckResult: result,
194+
}
195+
}(ctx, check, j)
196+
}
197+
checksWG.Wait()
198+
resultsOrderedByCheckerAndCheck[i] = resultsOrderedByCheck
199+
}(
200+
ctx,
201+
client,
202+
cluster,
203+
checker,
204+
i,
205+
)
206+
}
207+
checkersWG.Wait()
208+
209+
return resultsOrderedByCheckerAndCheck
210+
}

0 commit comments

Comments
 (0)