Skip to content

feat: Add preflight checks framework #1129

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 21 commits into from
Jun 17, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
7511f49
fix: Use helm variables for all webhook configurations
dlipovetsky May 19, 2025
a40fd92
feat: Add preflight checks framework
dlipovetsky May 20, 2025
24c9d45
fixup! feat: Add preflight checks framework
dlipovetsky May 20, 2025
858e6fd
fixup! feat: Add preflight checks framework
dlipovetsky May 20, 2025
2c69dd0
fixup! feat: Add preflight checks framework
dlipovetsky May 20, 2025
7e687a2
fixup! feat: Add preflight checks framework
dlipovetsky May 20, 2025
306a852
fixup! feat: Add preflight checks framework
dlipovetsky May 20, 2025
b3da7d4
fixup! feat: Add preflight checks framework
dlipovetsky May 21, 2025
690a89e
fixup! feat: Add preflight checks framework
dlipovetsky May 22, 2025
47c6ad8
fixup! feat: Add preflight checks framework
dlipovetsky May 21, 2025
3211e7a
fixup! feat: Add preflight checks framework
dlipovetsky May 22, 2025
5eb6d26
fixup! feat: Add preflight checks framework
dlipovetsky May 22, 2025
4a518b3
fixup! feat: Add preflight checks framework
dlipovetsky May 23, 2025
438495b
fixup! feat: Add preflight checks framework
dlipovetsky May 23, 2025
b377301
fixup! feat: Add preflight checks framework
dlipovetsky May 27, 2025
ec20c1e
fixup! feat: Add preflight checks framework
dlipovetsky May 30, 2025
d766f3a
fixup! feat: Add preflight checks framework
dlipovetsky May 30, 2025
14ea63d
fixup! feat: Add preflight checks framework
dlipovetsky Jun 3, 2025
a4078ad
fixup! feat: Add preflight checks framework
dlipovetsky Jun 12, 2025
10f132a
fixup! feat: Add preflight checks framework
dlipovetsky Jun 13, 2025
a067941
fixup! feat: Add preflight checks framework
dlipovetsky Jun 13, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,23 @@ webhooks:
resources:
- clusters
sideEffects: None
- admissionReviewVersions:
- v1
clientConfig:
service:
name: '{{ include "chart.name" . }}-admission'
namespace: '{{ .Release.Namespace }}'
path: /preflight-v1beta1-cluster
failurePolicy: Fail
name: preflight.cluster.caren.nutanix.com
rules:
- apiGroups:
- cluster.x-k8s.io
apiVersions:
- '*'
operations:
- CREATE
resources:
- clusters
sideEffects: None
timeoutSeconds: 30
8 changes: 8 additions & 0 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ import (
"github.com/nutanix-cloud-native/cluster-api-runtime-extensions-nutanix/pkg/handlers/nutanix"
"github.com/nutanix-cloud-native/cluster-api-runtime-extensions-nutanix/pkg/handlers/options"
"github.com/nutanix-cloud-native/cluster-api-runtime-extensions-nutanix/pkg/webhook/cluster"
"github.com/nutanix-cloud-native/cluster-api-runtime-extensions-nutanix/pkg/webhook/preflight"
)

func main() {
Expand Down Expand Up @@ -219,6 +220,13 @@ func main() {
Handler: cluster.NewValidator(mgr.GetClient(), admission.NewDecoder(mgr.GetScheme())),
})

mgr.GetWebhookServer().Register("/preflight-v1beta1-cluster", &webhook.Admission{
Handler: preflight.New(mgr.GetClient(), admission.NewDecoder(mgr.GetScheme()),
[]preflight.Checker{
// Add your preflight checkers here.
}...,
),
})
if err := mgr.Start(signalCtx); err != nil {
setupLog.Error(err, "unable to start controller manager")
os.Exit(1)
Expand Down
2 changes: 1 addition & 1 deletion hack/update-webhook-configurations.yq
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ with(.metadata;
.name = "{{ include \"chart.name\" . }}-" + .name,
.annotations["cert-manager.io/inject-ca-from"] = "{{ .Release.Namespace}}/{{ template \"chart.name\" . }}-admission-tls"
),
with(.webhooks[0].clientConfig.service;
with(.webhooks[].clientConfig.service;
.name = "{{ include \"chart.name\" . }}-admission",
.namespace = "{{ .Release.Namespace }}"
)
5 changes: 5 additions & 0 deletions pkg/webhook/preflight/doc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// Copyright 2025 Nutanix. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
package preflight

// +kubebuilder:webhook:path=/preflight-v1beta1-cluster,mutating=false,failurePolicy=fail,groups="cluster.x-k8s.io",resources=clusters,verbs=create,versions=*,name=preflight.cluster.caren.nutanix.com,admissionReviewVersions=v1,sideEffects=None,timeoutSeconds=30
210 changes: 210 additions & 0 deletions pkg/webhook/preflight/preflight.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
// Copyright 2025 Nutanix. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
package preflight

import (
"context"
"fmt"
"net/http"
"runtime/debug"
"sync"

admissionv1 "k8s.io/api/admission/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
ctrl "sigs.k8s.io/controller-runtime"
ctrlclient "sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
)

// Checker returns a set of checks that have been initialized with common
// dependencies, such as an infrastructure API client.
type Checker interface {
	// Init returns the checks that should run for the given cluster.
	Init(ctx context.Context, client ctrlclient.Client, cluster *clusterv1.Cluster) []Check
}

// Check is a single preflight check that can be run against a cluster.
type Check interface {
	// Name returns the name of the check. It identifies the check in the
	// webhook response, including when Run fails to return a result, for
	// example because it panicked.
	Name() string

	// Run executes the check and returns its result.
	Run(ctx context.Context) CheckResult
}

// CheckResult is the outcome of running a single Check.
type CheckResult struct {
	// Allowed indicates whether the check passed.
	Allowed bool
	// Error indicates whether there was an internal error running the check.
	Error bool

	// Causes lists the causes of the failure.
	Causes []Cause
	// Warnings lists the warnings generated during the check.
	Warnings []string
}

// Cause is a single cause of a check failure.
type Cause struct {
	// Message describes the cause.
	Message string
	// Field optionally indicates which part of the cluster configuration the
	// cause relates to.
	Field string
}

// WebhookHandler is the admission handler that runs preflight checks against
// Cluster objects. Create one with New.
type WebhookHandler struct {
// client is handed to every Checker's Init when checks are run.
client ctrlclient.Client
// decoder decodes the admission request into a Cluster.
decoder admission.Decoder
// checkers supply the checks to run; results are reported in this order.
checkers []Checker
}

// New returns a WebhookHandler that decodes requests with decoder and runs the
// checks provided by the given checkers, passing client to each Checker's Init.
func New(client ctrlclient.Client, decoder admission.Decoder, checkers ...Checker) *WebhookHandler {
	return &WebhookHandler{
		checkers: checkers,
		decoder:  decoder,
		client:   client,
	}
}

// namedResult pairs a CheckResult with the name of the check that produced it.
type namedResult struct {
// Name is the value returned by the check's Name method.
Name string
CheckResult
}

// Handle runs the preflight checks for the Cluster in the admission request and
// merges their results into a single admission response.
//
// DELETE requests, and clusters without a managed topology (Spec.Topology is
// nil), are allowed without running any checks. A request that cannot be
// decoded into a Cluster is answered with a 400 Bad Request error.
//
// For all other requests, every check of every registered checker is run, and:
//   - each Cause of each result is reported as a StatusCause whose Type is
//     "FailedPreflight" followed by the name of the check;
//   - all warnings are appended to the response warnings;
//   - the request is denied if any check did not pass, or if any check
//     reported an internal error.
//
// A denied response carries status 500 (InternalError) if any check reported an
// internal error, and status 422 (Invalid) otherwise.
func (h *WebhookHandler) Handle(ctx context.Context, req admission.Request) admission.Response {
	if req.Operation == admissionv1.Delete {
		return admission.Allowed("")
	}

	cluster := &clusterv1.Cluster{}
	if err := h.decoder.Decode(req, cluster); err != nil {
		return admission.Errored(http.StatusBadRequest, err)
	}

	// Checks run only for ClusterClass-based clusters.
	if cluster.Spec.Topology == nil {
		return admission.Allowed("")
	}

	resultsOrderedByCheckerAndCheck := run(ctx, h.client, cluster, h.checkers)

	// Summarize the results. The response starts out allowed, and is flipped
	// to denied by the first result that did not pass.
	resp := admission.Response{
		AdmissionResponse: admissionv1.AdmissionResponse{
			Allowed: true,
			Result: &metav1.Status{
				Details: &metav1.StatusDetails{},
			},
		},
	}
	internalError := false
	for _, results := range resultsOrderedByCheckerAndCheck {
		for _, result := range results {
			if result.Error {
				internalError = true
			}
			// A check that hit an internal error cannot be considered passed,
			// whatever its Allowed flag says. Without this, the response could
			// be allowed while carrying an internal-error status, and the
			// cluster would be admitted despite the error.
			if result.Error || !result.Allowed {
				resp.Allowed = false
			}
			for _, cause := range result.Causes {
				resp.Result.Details.Causes = append(resp.Result.Details.Causes, metav1.StatusCause{
					Type:    metav1.CauseType("FailedPreflight" + result.Name),
					Message: cause.Message,
					Field:   cause.Field,
				})
			}
			resp.Warnings = append(resp.Warnings, result.Warnings...)
		}
	}

	switch {
	case internalError:
		// Internal errors take precedence over check failures.
		resp.Result.Message = "preflight checks failed due to an internal error"
		resp.Result.Code = http.StatusInternalServerError
		resp.Result.Reason = metav1.StatusReasonInternalError
	case !resp.Allowed:
		// Because the response is not allowed, preflights must have failed.
		resp.Result.Message = "preflight checks failed"
		resp.Result.Code = http.StatusUnprocessableEntity
		resp.Result.Reason = metav1.StatusReasonInvalid
	}

	return resp
}

// run initializes all checkers and runs all of their checks for the cluster,
// and returns once every check has finished. Checkers are initialized
// concurrently, and the checks of each checker run concurrently as well.
//
// The results are ordered by checker and check: element [i][j] is the result
// of the j-th check returned by the Init of the i-th checker, regardless of
// the order in which the checks finish.
//
// NOTE: only panics raised by a check are recovered (see runCheck). A panic in
// Checker.Init is not recovered here.
func run(ctx context.Context,
	client ctrlclient.Client,
	cluster *clusterv1.Cluster,
	checkers []Checker,
) [][]namedResult {
	resultsOrderedByCheckerAndCheck := make([][]namedResult, len(checkers))

	var checkersWG sync.WaitGroup
	for i, checker := range checkers {
		checkersWG.Add(1)
		go func(checker Checker, i int) {
			defer checkersWG.Done()

			checks := checker.Init(ctx, client, cluster)
			resultsOrderedByCheck := make([]namedResult, len(checks))

			var checksWG sync.WaitGroup
			for j, check := range checks {
				checksWG.Add(1)
				go func(check Check, j int) {
					defer checksWG.Done()
					// Each goroutine writes only to its own, pre-allocated slot.
					resultsOrderedByCheck[j] = runCheck(ctx, cluster, check)
				}(check, j)
			}
			checksWG.Wait()
			resultsOrderedByCheckerAndCheck[i] = resultsOrderedByCheck
		}(checker, i)
	}
	checkersWG.Wait()

	return resultsOrderedByCheckerAndCheck
}

// runCheck runs a single check and returns its result together with the name
// of the check. If the check panics, the panic is recovered and logged with a
// stack trace, and the returned result reports an internal error with the
// panic value as its only cause.
func runCheck(ctx context.Context, cluster *clusterv1.Cluster, check Check) (result namedResult) {
	defer func() {
		r := recover()
		if r == nil {
			return
		}
		// result.Name is already set if the panic came from Run. Name is not
		// called again here, because a second panic inside this deferred
		// function would not be recovered.
		result.CheckResult = CheckResult{
			Error: true,
			Causes: []Cause{
				{
					// The panic value can be of any type, so format it with
					// %v; %s mangles values that are not strings or errors.
					Message: fmt.Sprintf("internal error (panic): %v", r),
					Field:   "",
				},
			},
		}
		ctrl.LoggerFrom(ctx).Error(
			fmt.Errorf("preflight check panic"),
			fmt.Sprintf("%v", r),
			"checkName", result.Name,
			"clusterName", cluster.Name,
			"clusterNamespace", cluster.Namespace,
			"stackTrace", string(debug.Stack()),
		)
	}()

	result.Name = check.Name()
	result.CheckResult = check.Run(ctx)
	return result
}
Loading
Loading