Skip to content

Commit a4078ad

Browse files
committed
fixup! feat: Add preflight checks framework
Recover from and log panic in check
1 parent 14ea63d commit a4078ad

File tree

2 files changed

+382
-196
lines changed

2 files changed

+382
-196
lines changed

pkg/webhook/preflight/preflight.go

Lines changed: 50 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,24 +6,43 @@ import (
66
"context"
77
"fmt"
88
"net/http"
9+
"runtime/debug"
910
"sync"
1011

1112
admissionv1 "k8s.io/api/admission/v1"
1213
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1314
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
15+
ctrl "sigs.k8s.io/controller-runtime"
1416
ctrlclient "sigs.k8s.io/controller-runtime/pkg/client"
1517
"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
1618
)
1719

1820
type (
1921
// CheckerFactory returns a Checker for a given cluster.
2022
CheckerFactory func(client ctrlclient.Client, cluster *clusterv1.Cluster) Checker
21-
Checker interface {
23+
24+
// Checker returns a set of checks that have been initialized with common dependencies,
25+
// such as an infrastructure API client.
26+
Checker interface {
2227
// Init returns the checks that should run for the cluster.
2328
Init(ctx context.Context) []Check
2429
}
2530

26-
Check = func(ctx context.Context) CheckResult
31+
// Check represents a single preflight check that can be run against a cluster.
32+
// It has a Name method that returns the name of the check, and a Run method executes
33+
// the check, and returns a CheckResult.
34+
// The Name method is used to identify the check if Run fails to return a result, for
35+
// example if it panics.
36+
Check interface {
37+
Name() string
38+
Run(ctx context.Context) CheckResult
39+
}
40+
41+
// CheckResult represents the result of a check.
42+
// It contains the name of the check, a boolean indicating whether the check passed, an
43+
// error boolean indicating whether there was an internal error running the check, and a
44+
// list of causes for the failure. It also contains a list of warnings that were
45+
// generated during the check.
2746
CheckResult struct {
2847
Name string
2948

@@ -33,6 +52,10 @@ type (
3352
Causes []Cause
3453
Warnings []string
3554
}
55+
56+
// Cause represents a cause of a check failure. It contains a message and an optional
57+
// field that the cause relates to. The field is used to indicate which part of the
58+
// cluster configuration the cause relates to.
3659
Cause struct {
3760
Message string
3861
Field string
@@ -130,6 +153,7 @@ func run(ctx context.Context,
130153
for i, factory := range factories {
131154
checkersWG.Add(1)
132155
go func(ctx context.Context, client ctrlclient.Client, cluster *clusterv1.Cluster, factory CheckerFactory, i int) {
156+
defer checkersWG.Done()
133157
checker := factory(client, cluster)
134158

135159
checks := checker.Init(ctx)
@@ -139,15 +163,35 @@ func run(ctx context.Context,
139163
for j, check := range checks {
140164
checksWG.Add(1)
141165
go func(ctx context.Context, check Check, j int) {
142-
result := check(ctx)
166+
defer checksWG.Done()
167+
defer func() {
168+
if r := recover(); r != nil {
169+
resultsOrderedByCheck[j] = CheckResult{
170+
Name: check.Name(),
171+
Error: true,
172+
Causes: []Cause{
173+
{
174+
Message: fmt.Sprintf("internal error (panic): %s", r),
175+
Field: "",
176+
},
177+
},
178+
}
179+
ctrl.LoggerFrom(ctx).Error(
180+
fmt.Errorf("preflight check panic"),
181+
fmt.Sprintf("%v", r),
182+
"checkName", check.Name(),
183+
"clusterName", cluster.Name,
184+
"clusterNamespace", cluster.Namespace,
185+
"stackTrace", string(debug.Stack()),
186+
)
187+
}
188+
}()
189+
result := check.Run(ctx)
143190
resultsOrderedByCheck[j] = result
144-
checksWG.Done()
145191
}(ctx, check, j)
146192
}
147193
checksWG.Wait()
148194
resultsOrderedByCheckerAndCheck[i] = resultsOrderedByCheck
149-
150-
checkersWG.Done()
151195
}(
152196
ctx,
153197
client,

0 commit comments

Comments
 (0)