Skip to content

Commit c739d0a

Browse files
authored
Merge pull request #11432 from fabriziopandini/refine-v1beta2-scalingup-conditions
🌱 Refine v1beta2 ScalingUp conditions
2 parents b4a02a6 + 074211a commit c739d0a

File tree

8 files changed

+201
-150
lines changed

8 files changed

+201
-150
lines changed

controlplane/kubeadm/internal/controllers/status.go

Lines changed: 30 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ func setScalingUpCondition(_ context.Context, kcp *controlplanev1.KubeadmControl
230230
if currentReplicas >= desiredReplicas {
231231
var message string
232232
if missingReferencesMessage != "" {
233-
message = fmt.Sprintf("Scaling up would be blocked %s", missingReferencesMessage)
233+
message = fmt.Sprintf("Scaling up would be blocked because %s", missingReferencesMessage)
234234
}
235235
v1beta2conditions.Set(kcp, metav1.Condition{
236236
Type: controlplanev1.KubeadmControlPlaneScalingUpV1Beta2Condition,
@@ -242,28 +242,21 @@ func setScalingUpCondition(_ context.Context, kcp *controlplanev1.KubeadmControl
242242
}
243243

244244
message := fmt.Sprintf("Scaling up from %d to %d replicas", currentReplicas, desiredReplicas)
245-
if missingReferencesMessage != "" {
246-
message = fmt.Sprintf("%s is blocked %s", message, missingReferencesMessage)
247-
}
248-
messages := []string{message}
249-
250-
if preflightChecks.HasDeletingMachine {
251-
messages = append(messages, "waiting for Machine being deleted")
252-
}
253245

254-
if preflightChecks.ControlPlaneComponentsNotHealthy {
255-
messages = append(messages, "waiting for control plane components to be healthy")
246+
additionalMessages := getPreflightMessages(preflightChecks)
247+
if missingReferencesMessage != "" {
248+
additionalMessages = append(additionalMessages, fmt.Sprintf("* %s", missingReferencesMessage))
256249
}
257250

258-
if preflightChecks.EtcdClusterNotHealthy {
259-
messages = append(messages, "waiting for etcd cluster to be healthy")
251+
if len(additionalMessages) > 0 {
252+
message += fmt.Sprintf(" is blocked because:\n%s", strings.Join(additionalMessages, "\n"))
260253
}
261254

262255
v1beta2conditions.Set(kcp, metav1.Condition{
263256
Type: controlplanev1.KubeadmControlPlaneScalingUpV1Beta2Condition,
264257
Status: metav1.ConditionTrue,
265258
Reason: controlplanev1.KubeadmControlPlaneScalingUpV1Beta2Reason,
266-
Message: strings.Join(messages, "; "),
259+
Message: message,
267260
})
268261
}
269262

@@ -292,28 +285,22 @@ func setScalingDownCondition(_ context.Context, kcp *controlplanev1.KubeadmContr
292285
return
293286
}
294287

295-
messages := []string{fmt.Sprintf("Scaling down from %d to %d replicas", currentReplicas, desiredReplicas)}
296-
if preflightChecks.HasDeletingMachine {
297-
messages = append(messages, "waiting for Machine being deleted")
298-
}
288+
message := fmt.Sprintf("Scaling down from %d to %d replicas", currentReplicas, desiredReplicas)
299289

290+
additionalMessages := getPreflightMessages(preflightChecks)
300291
if staleMessage := aggregateStaleMachines(machines); staleMessage != "" {
301-
messages = append(messages, staleMessage)
302-
}
303-
304-
if preflightChecks.ControlPlaneComponentsNotHealthy {
305-
messages = append(messages, "waiting for control plane components to be healthy")
292+
additionalMessages = append(additionalMessages, fmt.Sprintf("* %s", staleMessage))
306293
}
307294

308-
if preflightChecks.EtcdClusterNotHealthy {
309-
messages = append(messages, "waiting for etcd cluster to be healthy")
295+
if len(additionalMessages) > 0 {
296+
message += fmt.Sprintf(" is blocked because:\n%s", strings.Join(additionalMessages, "\n"))
310297
}
311298

312299
v1beta2conditions.Set(kcp, metav1.Condition{
313300
Type: controlplanev1.KubeadmControlPlaneScalingDownV1Beta2Condition,
314301
Status: metav1.ConditionTrue,
315302
Reason: controlplanev1.KubeadmControlPlaneScalingDownV1Beta2Reason,
316-
Message: strings.Join(messages, "; "),
303+
Message: message,
317304
})
318305
}
319306

@@ -406,7 +393,7 @@ func setMachinesUpToDateCondition(ctx context.Context, kcp *controlplanev1.Kubea
406393

407394
func calculateMissingReferencesMessage(kcp *controlplanev1.KubeadmControlPlane, infraMachineTemplateNotFound bool) string {
408395
if infraMachineTemplateNotFound {
409-
return fmt.Sprintf("because %s does not exist", kcp.Spec.MachineTemplate.InfrastructureRef.Kind)
396+
return fmt.Sprintf("%s does not exist", kcp.Spec.MachineTemplate.InfrastructureRef.Kind)
410397
}
411398
return ""
412399
}
@@ -691,6 +678,22 @@ func minTime(t1, t2 time.Time) time.Time {
691678
return t1
692679
}
693680

681+
func getPreflightMessages(preflightChecks internal.PreflightCheckResults) []string {
682+
additionalMessages := []string{}
683+
if preflightChecks.HasDeletingMachine {
684+
additionalMessages = append(additionalMessages, "* waiting for a control plane Machine to complete deletion")
685+
}
686+
687+
if preflightChecks.ControlPlaneComponentsNotHealthy {
688+
additionalMessages = append(additionalMessages, "* waiting for control plane components to be healthy")
689+
}
690+
691+
if preflightChecks.EtcdClusterNotHealthy {
692+
additionalMessages = append(additionalMessages, "* waiting for etcd cluster to be healthy")
693+
}
694+
return additionalMessages
695+
}
696+
694697
func aggregateStaleMachines(machines collections.Machines) string {
695698
if len(machines) == 0 {
696699
return ""

controlplane/kubeadm/internal/controllers/status_test.go

Lines changed: 29 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -233,10 +233,11 @@ func Test_setScalingUpCondition(t *testing.T) {
233233
InfraMachineTemplateIsNotFound: true,
234234
},
235235
expectCondition: metav1.Condition{
236-
Type: controlplanev1.KubeadmControlPlaneScalingUpV1Beta2Condition,
237-
Status: metav1.ConditionTrue,
238-
Reason: controlplanev1.KubeadmControlPlaneScalingUpV1Beta2Reason,
239-
Message: "Scaling up from 3 to 5 replicas is blocked because AWSTemplate does not exist",
236+
Type: controlplanev1.KubeadmControlPlaneScalingUpV1Beta2Condition,
237+
Status: metav1.ConditionTrue,
238+
Reason: controlplanev1.KubeadmControlPlaneScalingUpV1Beta2Reason,
239+
Message: "Scaling up from 3 to 5 replicas is blocked because:\n" +
240+
"* AWSTemplate does not exist",
240241
},
241242
},
242243
{
@@ -258,10 +259,13 @@ func Test_setScalingUpCondition(t *testing.T) {
258259
},
259260
},
260261
expectCondition: metav1.Condition{
261-
Type: controlplanev1.KubeadmControlPlaneScalingUpV1Beta2Condition,
262-
Status: metav1.ConditionTrue,
263-
Reason: controlplanev1.KubeadmControlPlaneScalingUpV1Beta2Reason,
264-
Message: "Scaling up from 3 to 5 replicas; waiting for Machine being deleted; waiting for control plane components to be healthy; waiting for etcd cluster to be healthy",
262+
Type: controlplanev1.KubeadmControlPlaneScalingUpV1Beta2Condition,
263+
Status: metav1.ConditionTrue,
264+
Reason: controlplanev1.KubeadmControlPlaneScalingUpV1Beta2Reason,
265+
Message: "Scaling up from 3 to 5 replicas is blocked because:\n" +
266+
"* waiting for a control plane Machine to complete deletion\n" +
267+
"* waiting for control plane components to be healthy\n" +
268+
"* waiting for etcd cluster to be healthy",
265269
},
266270
},
267271
}
@@ -373,10 +377,11 @@ func Test_setScalingDownCondition(t *testing.T) {
373377
),
374378
},
375379
expectCondition: metav1.Condition{
376-
Type: controlplanev1.KubeadmControlPlaneScalingDownV1Beta2Condition,
377-
Status: metav1.ConditionTrue,
378-
Reason: controlplanev1.KubeadmControlPlaneScalingDownV1Beta2Reason,
379-
Message: "Scaling down from 3 to 1 replicas; Machine m1 is in deletion since more than 30m",
380+
Type: controlplanev1.KubeadmControlPlaneScalingDownV1Beta2Condition,
381+
Status: metav1.ConditionTrue,
382+
Reason: controlplanev1.KubeadmControlPlaneScalingDownV1Beta2Reason,
383+
Message: "Scaling down from 3 to 1 replicas is blocked because:\n" +
384+
"* Machine m1 is in deletion since more than 30m",
380385
},
381386
},
382387
{
@@ -393,10 +398,11 @@ func Test_setScalingDownCondition(t *testing.T) {
393398
),
394399
},
395400
expectCondition: metav1.Condition{
396-
Type: controlplanev1.KubeadmControlPlaneScalingDownV1Beta2Condition,
397-
Status: metav1.ConditionTrue,
398-
Reason: controlplanev1.KubeadmControlPlaneScalingDownV1Beta2Reason,
399-
Message: "Scaling down from 3 to 1 replicas; Machines m1, m2 are in deletion since more than 30m",
401+
Type: controlplanev1.KubeadmControlPlaneScalingDownV1Beta2Condition,
402+
Status: metav1.ConditionTrue,
403+
Reason: controlplanev1.KubeadmControlPlaneScalingDownV1Beta2Reason,
404+
Message: "Scaling down from 3 to 1 replicas is blocked because:\n" +
405+
"* Machines m1, m2 are in deletion since more than 30m",
400406
},
401407
},
402408
{
@@ -418,10 +424,13 @@ func Test_setScalingDownCondition(t *testing.T) {
418424
},
419425
},
420426
expectCondition: metav1.Condition{
421-
Type: controlplanev1.KubeadmControlPlaneScalingDownV1Beta2Condition,
422-
Status: metav1.ConditionTrue,
423-
Reason: controlplanev1.KubeadmControlPlaneScalingDownV1Beta2Reason,
424-
Message: "Scaling down from 3 to 1 replicas; waiting for Machine being deleted; waiting for control plane components to be healthy; waiting for etcd cluster to be healthy",
427+
Type: controlplanev1.KubeadmControlPlaneScalingDownV1Beta2Condition,
428+
Status: metav1.ConditionTrue,
429+
Reason: controlplanev1.KubeadmControlPlaneScalingDownV1Beta2Reason,
430+
Message: "Scaling down from 3 to 1 replicas is blocked because:\n" +
431+
"* waiting for a control plane Machine to complete deletion\n" +
432+
"* waiting for control plane components to be healthy\n" +
433+
"* waiting for etcd cluster to be healthy",
425434
},
426435
},
427436
}

internal/controllers/machineset/machineset_controller.go

Lines changed: 32 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -272,7 +272,7 @@ type scope struct {
272272
infrastructureObjectNotFound bool
273273
getAndAdoptMachinesForMachineSetSucceeded bool
274274
owningMachineDeployment *clusterv1.MachineDeployment
275-
scaleUpPreflightCheckErrMessage string
275+
scaleUpPreflightCheckErrMessages []string
276276
reconciliationTime time.Time
277277
}
278278

@@ -673,15 +673,19 @@ func (r *Reconciler) syncReplicas(ctx context.Context, s *scope) (ctrl.Result, e
673673
}
674674
}
675675

676-
result, preflightCheckErrMessage, err := r.runPreflightChecks(ctx, cluster, ms, "Scale up")
677-
if err != nil || !result.IsZero() {
676+
preflightCheckErrMessages, err := r.runPreflightChecks(ctx, cluster, ms, "Scale up")
677+
if err != nil || len(preflightCheckErrMessages) > 0 {
678678
if err != nil {
679-
// If the error is not nil use that as the message for the condition.
680-
preflightCheckErrMessage = err.Error()
679+
// If err is not nil use that as the preflightCheckErrMessage
680+
preflightCheckErrMessages = append(preflightCheckErrMessages, err.Error())
681681
}
682-
s.scaleUpPreflightCheckErrMessage = preflightCheckErrMessage
683-
conditions.MarkFalse(ms, clusterv1.MachinesCreatedCondition, clusterv1.PreflightCheckFailedReason, clusterv1.ConditionSeverityError, preflightCheckErrMessage)
684-
return result, err
682+
683+
s.scaleUpPreflightCheckErrMessages = preflightCheckErrMessages
684+
conditions.MarkFalse(ms, clusterv1.MachinesCreatedCondition, clusterv1.PreflightCheckFailedReason, clusterv1.ConditionSeverityError, strings.Join(preflightCheckErrMessages, "; "))
685+
if err != nil {
686+
return ctrl.Result{}, err
687+
}
688+
return ctrl.Result{RequeueAfter: preflightFailedRequeueAfter}, nil
685689
}
686690

687691
var (
@@ -1418,31 +1422,39 @@ func (r *Reconciler) reconcileUnhealthyMachines(ctx context.Context, s *scope) (
14181422
}
14191423

14201424
// Run preflight checks.
1421-
preflightChecksResult, preflightCheckErrMessage, err := r.runPreflightChecks(ctx, cluster, ms, "Machine remediation")
1422-
if err != nil {
1423-
// If err is not nil use that as the preflightCheckErrMessage
1424-
preflightCheckErrMessage = err.Error()
1425-
}
1425+
preflightCheckErrMessages, err := r.runPreflightChecks(ctx, cluster, ms, "Machine remediation")
1426+
if err != nil || len(preflightCheckErrMessages) > 0 {
1427+
if err != nil {
1428+
// If err is not nil use that as the preflightCheckErrMessage
1429+
preflightCheckErrMessages = append(preflightCheckErrMessages, err.Error())
1430+
}
1431+
1432+
listMessages := make([]string, len(preflightCheckErrMessages))
1433+
for i, msg := range preflightCheckErrMessages {
1434+
listMessages[i] = fmt.Sprintf("* %s", msg)
1435+
}
14261436

1427-
preflightChecksFailed := err != nil || !preflightChecksResult.IsZero()
1428-
if preflightChecksFailed {
14291437
// PreflightChecks did not pass. Update the MachineOwnerRemediated condition on the unhealthy Machines with
14301438
// WaitingForRemediationReason reason.
1431-
if err := patchMachineConditions(ctx, r.Client, machinesToRemediate, metav1.Condition{
1439+
if patchErr := patchMachineConditions(ctx, r.Client, machinesToRemediate, metav1.Condition{
14321440
Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition,
14331441
Status: metav1.ConditionFalse,
14341442
Reason: clusterv1.MachineSetMachineRemediationDeferredV1Beta2Reason,
1435-
Message: preflightCheckErrMessage,
1443+
Message: strings.Join(listMessages, "\n"),
14361444
}, &clusterv1.Condition{
14371445
Type: clusterv1.MachineOwnerRemediatedCondition,
14381446
Status: corev1.ConditionFalse,
14391447
Reason: clusterv1.WaitingForRemediationReason,
14401448
Severity: clusterv1.ConditionSeverityWarning,
1441-
Message: preflightCheckErrMessage,
1442-
}); err != nil {
1449+
Message: strings.Join(preflightCheckErrMessages, "; "),
1450+
}); patchErr != nil {
1451+
return ctrl.Result{}, kerrors.NewAggregate([]error{err, patchErr})
1452+
}
1453+
1454+
if err != nil {
14431455
return ctrl.Result{}, err
14441456
}
1445-
return preflightChecksResult, nil
1457+
return ctrl.Result{RequeueAfter: preflightFailedRequeueAfter}, nil
14461458
}
14471459

14481460
// PreflightChecks passed, so it is safe to remediate unhealthy machines by deleting them.

internal/controllers/machineset/machineset_controller_status.go

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ package machineset
1919
import (
2020
"context"
2121
"fmt"
22-
"slices"
2322
"sort"
2423
"strings"
2524
"time"
@@ -48,7 +47,7 @@ func (r *Reconciler) updateStatus(ctx context.Context, s *scope) {
4847
// Conditions
4948

5049
// Update the ScalingUp and ScalingDown condition.
51-
setScalingUpCondition(ctx, s.machineSet, s.machines, s.bootstrapObjectNotFound, s.infrastructureObjectNotFound, s.getAndAdoptMachinesForMachineSetSucceeded, s.scaleUpPreflightCheckErrMessage)
50+
setScalingUpCondition(ctx, s.machineSet, s.machines, s.bootstrapObjectNotFound, s.infrastructureObjectNotFound, s.getAndAdoptMachinesForMachineSetSucceeded, s.scaleUpPreflightCheckErrMessages)
5251
setScalingDownCondition(ctx, s.machineSet, s.machines, s.getAndAdoptMachinesForMachineSetSucceeded)
5352

5453
// MachinesReady condition: aggregate the Machine's Ready condition.
@@ -93,7 +92,7 @@ func setReplicas(_ context.Context, ms *clusterv1.MachineSet, machines []*cluste
9392
ms.Status.V1Beta2.UpToDateReplicas = ptr.To(upToDateReplicas)
9493
}
9594

96-
func setScalingUpCondition(_ context.Context, ms *clusterv1.MachineSet, machines []*clusterv1.Machine, bootstrapObjectNotFound, infrastructureObjectNotFound, getAndAdoptMachinesForMachineSetSucceeded bool, scaleUpPreflightCheckErrMessage string) {
95+
func setScalingUpCondition(_ context.Context, ms *clusterv1.MachineSet, machines []*clusterv1.Machine, bootstrapObjectNotFound, infrastructureObjectNotFound, getAndAdoptMachinesForMachineSetSucceeded bool, scaleUpPreflightCheckErrMessages []string) {
9796
// If we got unexpected errors in listing the machines (this should never happen), surface them.
9897
if !getAndAdoptMachinesForMachineSetSucceeded {
9998
v1beta2conditions.Set(ms, metav1.Condition{
@@ -140,11 +139,15 @@ func setScalingUpCondition(_ context.Context, ms *clusterv1.MachineSet, machines
140139

141140
// Scaling up.
142141
message := fmt.Sprintf("Scaling up from %d to %d replicas", currentReplicas, desiredReplicas)
143-
if missingReferencesMessage != "" || scaleUpPreflightCheckErrMessage != "" {
144-
blockMessages := slices.DeleteFunc([]string{missingReferencesMessage, scaleUpPreflightCheckErrMessage}, func(s string) bool {
145-
return s == ""
146-
})
147-
message += fmt.Sprintf(" is blocked because %s", strings.Join(blockMessages, " and "))
142+
if missingReferencesMessage != "" || len(scaleUpPreflightCheckErrMessages) > 0 {
143+
listMessages := make([]string, len(scaleUpPreflightCheckErrMessages))
144+
for i, msg := range scaleUpPreflightCheckErrMessages {
145+
listMessages[i] = fmt.Sprintf("* %s", msg)
146+
}
147+
if missingReferencesMessage != "" {
148+
listMessages = append(listMessages, fmt.Sprintf("* %s", missingReferencesMessage))
149+
}
150+
message += fmt.Sprintf(" is blocked because:\n%s", strings.Join(listMessages, "\n"))
148151
}
149152
v1beta2conditions.Set(ms, metav1.Condition{
150153
Type: clusterv1.MachineSetScalingUpV1Beta2Condition,

0 commit comments

Comments
 (0)