@@ -20,13 +20,15 @@ import (
20
20
"context"
21
21
"encoding/json"
22
22
"fmt"
23
+ "sort"
23
24
"time"
24
25
25
26
"github.com/go-logr/logr"
26
27
"github.com/pkg/errors"
27
28
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
28
29
kerrors "k8s.io/apimachinery/pkg/util/errors"
29
30
"k8s.io/klog/v2"
31
+ "k8s.io/utils/ptr"
30
32
ctrl "sigs.k8s.io/controller-runtime"
31
33
32
34
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
@@ -101,7 +103,10 @@ func (r *KubeadmControlPlaneReconciler) reconcileUnhealthyMachines(ctx context.C
101
103
// NOTE: The current solution is considered acceptable for the most frequent use case (only one machine to be remediated),
102
104
// however, in the future this could potentially be improved for the scenario where more than one machine to be remediated exists
103
105
// by considering which machine has lower impact on etcd quorum.
104
- machineToBeRemediated := getMachineToBeRemediated (machinesToBeRemediated )
106
+ machineToBeRemediated := getMachineToBeRemediated (machinesToBeRemediated , controlPlane .IsEtcdManaged ())
107
+ if machineToBeRemediated == nil {
108
+ return ctrl.Result {}, errors .New ("failed to find a Machine to remediate within unhealthy Machines" )
109
+ }
105
110
106
111
// Returns if the machine is in the process of being deleted.
107
112
if ! machineToBeRemediated .ObjectMeta .DeletionTimestamp .IsZero () {
@@ -339,14 +344,88 @@ func (r *KubeadmControlPlaneReconciler) reconcileUnhealthyMachines(ctx context.C
339
344
return ctrl.Result {Requeue : true }, nil
340
345
}
341
346
342
- // Gets the machine to be remediated, which is the oldest machine marked as unhealthy not yet provisioned (if any)
343
- // or the oldest machine marked as unhealthy.
344
- func getMachineToBeRemediated (unhealthyMachines collections.Machines ) * clusterv1.Machine {
345
- machineToBeRemediated := unhealthyMachines .Filter (collections .Not (collections .HasNode ())).Oldest ()
346
- if machineToBeRemediated == nil {
347
- machineToBeRemediated = unhealthyMachines .Oldest ()
347
+ // Gets the machine to be remediated, which is the "most broken" among the unhealthy machines, determined as the machine
348
+ // having the highest priority issue that other machines have not.
349
+ // The following issues are considered (from highest to lowest priority):
350
+ // - machine without .status.nodeRef
351
+ // - machine with etcd issue or etcd status unknown (etcd member, etcd pod)
352
+ // - machine with control plane component issue or status unknown (API server, controller manager, scheduler)
353
+ //
354
+ // Note: In case of more than one faulty machine the chance to recover mostly depends on the control plane being able to
355
+ // successfully create a replacement Machine, because due to scale up preflight checks, this cannot happen if there are
356
+ // still issues on the control plane after the first remediation.
357
+ // This func tries to maximize those chances of a successful remediation by picking for remediation the "most broken" machine first.
358
+ func getMachineToBeRemediated (unhealthyMachines collections.Machines , isEtcdManaged bool ) * clusterv1.Machine {
359
+ if unhealthyMachines .Len () == 0 {
360
+ return nil
361
+ }
362
+
363
+ machinesToBeRemediated := unhealthyMachines .UnsortedList ()
364
+ if len (machinesToBeRemediated ) == 1 {
365
+ return machinesToBeRemediated [0 ]
366
+ }
367
+
368
+ sort .Slice (machinesToBeRemediated , func (i , j int ) bool {
369
+ return pickMachineToBeRemediated (machinesToBeRemediated [i ], machinesToBeRemediated [j ], isEtcdManaged )
370
+ })
371
+ return machinesToBeRemediated [0 ]
372
+ }
373
+
374
+ // pickMachineToBeRemediated returns true if machine i should be remediated before machine j.
375
+ func pickMachineToBeRemediated (i , j * clusterv1.Machine , isEtcdManaged bool ) bool {
376
+ // if one machine does not have a node ref, we assume that provisioning failed and there is no CP components at all,
377
+ // so remediate first; also without a node, it is not possible to get further info about status.
378
+ if i .Status .NodeRef == nil && j .Status .NodeRef != nil {
379
+ return true
380
+ }
381
+ if i .Status .NodeRef != nil && j .Status .NodeRef == nil {
382
+ return false
383
+ }
384
+
385
+ // if one machine has unhealthy etcd member or pod, remediate first.
386
+ if isEtcdManaged {
387
+ if p := pickMachineToBeRemediatedByConditionState (i , j , controlplanev1 .MachineEtcdMemberHealthyCondition ); p != nil {
388
+ return * p
389
+ }
390
+ if p := pickMachineToBeRemediatedByConditionState (i , j , controlplanev1 .MachineEtcdPodHealthyCondition ); p != nil {
391
+ return * p
392
+ }
393
+
394
+ // Note: in the future we might consider etcd leadership and kubelet status to prevent being stuck when it is not possible
395
+ // to forward leadership, but this requires further investigation and most probably also to surface a few additional info in the controlPlane object.
396
+ }
397
+
398
+ // if one machine has unhealthy control plane component, remediate first.
399
+ if p := pickMachineToBeRemediatedByConditionState (i , j , controlplanev1 .MachineAPIServerPodHealthyCondition ); p != nil {
400
+ return * p
401
+ }
402
+ if p := pickMachineToBeRemediatedByConditionState (i , j , controlplanev1 .MachineControllerManagerPodHealthyCondition ); p != nil {
403
+ return * p
404
+ }
405
+ if p := pickMachineToBeRemediatedByConditionState (i , j , controlplanev1 .MachineSchedulerPodHealthyCondition ); p != nil {
406
+ return * p
407
+ }
408
+
409
+ // Use oldest (and Name) as a tie-breaker criteria.
410
+ if i .CreationTimestamp .Equal (& j .CreationTimestamp ) {
411
+ return i .Name < j .Name
412
+ }
413
+ return i .CreationTimestamp .Before (& j .CreationTimestamp )
414
+ }
415
+
416
+ // pickMachineToBeRemediatedByConditionState returns true if condition t report issue on machine i and not on machine j,
417
+ // false if the vice-versa apply, or nil if condition t doesn't provide a discriminating criteria for picking one machine or another for remediation.
418
+ func pickMachineToBeRemediatedByConditionState (i , j * clusterv1.Machine , t clusterv1.ConditionType ) * bool {
419
+ iCondition := conditions .IsTrue (i , t )
420
+ jCondition := conditions .IsTrue (j , t )
421
+
422
+ if ! iCondition && jCondition {
423
+ return ptr .To (true )
424
+ }
425
+ if iCondition && ! jCondition {
426
+ return ptr .To (false )
348
427
}
349
- return machineToBeRemediated
428
+ return nil
350
429
}
351
430
352
431
// checkRetryLimits checks if KCP is allowed to remediate considering retry limits:
0 commit comments