Commit 8326ede

🌱 Implement KCP and MS Deleting conditions (#11381)
* Implement KCP and MS Deleting conditions

  Signed-off-by: Stefan Büringer buringerst@vmware.com

* Fix review findings

* Fix review findings

---------

Signed-off-by: Stefan Büringer buringerst@vmware.com
1 parent 931758e commit 8326ede

File tree

12 files changed: +395 −43 lines changed

api/v1beta1/machineset_types.go

Lines changed: 14 additions & 0 deletions
@@ -157,9 +157,23 @@ const (
 const (
     // MachineSetRemediatingV1Beta2Condition surfaces details about ongoing remediation of the controlled machines, if any.
     MachineSetRemediatingV1Beta2Condition = RemediatingV1Beta2Condition
+)

+// MachineSet's Deleting condition and corresponding reasons that will be used in v1Beta2 API version.
+const (
     // MachineSetDeletingV1Beta2Condition surfaces details about ongoing deletion of the controlled machines.
     MachineSetDeletingV1Beta2Condition = DeletingV1Beta2Condition
+
+    // MachineSetDeletingDeletionTimestampNotSetV1Beta2Reason surfaces when the MachineSet is not deleting because the
+    // DeletionTimestamp is not set.
+    MachineSetDeletingDeletionTimestampNotSetV1Beta2Reason = DeletionTimestampNotSetV1Beta2Reason
+
+    // MachineSetDeletingDeletionTimestampSetV1Beta2Reason surfaces when the MachineSet is deleting because the
+    // DeletionTimestamp is set.
+    MachineSetDeletingDeletionTimestampSetV1Beta2Reason = DeletionTimestampSetV1Beta2Reason
+
+    // MachineSetDeletingInternalErrorV1Beta2Reason surfaces unexpected failures when deleting a MachineSet.
+    MachineSetDeletingInternalErrorV1Beta2Reason = InternalErrorV1Beta2Reason
 )

 // ANCHOR_END: MachineSetSpec
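The new MachineSet reasons pair with the existing Deleting condition type. Below is a minimal sketch, not part of this commit, of how they could be mapped to a metav1.Condition based on the DeletionTimestamp; the actual wiring lives in the MachineSet controller's status code. MachineSetDeletingInternalErrorV1Beta2Reason would be used instead when deletion hits an unexpected error.

// Hypothetical sketch, not part of this commit.
package example

import (
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

    clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
)

// deletingCondition builds the Deleting condition; message is whatever the reconciler
// wants to surface while deletion is in progress (e.g. "Deleting 3 Machines").
func deletingCondition(ms *clusterv1.MachineSet, message string) metav1.Condition {
    if ms.DeletionTimestamp.IsZero() {
        return metav1.Condition{
            Type:   clusterv1.MachineSetDeletingV1Beta2Condition,
            Status: metav1.ConditionFalse,
            Reason: clusterv1.MachineSetDeletingDeletionTimestampNotSetV1Beta2Reason,
        }
    }
    return metav1.Condition{
        Type:    clusterv1.MachineSetDeletingV1Beta2Condition,
        Status:  metav1.ConditionTrue,
        Reason:  clusterv1.MachineSetDeletingDeletionTimestampSetV1Beta2Reason,
        Message: message,
    }
}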

controlplane/kubeadm/api/v1beta1/v1beta2_condition_consts.go

Lines changed: 20 additions & 5 deletions
@@ -213,12 +213,27 @@ const (
 const (
     // KubeadmControlPlaneDeletingV1Beta2Condition surfaces details about ongoing deletion of the controlled machines.
     KubeadmControlPlaneDeletingV1Beta2Condition = clusterv1.DeletingV1Beta2Condition
-)

-// KubeadmControlPlane's Paused condition and corresponding reasons that will be used in v1Beta2 API version.
-const (
-    // KubeadmControlPlanePausedV1Beta2Condition is true if this resource or the Cluster it belongs to are paused.
-    KubeadmControlPlanePausedV1Beta2Condition = clusterv1.PausedV1Beta2Condition
+    // KubeadmControlPlaneDeletingDeletionTimestampNotSetV1Beta2Reason surfaces when the KCP is not deleting because the
+    // DeletionTimestamp is not set.
+    KubeadmControlPlaneDeletingDeletionTimestampNotSetV1Beta2Reason = clusterv1.DeletionTimestampNotSetV1Beta2Reason
+
+    // KubeadmControlPlaneDeletingWaitingForWorkersDeletionV1Beta2Reason surfaces when the KCP deletion
+    // waits for the workers to be deleted.
+    KubeadmControlPlaneDeletingWaitingForWorkersDeletionV1Beta2Reason = "WaitingForWorkersDeletion"
+
+    // KubeadmControlPlaneDeletingWaitingForMachineDeletionV1Beta2Reason surfaces when the KCP deletion
+    // waits for the control plane Machines to be deleted.
+    KubeadmControlPlaneDeletingWaitingForMachineDeletionV1Beta2Reason = "WaitingForMachineDeletion"
+
+    // KubeadmControlPlaneDeletingDeletionCompletedV1Beta2Reason surfaces when the KCP deletion has been completed.
+    // This reason is set right after the `kubeadm.controlplane.cluster.x-k8s.io` finalizer is removed.
+    // This means that the object will go away (i.e. be removed from etcd), except if there are other
+    // finalizers on the KCP object.
+    KubeadmControlPlaneDeletingDeletionCompletedV1Beta2Reason = clusterv1.DeletionCompletedV1Beta2Reason
+
+    // KubeadmControlPlaneDeletingInternalErrorV1Beta2Reason surfaces unexpected failures when deleting a KCP object.
+    KubeadmControlPlaneDeletingInternalErrorV1Beta2Reason = clusterv1.InternalErrorV1Beta2Reason
 )

 // APIServerPodHealthy, ControllerManagerPodHealthy, SchedulerPodHealthy and EtcdPodHealthy condition and corresponding
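On the consumer side, the Deleting condition can be read back like any other metav1.Condition. The sketch below is not part of this commit and assumes the v1beta2 conditions are surfaced under status.v1beta2.conditions, as for other Cluster API resources during the v1beta2 transition.

// Hypothetical consumer-side sketch, not part of this commit.
package example

import (
    "fmt"

    "k8s.io/apimachinery/pkg/api/meta"

    controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1"
)

func printDeleting(kcp *controlplanev1.KubeadmControlPlane) {
    if kcp.Status.V1Beta2 == nil {
        return // v1beta2 conditions not set yet (assumed field layout)
    }
    c := meta.FindStatusCondition(kcp.Status.V1Beta2.Conditions, controlplanev1.KubeadmControlPlaneDeletingV1Beta2Condition)
    if c == nil {
        return
    }
    // Example output: Deleting=True (WaitingForWorkersDeletion): KCP deletion blocked because ...
    fmt.Printf("Deleting=%s (%s): %s\n", c.Status, c.Reason, c.Message)
}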

controlplane/kubeadm/internal/control_plane.go

Lines changed: 6 additions & 0 deletions
@@ -60,6 +60,12 @@ type ControlPlane struct {

     managementCluster ManagementCluster
     workloadCluster   WorkloadCluster
+
+    // deletingReason is the reason that should be used when setting the Deleting condition.
+    DeletingReason string
+
+    // deletingMessage is the message that should be used when setting the Deleting condition.
+    DeletingMessage string
 }

 // PreflightCheckResults contains description about pre flight check results blocking machines creation or deletion.
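DeletingReason and DeletingMessage travel only on the in-memory ControlPlane: reconcileDelete fills them (see controller.go below) and the v1beta2 status update later turns them into the Deleting condition. A sketch of that translation step, written as if it lived in the KCP controllers package; the actual implementation is in the status update code changed elsewhere in this PR and may differ.

// Hypothetical sketch, not part of this diff hunk.
package controllers

import (
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

    controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1"
    "sigs.k8s.io/cluster-api/controlplane/kubeadm/internal"
)

func deletingConditionFor(controlPlane *internal.ControlPlane) metav1.Condition {
    if controlPlane.KCP.DeletionTimestamp.IsZero() {
        return metav1.Condition{
            Type:   controlplanev1.KubeadmControlPlaneDeletingV1Beta2Condition,
            Status: metav1.ConditionFalse,
            Reason: controlplanev1.KubeadmControlPlaneDeletingDeletionTimestampNotSetV1Beta2Reason,
        }
    }
    return metav1.Condition{
        Type:    controlplanev1.KubeadmControlPlaneDeletingV1Beta2Condition,
        Status:  metav1.ConditionTrue,
        Reason:  controlPlane.DeletingReason,
        Message: controlPlane.DeletingMessage,
    }
}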

controlplane/kubeadm/internal/controllers/controller.go

Lines changed: 61 additions & 3 deletions
@@ -19,6 +19,7 @@ package controllers
 import (
     "context"
     "fmt"
+    "sort"
     "strings"
     "time"

@@ -52,6 +53,7 @@ import (
     "sigs.k8s.io/cluster-api/util/conditions"
     v1beta2conditions "sigs.k8s.io/cluster-api/util/conditions/v1beta2"
     "sigs.k8s.io/cluster-api/util/finalizers"
+    clog "sigs.k8s.io/cluster-api/util/log"
     "sigs.k8s.io/cluster-api/util/patch"
     "sigs.k8s.io/cluster-api/util/paused"
     "sigs.k8s.io/cluster-api/util/predicates"
@@ -220,7 +222,7 @@ func (r *KubeadmControlPlaneReconciler) Reconcile(ctx context.Context, req ctrl.
         }
     }

-    r.updateV1beta2Status(ctx, controlPlane)
+    r.updateV1Beta2Status(ctx, controlPlane)

     // Always attempt to Patch the KubeadmControlPlane object and status after each reconciliation.
     patchOpts := []patch.Option{}
@@ -596,6 +598,9 @@ func (r *KubeadmControlPlaneReconciler) reconcileDelete(ctx context.Context, con

     // If no control plane machines remain, remove the finalizer
     if len(controlPlane.Machines) == 0 {
+        controlPlane.DeletingReason = controlplanev1.KubeadmControlPlaneDeletingDeletionCompletedV1Beta2Reason
+        controlPlane.DeletingMessage = ""
+
         controllerutil.RemoveFinalizer(controlPlane.KCP, controlplanev1.KubeadmControlPlaneFinalizer)
         return ctrl.Result{}, nil
     }
@@ -615,6 +620,8 @@ func (r *KubeadmControlPlaneReconciler) reconcileDelete(ctx context.Context, con
     // Gets all machines, not just control plane machines.
     allMachines, err := r.managementCluster.GetMachinesForCluster(ctx, controlPlane.Cluster)
     if err != nil {
+        controlPlane.DeletingReason = controlplanev1.KubeadmControlPlaneDeletingInternalErrorV1Beta2Reason
+        controlPlane.DeletingMessage = "Please check controller logs for errors" //nolint:goconst // Not making this a constant for now
         return ctrl.Result{}, err
     }

@@ -623,20 +630,25 @@ func (r *KubeadmControlPlaneReconciler) reconcileDelete(ctx context.Context, con
     if feature.Gates.Enabled(feature.MachinePool) {
         allMachinePools, err = r.managementCluster.GetMachinePoolsForCluster(ctx, controlPlane.Cluster)
         if err != nil {
+            controlPlane.DeletingReason = controlplanev1.KubeadmControlPlaneDeletingInternalErrorV1Beta2Reason
+            controlPlane.DeletingMessage = "Please check controller logs for errors"
             return ctrl.Result{}, err
         }
     }
     // Verify that only control plane machines remain
     if len(allMachines) != len(controlPlane.Machines) || len(allMachinePools.Items) != 0 {
         log.Info("Waiting for worker nodes to be deleted first")
         conditions.MarkFalse(controlPlane.KCP, controlplanev1.ResizedCondition, clusterv1.DeletingReason, clusterv1.ConditionSeverityInfo, "Waiting for worker nodes to be deleted first")
+
+        controlPlane.DeletingReason = controlplanev1.KubeadmControlPlaneDeletingWaitingForWorkersDeletionV1Beta2Reason
+        controlPlane.DeletingMessage = fmt.Sprintf("KCP deletion blocked because %s still exist", objectsPendingDeleteNames(allMachines, allMachinePools, controlPlane.Cluster))
         return ctrl.Result{RequeueAfter: deleteRequeueAfter}, nil
     }

     // Delete control plane machines in parallel
-    machinesToDelete := controlPlane.Machines
+    machines := controlPlane.Machines
     var errs []error
-    for _, machineToDelete := range machinesToDelete {
+    for _, machineToDelete := range machines {
         log := log.WithValues("Machine", klog.KObj(machineToDelete))
         ctx := ctrl.LoggerInto(ctx, log)

@@ -665,15 +677,61 @@ func (r *KubeadmControlPlaneReconciler) reconcileDelete(ctx context.Context, con
         err := kerrors.NewAggregate(errs)
         r.recorder.Eventf(controlPlane.KCP, corev1.EventTypeWarning, "FailedDelete",
             "Failed to delete control plane Machines for cluster %s control plane: %v", klog.KObj(controlPlane.Cluster), err)
+
+        controlPlane.DeletingReason = controlplanev1.KubeadmControlPlaneDeletingInternalErrorV1Beta2Reason
+        controlPlane.DeletingMessage = "Please check controller logs for errors"
         return ctrl.Result{}, err
     }

     log.Info("Waiting for control plane Machines to not exist anymore")

     conditions.MarkFalse(controlPlane.KCP, controlplanev1.ResizedCondition, clusterv1.DeletingReason, clusterv1.ConditionSeverityInfo, "")
+
+    message := ""
+    if len(machines) > 0 {
+        if len(machines) == 1 {
+            message = fmt.Sprintf("Deleting %d Machine", len(machines))
+        } else {
+            message = fmt.Sprintf("Deleting %d Machines", len(machines))
+        }
+        staleMessage := aggregateStaleMachines(machines)
+        if staleMessage != "" {
+            message += fmt.Sprintf(" and %s", staleMessage)
+        }
+    }
+    controlPlane.DeletingReason = controlplanev1.KubeadmControlPlaneDeletingWaitingForMachineDeletionV1Beta2Reason
+    controlPlane.DeletingMessage = message
     return ctrl.Result{RequeueAfter: deleteRequeueAfter}, nil
 }

+// objectsPendingDeleteNames return the names of worker Machines and MachinePools pending delete.
+func objectsPendingDeleteNames(allMachines collections.Machines, allMachinePools *expv1.MachinePoolList, cluster *clusterv1.Cluster) string {
+    controlPlaneMachines := allMachines.Filter(collections.ControlPlaneMachines(cluster.Name))
+    workerMachines := allMachines.Difference(controlPlaneMachines)
+
+    descendants := make([]string, 0)
+    if feature.Gates.Enabled(feature.MachinePool) {
+        machinePoolNames := make([]string, len(allMachinePools.Items))
+        for i, machinePool := range allMachinePools.Items {
+            machinePoolNames[i] = machinePool.Name
+        }
+        if len(machinePoolNames) > 0 {
+            sort.Strings(machinePoolNames)
+            descendants = append(descendants, "MachinePools: "+clog.StringListToString(machinePoolNames))
+        }
+    }
+
+    workerMachineNames := make([]string, len(workerMachines))
+    for i, workerMachine := range workerMachines.UnsortedList() {
+        workerMachineNames[i] = workerMachine.Name
+    }
+    if len(workerMachineNames) > 0 {
+        sort.Strings(workerMachineNames)
+        descendants = append(descendants, "worker Machines: "+clog.StringListToString(workerMachineNames))
+    }
+    return strings.Join(descendants, "; ")
+}
+
 func (r *KubeadmControlPlaneReconciler) removePreTerminateHookAnnotationFromMachine(ctx context.Context, machine *clusterv1.Machine) error {
     if _, exists := machine.Annotations[controlplanev1.PreTerminateHookCleanupAnnotation]; !exists {
         // Nothing to do, the annotation is not set (anymore) on the Machine
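reconcileDelete above appends aggregateStaleMachines(machines) to the Deleting message; that helper pre-dates this commit and is not shown in the diff. Purely to illustrate the kind of aggregation it performs, a hypothetical version could look like the following; the real helper's wording and threshold may differ.

// Hypothetical stand-in for aggregateStaleMachines; illustration only.
package controllers

import (
    "fmt"
    "sort"
    "strings"
    "time"

    "sigs.k8s.io/cluster-api/util/collections"
)

func aggregateStaleMachinesSketch(machines collections.Machines) string {
    names := []string{}
    for _, m := range machines {
        // Consider a Machine "stale" if its deletion has been in progress for a while.
        if !m.DeletionTimestamp.IsZero() && time.Since(m.DeletionTimestamp.Time) > 15*time.Minute {
            names = append(names, m.Name)
        }
    }
    if len(names) == 0 {
        return ""
    }
    sort.Strings(names)
    return fmt.Sprintf("Machines %s are in deletion since more than 15m", strings.Join(names, ", "))
}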

controlplane/kubeadm/internal/controllers/controller_test.go

Lines changed: 77 additions & 20 deletions
@@ -3041,6 +3041,8 @@ func TestKubeadmControlPlaneReconciler_reconcileDelete(t *testing.T) {
         g.Expect(result).To(Equal(ctrl.Result{RequeueAfter: deleteRequeueAfter}))
         g.Expect(err).ToNot(HaveOccurred())
         g.Expect(kcp.Finalizers).To(ContainElement(controlplanev1.KubeadmControlPlaneFinalizer))
+        g.Expect(controlPlane.DeletingReason).To(Equal(controlplanev1.KubeadmControlPlaneDeletingWaitingForMachineDeletionV1Beta2Reason))
+        g.Expect(controlPlane.DeletingMessage).To(Equal("Deleting 3 Machines"))

         controlPlaneMachines := clusterv1.MachineList{}
         g.Expect(fakeClient.List(ctx, &controlPlaneMachines)).To(Succeed())
@@ -3067,6 +3069,8 @@ func TestKubeadmControlPlaneReconciler_reconcileDelete(t *testing.T) {
         g.Expect(result).To(BeComparableTo(ctrl.Result{}))
         g.Expect(err).ToNot(HaveOccurred())
         g.Expect(kcp.Finalizers).To(BeEmpty())
+        g.Expect(controlPlane.DeletingReason).To(Equal(controlplanev1.KubeadmControlPlaneDeletingDeletionCompletedV1Beta2Reason))
+        g.Expect(controlPlane.DeletingMessage).To(BeEmpty())
     })

     t.Run("does not remove any control plane Machines if other Machines exist", func(t *testing.T) {
@@ -3075,18 +3079,20 @@ func TestKubeadmControlPlaneReconciler_reconcileDelete(t *testing.T) {
         cluster, kcp, _ := createClusterWithControlPlane(metav1.NamespaceDefault)
         controllerutil.AddFinalizer(kcp, controlplanev1.KubeadmControlPlaneFinalizer)

-        workerMachine := &clusterv1.Machine{
-            ObjectMeta: metav1.ObjectMeta{
-                Name:      "worker",
-                Namespace: cluster.Namespace,
-                Labels: map[string]string{
-                    clusterv1.ClusterNameLabel: cluster.Name,
+        initObjs := []client.Object{cluster.DeepCopy(), kcp.DeepCopy()}
+
+        for i := range 10 {
+            initObjs = append(initObjs, &clusterv1.Machine{
+                ObjectMeta: metav1.ObjectMeta{
+                    Name:      fmt.Sprintf("worker-%d", i),
+                    Namespace: cluster.Namespace,
+                    Labels: map[string]string{
+                        clusterv1.ClusterNameLabel: cluster.Name,
+                    },
                 },
-            },
+            })
         }

-        initObjs := []client.Object{cluster.DeepCopy(), kcp.DeepCopy(), workerMachine.DeepCopy()}
-
         machines := collections.New()
         for i := range 3 {
             m, _ := createMachineNodePair(fmt.Sprintf("test-%d", i), cluster, kcp, true)
@@ -3115,8 +3121,9 @@ func TestKubeadmControlPlaneReconciler_reconcileDelete(t *testing.T) {
         result, err := r.reconcileDelete(ctx, controlPlane)
         g.Expect(result).To(BeComparableTo(ctrl.Result{RequeueAfter: deleteRequeueAfter}))
         g.Expect(err).ToNot(HaveOccurred())
-
         g.Expect(kcp.Finalizers).To(ContainElement(controlplanev1.KubeadmControlPlaneFinalizer))
+        g.Expect(controlPlane.DeletingReason).To(Equal(controlplanev1.KubeadmControlPlaneDeletingWaitingForWorkersDeletionV1Beta2Reason))
+        g.Expect(controlPlane.DeletingMessage).To(Equal("KCP deletion blocked because worker Machines: worker-0, worker-1, worker-2, worker-3, worker-4, ... (5 more) still exist"))

         controlPlaneMachines := clusterv1.MachineList{}
         labels := map[string]string{
@@ -3133,18 +3140,20 @@ func TestKubeadmControlPlaneReconciler_reconcileDelete(t *testing.T) {
         cluster, kcp, _ := createClusterWithControlPlane(metav1.NamespaceDefault)
         controllerutil.AddFinalizer(kcp, controlplanev1.KubeadmControlPlaneFinalizer)

-        workerMachinePool := &expv1.MachinePool{
-            ObjectMeta: metav1.ObjectMeta{
-                Name:      "worker",
-                Namespace: cluster.Namespace,
-                Labels: map[string]string{
-                    clusterv1.ClusterNameLabel: cluster.Name,
+        initObjs := []client.Object{cluster.DeepCopy(), kcp.DeepCopy()}
+
+        for i := range 10 {
+            initObjs = append(initObjs, &expv1.MachinePool{
+                ObjectMeta: metav1.ObjectMeta{
+                    Name:      fmt.Sprintf("mp-%d", i),
+                    Namespace: cluster.Namespace,
+                    Labels: map[string]string{
+                        clusterv1.ClusterNameLabel: cluster.Name,
+                    },
                 },
-            },
+            })
         }

-        initObjs := []client.Object{cluster.DeepCopy(), kcp.DeepCopy(), workerMachinePool.DeepCopy()}
-
         machines := collections.New()
         for i := range 3 {
             m, _ := createMachineNodePair(fmt.Sprintf("test-%d", i), cluster, kcp, true)
@@ -3173,8 +3182,9 @@ func TestKubeadmControlPlaneReconciler_reconcileDelete(t *testing.T) {
         result, err := r.reconcileDelete(ctx, controlPlane)
         g.Expect(result).To(BeComparableTo(ctrl.Result{RequeueAfter: deleteRequeueAfter}))
         g.Expect(err).ToNot(HaveOccurred())
-
         g.Expect(kcp.Finalizers).To(ContainElement(controlplanev1.KubeadmControlPlaneFinalizer))
+        g.Expect(controlPlane.DeletingReason).To(Equal(controlplanev1.KubeadmControlPlaneDeletingWaitingForWorkersDeletionV1Beta2Reason))
+        g.Expect(controlPlane.DeletingMessage).To(Equal("KCP deletion blocked because MachinePools: mp-0, mp-1, mp-2, mp-3, mp-4, ... (5 more) still exist"))

         controlPlaneMachines := clusterv1.MachineList{}
         labels := map[string]string{
@@ -3211,9 +3221,56 @@ func TestKubeadmControlPlaneReconciler_reconcileDelete(t *testing.T) {
         g.Expect(result).To(BeComparableTo(ctrl.Result{}))
         g.Expect(err).ToNot(HaveOccurred())
         g.Expect(kcp.Finalizers).To(BeEmpty())
+        g.Expect(controlPlane.DeletingReason).To(Equal(controlplanev1.KubeadmControlPlaneDeletingDeletionCompletedV1Beta2Reason))
+        g.Expect(controlPlane.DeletingMessage).To(BeEmpty())
     })
 }

+func TestObjectsPendingDelete(t *testing.T) {
+    c := &clusterv1.Cluster{
+        ObjectMeta: metav1.ObjectMeta{
+            Name: "test-cluster",
+        },
+    }
+
+    cpMachineLabels := map[string]string{
+        clusterv1.ClusterNameLabel:         c.Name,
+        clusterv1.MachineControlPlaneLabel: "",
+    }
+    workerMachineLabels := map[string]string{
+        clusterv1.ClusterNameLabel: c.Name,
+    }
+
+    allMachines := collections.FromMachineList(&clusterv1.MachineList{
+        Items: []clusterv1.Machine{
+            *machine("cp1", withLabels(cpMachineLabels)),
+            *machine("cp2", withLabels(cpMachineLabels)),
+            *machine("cp3", withLabels(cpMachineLabels)),
+            *machine("w1", withLabels(workerMachineLabels)),
+            *machine("w2", withLabels(workerMachineLabels)),
+            *machine("w3", withLabels(workerMachineLabels)),
+            *machine("w4", withLabels(workerMachineLabels)),
+            *machine("w5", withLabels(workerMachineLabels)),
+            *machine("w6", withLabels(workerMachineLabels)),
+            *machine("w7", withLabels(workerMachineLabels)),
+            *machine("w8", withLabels(workerMachineLabels)),
+        },
+    })
+    machinePools := &expv1.MachinePoolList{
+        Items: []expv1.MachinePool{
+            {
+                ObjectMeta: metav1.ObjectMeta{
+                    Name: "mp1",
+                },
+            },
+        },
+    }
+
+    g := NewWithT(t)
+
+    g.Expect(objectsPendingDeleteNames(allMachines, machinePools, c)).To(Equal("MachinePools: mp1; worker Machines: w1, w2, w3, w4, w5, ... (3 more)"))
+}
+
 // test utils.

 func newFakeClient(initObjs ...client.Object) client.Client {
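The expected messages above ("worker-0, worker-1, worker-2, worker-3, worker-4, ... (5 more)", "w1, w2, w3, w4, w5, ... (3 more)") show the list truncation that clog.StringListToString applies inside objectsPendingDeleteNames. Inferring only from these expectations, the behaviour is roughly the sketch below; the real helper in sigs.k8s.io/cluster-api/util/log may differ in details.

// Hypothetical sketch of the truncation behaviour inferred from the test expectations;
// not the actual clog.StringListToString implementation.
package example

import (
    "fmt"
    "strings"
)

func stringListToStringSketch(names []string) string {
    const maxItems = 5
    if len(names) <= maxItems {
        return strings.Join(names, ", ")
    }
    return fmt.Sprintf("%s, ... (%d more)", strings.Join(names[:maxItems], ", "), len(names)-maxItems)
}

With the 10 workers created in the test this yields the first five names plus "(5 more)", matching the assertion.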

controlplane/kubeadm/internal/controllers/scale_test.go

Lines changed: 6 additions & 0 deletions
@@ -758,6 +758,12 @@ func withAnnotation(annotation string) machineOpt {
     }
 }

+func withLabels(labels map[string]string) machineOpt {
+    return func(m *clusterv1.Machine) {
+        m.ObjectMeta.Labels = labels
+    }
+}
+
 func withTimestamp(t time.Time) machineOpt {
     return func(m *clusterv1.Machine) {
         m.CreationTimestamp = metav1.NewTime(t)
