Skip to content

Commit 6d32337

Browse files
authored
Merge pull request #11404 from fabriziopandini/refine-v1beta2-condition-messages
🌱 Refine v1beta2 condition messages
2 parents 657b5f0 + 2373832 commit 6d32337

25 files changed

+772
-457
lines changed

controlplane/kubeadm/internal/controllers/controller.go

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -639,7 +639,11 @@ func (r *KubeadmControlPlaneReconciler) reconcileDelete(ctx context.Context, con
639639
conditions.MarkFalse(controlPlane.KCP, controlplanev1.ResizedCondition, clusterv1.DeletingReason, clusterv1.ConditionSeverityInfo, "Waiting for worker nodes to be deleted first")
640640

641641
controlPlane.DeletingReason = controlplanev1.KubeadmControlPlaneDeletingWaitingForWorkersDeletionV1Beta2Reason
642-
controlPlane.DeletingMessage = fmt.Sprintf("KCP deletion blocked because %s still exist", objectsPendingDeleteNames(allMachines, allMachinePools, controlPlane.Cluster))
642+
names := objectsPendingDeleteNames(allMachines, allMachinePools, controlPlane.Cluster)
643+
for i := range names {
644+
names[i] = "* " + names[i]
645+
}
646+
controlPlane.DeletingMessage = fmt.Sprintf("KubeadmControlPlane deletion blocked because following objects still exist:\n%s", strings.Join(names, "\n"))
643647
return ctrl.Result{RequeueAfter: deleteRequeueAfter}, nil
644648
}
645649

@@ -703,7 +707,7 @@ func (r *KubeadmControlPlaneReconciler) reconcileDelete(ctx context.Context, con
703707
}
704708

705709
// objectsPendingDeleteNames returns the names of worker Machines and MachinePools pending delete.
706-
func objectsPendingDeleteNames(allMachines collections.Machines, allMachinePools *expv1.MachinePoolList, cluster *clusterv1.Cluster) string {
710+
func objectsPendingDeleteNames(allMachines collections.Machines, allMachinePools *expv1.MachinePoolList, cluster *clusterv1.Cluster) []string {
707711
controlPlaneMachines := allMachines.Filter(collections.ControlPlaneMachines(cluster.Name))
708712
workerMachines := allMachines.Difference(controlPlaneMachines)
709713

@@ -725,9 +729,9 @@ func objectsPendingDeleteNames(allMachines collections.Machines, allMachinePools
725729
}
726730
if len(workerMachineNames) > 0 {
727731
sort.Strings(workerMachineNames)
728-
descendants = append(descendants, "worker Machines: "+clog.StringListToString(workerMachineNames))
732+
descendants = append(descendants, "Machines: "+clog.StringListToString(workerMachineNames))
729733
}
730-
return strings.Join(descendants, "; ")
734+
return descendants
731735
}
732736

733737
func (r *KubeadmControlPlaneReconciler) removePreTerminateHookAnnotationFromMachine(ctx context.Context, machine *clusterv1.Machine) error {

controlplane/kubeadm/internal/controllers/controller_test.go

Lines changed: 29 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2008,16 +2008,21 @@ func TestKubeadmControlPlaneReconciler_reconcileControlPlaneAndMachinesCondition
20082008
Reason: controlplanev1.KubeadmControlPlaneInitializedV1Beta2Reason,
20092009
},
20102010
{
2011-
Type: controlplanev1.KubeadmControlPlaneEtcdClusterHealthyV1Beta2Condition,
2012-
Status: metav1.ConditionUnknown,
2013-
Reason: controlplanev1.KubeadmControlPlaneEtcdClusterHealthUnknownV1Beta2Reason,
2014-
Message: "Following Machines are reporting etcd member unknown: machine1-test",
2011+
Type: controlplanev1.KubeadmControlPlaneEtcdClusterHealthyV1Beta2Condition,
2012+
Status: metav1.ConditionUnknown,
2013+
Reason: controlplanev1.KubeadmControlPlaneEtcdClusterHealthUnknownV1Beta2Reason,
2014+
Message: "* Machine machine1-test:\n" +
2015+
" * EtcdMemberHealthy: Node does not exist",
20152016
},
20162017
{
2017-
Type: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthyV1Beta2Condition,
2018-
Status: metav1.ConditionUnknown,
2019-
Reason: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthUnknownV1Beta2Reason,
2020-
Message: "Following Machines are reporting control plane unknown: machine1-test",
2018+
Type: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthyV1Beta2Condition,
2019+
Status: metav1.ConditionUnknown,
2020+
Reason: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthUnknownV1Beta2Reason,
2021+
Message: "* Machine machine1-test:\n" +
2022+
" * APIServerPodHealthy: Node does not exist\n" +
2023+
" * ControllerManagerPodHealthy: Node does not exist\n" +
2024+
" * SchedulerPodHealthy: Node does not exist\n" +
2025+
" * EtcdPodHealthy: Node does not exist",
20212026
},
20222027
},
20232028
expectMachineConditions: []metav1.Condition{
@@ -2083,16 +2088,21 @@ func TestKubeadmControlPlaneReconciler_reconcileControlPlaneAndMachinesCondition
20832088
Reason: controlplanev1.KubeadmControlPlaneInitializedV1Beta2Reason,
20842089
},
20852090
{
2086-
Type: controlplanev1.KubeadmControlPlaneEtcdClusterHealthyV1Beta2Condition,
2087-
Status: metav1.ConditionUnknown,
2088-
Reason: controlplanev1.KubeadmControlPlaneEtcdClusterHealthUnknownV1Beta2Reason,
2089-
Message: "Following Machines are reporting etcd member unknown: machine1-test",
2091+
Type: controlplanev1.KubeadmControlPlaneEtcdClusterHealthyV1Beta2Condition,
2092+
Status: metav1.ConditionUnknown,
2093+
Reason: controlplanev1.KubeadmControlPlaneEtcdClusterHealthUnknownV1Beta2Reason,
2094+
Message: "* Machine machine1-test:\n" +
2095+
" * EtcdMemberHealthy: Node does not exist",
20902096
},
20912097
{
2092-
Type: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthyV1Beta2Condition,
2093-
Status: metav1.ConditionUnknown,
2094-
Reason: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthUnknownV1Beta2Reason,
2095-
Message: "Following Machines are reporting control plane unknown: machine1-test",
2098+
Type: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthyV1Beta2Condition,
2099+
Status: metav1.ConditionUnknown,
2100+
Reason: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthUnknownV1Beta2Reason,
2101+
Message: "* Machine machine1-test:\n" +
2102+
" * APIServerPodHealthy: Node does not exist\n" +
2103+
" * ControllerManagerPodHealthy: Node does not exist\n" +
2104+
" * SchedulerPodHealthy: Node does not exist\n" +
2105+
" * EtcdPodHealthy: Node does not exist",
20962106
},
20972107
},
20982108
expectMachineConditions: []metav1.Condition{
@@ -3344,7 +3354,7 @@ func TestKubeadmControlPlaneReconciler_reconcileDelete(t *testing.T) {
33443354
g.Expect(err).ToNot(HaveOccurred())
33453355
g.Expect(kcp.Finalizers).To(ContainElement(controlplanev1.KubeadmControlPlaneFinalizer))
33463356
g.Expect(controlPlane.DeletingReason).To(Equal(controlplanev1.KubeadmControlPlaneDeletingWaitingForWorkersDeletionV1Beta2Reason))
3347-
g.Expect(controlPlane.DeletingMessage).To(Equal("KCP deletion blocked because worker Machines: worker-0, worker-1, worker-2, worker-3, worker-4, ... (5 more) still exist"))
3357+
g.Expect(controlPlane.DeletingMessage).To(Equal("KubeadmControlPlane deletion blocked because following objects still exist:\n* Machines: worker-0, worker-1, worker-2, worker-3, worker-4, ... (5 more)"))
33483358

33493359
controlPlaneMachines := clusterv1.MachineList{}
33503360
labels := map[string]string{
@@ -3405,7 +3415,7 @@ func TestKubeadmControlPlaneReconciler_reconcileDelete(t *testing.T) {
34053415
g.Expect(err).ToNot(HaveOccurred())
34063416
g.Expect(kcp.Finalizers).To(ContainElement(controlplanev1.KubeadmControlPlaneFinalizer))
34073417
g.Expect(controlPlane.DeletingReason).To(Equal(controlplanev1.KubeadmControlPlaneDeletingWaitingForWorkersDeletionV1Beta2Reason))
3408-
g.Expect(controlPlane.DeletingMessage).To(Equal("KCP deletion blocked because MachinePools: mp-0, mp-1, mp-2, mp-3, mp-4, ... (5 more) still exist"))
3418+
g.Expect(controlPlane.DeletingMessage).To(Equal("KubeadmControlPlane deletion blocked because following objects still exist:\n* MachinePools: mp-0, mp-1, mp-2, mp-3, mp-4, ... (5 more)"))
34093419

34103420
controlPlaneMachines := clusterv1.MachineList{}
34113421
labels := map[string]string{
@@ -3489,7 +3499,7 @@ func TestObjectsPendingDelete(t *testing.T) {
34893499

34903500
g := NewWithT(t)
34913501

3492-
g.Expect(objectsPendingDeleteNames(allMachines, machinePools, c)).To(Equal("MachinePools: mp1; worker Machines: w1, w2, w3, w4, w5, ... (3 more)"))
3502+
g.Expect(objectsPendingDeleteNames(allMachines, machinePools, c)).To(Equal([]string{"MachinePools: mp1", "Machines: w1, w2, w3, w4, w5, ... (3 more)"}))
34933503
}
34943504

34953505
// test utils.

controlplane/kubeadm/internal/controllers/status_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -482,13 +482,13 @@ func Test_setMachinesReadyAndMachinesUpToDateConditions(t *testing.T) {
482482
Type: controlplanev1.KubeadmControlPlaneMachinesReadyV1Beta2Condition,
483483
Status: metav1.ConditionFalse,
484484
Reason: "SomeReason", // There is only one machine reporting issues, using the reason from that machine.
485-
Message: "NotReady from Machine m3",
485+
Message: "* Machine m3: NotReady",
486486
},
487487
expectMachinesUpToDateCondition: metav1.Condition{
488488
Type: controlplanev1.KubeadmControlPlaneMachinesUpToDateV1Beta2Condition,
489489
Status: metav1.ConditionFalse,
490490
Reason: v1beta2conditions.MultipleIssuesReportedReason, // There are many machines reporting issues, using a generic reason.
491-
Message: "NotUpToDate from Machines m2, m3",
491+
Message: "* Machines m2, m3: NotUpToDate",
492492
},
493493
},
494494
}
@@ -550,7 +550,7 @@ func Test_setRemediatingCondition(t *testing.T) {
550550
Type: controlplanev1.KubeadmControlPlaneRemediatingV1Beta2Condition,
551551
Status: metav1.ConditionTrue,
552552
Reason: controlplanev1.KubeadmControlPlaneRemediatingV1Beta2Reason,
553-
Message: "Machine deletionTimestamp set from Machine m3",
553+
Message: "* Machine m3: Machine deletionTimestamp set",
554554
},
555555
},
556556
{

controlplane/kubeadm/internal/workload_cluster_conditions.go

Lines changed: 56 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ package internal
1919
import (
2020
"context"
2121
"fmt"
22+
"sort"
2223
"strings"
2324
"time"
2425

@@ -38,6 +39,7 @@ import (
3839
"sigs.k8s.io/cluster-api/util/collections"
3940
"sigs.k8s.io/cluster-api/util/conditions"
4041
v1beta2conditions "sigs.k8s.io/cluster-api/util/conditions/v1beta2"
42+
clog "sigs.k8s.io/cluster-api/util/log"
4143
)
4244

4345
// UpdateEtcdConditions is responsible for updating machine conditions reflecting the status of all the etcd members.
@@ -934,33 +936,81 @@ func aggregateV1Beta2ConditionsFromMachinesToKCP(input aggregateV1Beta2Condition
934936
kcpMachinesWithUnknown := sets.Set[string]{}
935937
kcpMachinesWithInfo := sets.Set[string]{}
936938

939+
messageMap := map[string][]string{}
937940
for i := range input.controlPlane.Machines {
938941
machine := input.controlPlane.Machines[i]
942+
machineMessages := []string{}
939943
for _, condition := range input.machineConditions {
940944
if machineCondition := v1beta2conditions.Get(machine, condition); machineCondition != nil {
941945
switch machineCondition.Status {
942946
case metav1.ConditionTrue:
943947
kcpMachinesWithInfo.Insert(machine.Name)
944948
case metav1.ConditionFalse:
945949
kcpMachinesWithErrors.Insert(machine.Name)
950+
m := machineCondition.Message
951+
if m == "" {
952+
m = fmt.Sprintf("condition is %s", machineCondition.Status)
953+
}
954+
machineMessages = append(machineMessages, fmt.Sprintf(" * %s: %s", machineCondition.Type, m))
946955
case metav1.ConditionUnknown:
947956
kcpMachinesWithUnknown.Insert(machine.Name)
957+
m := machineCondition.Message
958+
if m == "" {
959+
m = fmt.Sprintf("condition is %s", machineCondition.Status)
960+
}
961+
machineMessages = append(machineMessages, fmt.Sprintf(" * %s: %s", machineCondition.Type, m))
948962
}
949963
}
950964
}
965+
966+
if len(machineMessages) > 0 {
967+
message := strings.Join(machineMessages, "\n")
968+
messageMap[message] = append(messageMap[message], machine.Name)
969+
}
970+
}
971+
972+
// Compute the order of messages according to the number of machines reporting the same message.
973+
// Note: The list of object names is used as a secondary criterion to sort messages with the same number of objects.
974+
messageIndex := make([]string, 0, len(messageMap))
975+
for m := range messageMap {
976+
messageIndex = append(messageIndex, m)
977+
}
978+
979+
sort.SliceStable(messageIndex, func(i, j int) bool {
980+
return len(messageMap[messageIndex[i]]) > len(messageMap[messageIndex[j]]) ||
981+
(len(messageMap[messageIndex[i]]) == len(messageMap[messageIndex[j]]) && strings.Join(messageMap[messageIndex[i]], ",") < strings.Join(messageMap[messageIndex[j]], ","))
982+
})
983+
984+
// Build the message
985+
messages := []string{}
986+
for _, message := range messageIndex {
987+
machines := messageMap[message]
988+
machinesMessage := "Machine"
989+
if len(messageMap[message]) > 1 {
990+
machinesMessage += "s"
991+
}
992+
993+
sort.Strings(machines)
994+
machinesMessage += " " + clog.ListToString(machines, func(s string) string { return s }, 3)
995+
996+
messages = append(messages, fmt.Sprintf("* %s:\n%s", machinesMessage, message))
951997
}
952998

999+
// Append messages impacting KCP as a whole, if any
1000+
if len(input.kcpErrors) > 0 {
1001+
for _, message := range input.kcpErrors {
1002+
messages = append(messages, fmt.Sprintf("* %s", message))
1003+
}
1004+
}
1005+
message := strings.Join(messages, "\n")
1006+
9531007
// In case of at least one machine with errors or KCP level errors (nodes without machines), report false.
9541008
if len(input.kcpErrors) > 0 || len(kcpMachinesWithErrors) > 0 {
955-
messages := input.kcpErrors
956-
if len(kcpMachinesWithErrors) > 0 {
957-
messages = append(messages, fmt.Sprintf("Following Machines are reporting %s errors: %s", input.note, strings.Join(sets.List(kcpMachinesWithErrors), ", ")))
958-
}
9591009
v1beta2conditions.Set(input.controlPlane.KCP, metav1.Condition{
9601010
Type: input.condition,
9611011
Status: metav1.ConditionFalse,
9621012
Reason: input.falseReason,
963-
Message: strings.Join(messages, ", "),
1013+
Message: message,
9641014
})
9651015
return
9661016
}
@@ -971,7 +1021,7 @@ func aggregateV1Beta2ConditionsFromMachinesToKCP(input aggregateV1Beta2Condition
9711021
Type: input.condition,
9721022
Status: metav1.ConditionUnknown,
9731023
Reason: input.unknownReason,
974-
Message: fmt.Sprintf("Following Machines are reporting %s unknown: %s", input.note, strings.Join(sets.List(kcpMachinesWithUnknown), ", ")),
1024+
Message: message,
9751025
})
9761026
return
9771027
}

0 commit comments

Comments
 (0)